Upload folder using huggingface_hub
Browse files- chat_template.jinja +45 -0
- config.json +61 -0
- generation_config.json +9 -0
- model.safetensors +3 -0
- run_meta.json +14 -14
- tokenizer.json +0 -0
- tokenizer_config.json +20 -0
- train10k.txt +101 -0
- training_args.bin +3 -0
chat_template.jinja
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{{- bos_token -}}
|
| 2 |
+
{%- set keep_past_thinking = keep_past_thinking | default(false) -%}
|
| 3 |
+
{%- set ns = namespace(system_prompt="") -%}
|
| 4 |
+
{%- if messages[0]["role"] == "system" -%}
|
| 5 |
+
{%- set ns.system_prompt = messages[0]["content"] -%}
|
| 6 |
+
{%- set messages = messages[1:] -%}
|
| 7 |
+
{%- endif -%}
|
| 8 |
+
{%- if tools -%}
|
| 9 |
+
{%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "List of tools: [" -%}
|
| 10 |
+
{%- for tool in tools -%}
|
| 11 |
+
{%- if tool is not string -%}
|
| 12 |
+
{%- set tool = tool | tojson -%}
|
| 13 |
+
{%- endif -%}
|
| 14 |
+
{%- set ns.system_prompt = ns.system_prompt + tool -%}
|
| 15 |
+
{%- if not loop.last -%}
|
| 16 |
+
{%- set ns.system_prompt = ns.system_prompt + ", " -%}
|
| 17 |
+
{%- endif -%}
|
| 18 |
+
{%- endfor -%}
|
| 19 |
+
{%- set ns.system_prompt = ns.system_prompt + "]" -%}
|
| 20 |
+
{%- endif -%}
|
| 21 |
+
{%- if ns.system_prompt -%}
|
| 22 |
+
{{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
|
| 23 |
+
{%- endif -%}
|
| 24 |
+
{%- set ns.last_assistant_index = -1 -%}
|
| 25 |
+
{%- for message in messages -%}
|
| 26 |
+
{%- if message["role"] == "assistant" -%}
|
| 27 |
+
{%- set ns.last_assistant_index = loop.index0 -%}
|
| 28 |
+
{%- endif -%}
|
| 29 |
+
{%- endfor -%}
|
| 30 |
+
{%- for message in messages -%}
|
| 31 |
+
{{- "<|im_start|>" + message["role"] + "\n" -}}
|
| 32 |
+
{%- set content = message["content"] -%}
|
| 33 |
+
{%- if content is not string -%}
|
| 34 |
+
{%- set content = content | tojson -%}
|
| 35 |
+
{%- endif -%}
|
| 36 |
+
{%- if message["role"] == "assistant" and not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}
|
| 37 |
+
{%- if "</think>" in content -%}
|
| 38 |
+
{%- set content = content.split("</think>")[-1] | trim -%}
|
| 39 |
+
{%- endif -%}
|
| 40 |
+
{%- endif -%}
|
| 41 |
+
{{- content + "<|im_end|>\n" -}}
|
| 42 |
+
{%- endfor -%}
|
| 43 |
+
{%- if add_generation_prompt -%}
|
| 44 |
+
{{- "<|im_start|>assistant\n" -}}
|
| 45 |
+
{%- endif -%}
|
config.json
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Lfm2ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"block_auto_adjust_ff_dim": true,
|
| 6 |
+
"block_dim": 2048,
|
| 7 |
+
"block_ff_dim": 12288,
|
| 8 |
+
"block_ffn_dim_multiplier": 1.0,
|
| 9 |
+
"block_mlp_init_scale": 1.0,
|
| 10 |
+
"block_multiple_of": 256,
|
| 11 |
+
"block_norm_eps": 1e-05,
|
| 12 |
+
"block_out_init_scale": 1.0,
|
| 13 |
+
"block_use_swiglu": true,
|
| 14 |
+
"block_use_xavier_init": true,
|
| 15 |
+
"bos_token_id": 1,
|
| 16 |
+
"conv_L_cache": 3,
|
| 17 |
+
"conv_bias": false,
|
| 18 |
+
"conv_dim": 2048,
|
| 19 |
+
"conv_use_xavier_init": true,
|
| 20 |
+
"dtype": "bfloat16",
|
| 21 |
+
"eos_token_id": 7,
|
| 22 |
+
"hidden_size": 2048,
|
| 23 |
+
"initializer_range": 0.02,
|
| 24 |
+
"intermediate_size": 12288,
|
| 25 |
+
"layer_types": [
|
| 26 |
+
"conv",
|
| 27 |
+
"conv",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"conv",
|
| 30 |
+
"conv",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"conv",
|
| 33 |
+
"conv",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"conv",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"conv",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"conv",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"conv"
|
| 42 |
+
],
|
| 43 |
+
"max_position_embeddings": 128000,
|
| 44 |
+
"model_type": "lfm2",
|
| 45 |
+
"norm_eps": 1e-05,
|
| 46 |
+
"num_attention_heads": 32,
|
| 47 |
+
"num_heads": 32,
|
| 48 |
+
"num_hidden_layers": 16,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pad_token_id": 0,
|
| 51 |
+
"rope_parameters": {
|
| 52 |
+
"rope_theta": 1000000.0,
|
| 53 |
+
"rope_type": "default"
|
| 54 |
+
},
|
| 55 |
+
"tie_embedding": true,
|
| 56 |
+
"tie_word_embeddings": true,
|
| 57 |
+
"transformers_version": "5.2.0",
|
| 58 |
+
"use_cache": false,
|
| 59 |
+
"use_pos_enc": true,
|
| 60 |
+
"vocab_size": 65536
|
| 61 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 1,
|
| 4 |
+
"eos_token_id": [
|
| 5 |
+
7
|
| 6 |
+
],
|
| 7 |
+
"pad_token_id": 0,
|
| 8 |
+
"transformers_version": "5.2.0"
|
| 9 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:850061de923b002e5085928549185e88bca0c00b117b8989bf1f993e6788ae67
|
| 3 |
+
size 2340697936
|
run_meta.json
CHANGED
|
@@ -1,25 +1,25 @@
|
|
| 1 |
{
|
| 2 |
-
"run_name": "dpo_fft_LFM2.5-1.2B-
|
| 3 |
"model": "LiquidAI/LFM2.5-1.2B-Instruct",
|
| 4 |
-
"dataset": "
|
| 5 |
-
"timestamp": "
|
| 6 |
"args": {
|
| 7 |
-
"dataset": "
|
| 8 |
"dataset_split": "train",
|
| 9 |
-
"instruction_col":
|
| 10 |
-
"chosen_col":
|
| 11 |
-
"rejected_col":
|
| 12 |
"max_samples": null,
|
| 13 |
"seed": 42,
|
| 14 |
"model_name": "LiquidAI/LFM2.5-1.2B-Instruct",
|
| 15 |
"ref_4bit": false,
|
| 16 |
-
"num_epochs":
|
| 17 |
"batch_size": 4,
|
| 18 |
"grad_accum": 4,
|
| 19 |
"learning_rate": 2e-06,
|
| 20 |
"beta": 0.2,
|
| 21 |
"max_length": 1024,
|
| 22 |
-
"max_prompt_length":
|
| 23 |
"warmup_ratio": 0.1,
|
| 24 |
"optim": "paged_adamw_8bit",
|
| 25 |
"logging_steps": 10,
|
|
@@ -29,11 +29,11 @@
|
|
| 29 |
"run_name": null
|
| 30 |
},
|
| 31 |
"train_metrics": {
|
| 32 |
-
"train_runtime":
|
| 33 |
-
"train_samples_per_second": 4.
|
| 34 |
-
"train_steps_per_second": 0.
|
| 35 |
"total_flos": 0.0,
|
| 36 |
-
"train_loss": 0.
|
| 37 |
-
"epoch":
|
| 38 |
}
|
| 39 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"run_name": "dpo_fft_LFM2.5-1.2B-Instruct_xinlai__Math-Step-DPO-10K_20260223_022854",
|
| 3 |
"model": "LiquidAI/LFM2.5-1.2B-Instruct",
|
| 4 |
+
"dataset": "xinlai/Math-Step-DPO-10K",
|
| 5 |
+
"timestamp": "20260223_022854",
|
| 6 |
"args": {
|
| 7 |
+
"dataset": "xinlai/Math-Step-DPO-10K",
|
| 8 |
"dataset_split": "train",
|
| 9 |
+
"instruction_col": "initial_reason_steps",
|
| 10 |
+
"chosen_col": "chosen",
|
| 11 |
+
"rejected_col": "rejected",
|
| 12 |
"max_samples": null,
|
| 13 |
"seed": 42,
|
| 14 |
"model_name": "LiquidAI/LFM2.5-1.2B-Instruct",
|
| 15 |
"ref_4bit": false,
|
| 16 |
+
"num_epochs": 1,
|
| 17 |
"batch_size": 4,
|
| 18 |
"grad_accum": 4,
|
| 19 |
"learning_rate": 2e-06,
|
| 20 |
"beta": 0.2,
|
| 21 |
"max_length": 1024,
|
| 22 |
+
"max_prompt_length": 768,
|
| 23 |
"warmup_ratio": 0.1,
|
| 24 |
"optim": "paged_adamw_8bit",
|
| 25 |
"logging_steps": 10,
|
|
|
|
| 29 |
"run_name": null
|
| 30 |
},
|
| 31 |
"train_metrics": {
|
| 32 |
+
"train_runtime": 2398.316,
|
| 33 |
+
"train_samples_per_second": 4.276,
|
| 34 |
+
"train_steps_per_second": 0.267,
|
| 35 |
"total_flos": 0.0,
|
| 36 |
+
"train_loss": 0.5289894797128746,
|
| 37 |
+
"epoch": 1.0
|
| 38 |
}
|
| 39 |
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"backend": "tokenizers",
|
| 3 |
+
"bos_token": "<|startoftext|>",
|
| 4 |
+
"clean_up_tokenization_spaces": false,
|
| 5 |
+
"eos_token": "<|im_end|>",
|
| 6 |
+
"is_local": false,
|
| 7 |
+
"legacy": false,
|
| 8 |
+
"model_input_names": [
|
| 9 |
+
"input_ids",
|
| 10 |
+
"attention_mask"
|
| 11 |
+
],
|
| 12 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 13 |
+
"pad_token": "<|pad|>",
|
| 14 |
+
"padding_side": "right",
|
| 15 |
+
"sp_model_kwargs": {},
|
| 16 |
+
"spaces_between_special_tokens": false,
|
| 17 |
+
"tokenizer_class": "TokenizersBackend",
|
| 18 |
+
"use_default_system_prompt": false,
|
| 19 |
+
"use_fast": true
|
| 20 |
+
}
|
train10k.txt
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
========================================
|
| 2 |
+
DPO Full Fine-Tuning
|
| 3 |
+
========================================
|
| 4 |
+
Model : LiquidAI/LFM2.5-1.2B-Instruct
|
| 5 |
+
Dataset : xinlai/Math-Step-DPO-10K
|
| 6 |
+
Epochs : 1
|
| 7 |
+
Batch size : 4 (grad_accum=4, eff=16)
|
| 8 |
+
Learning rate : 2e-6
|
| 9 |
+
DPO beta : 0.2
|
| 10 |
+
Reference : NF4 4-bit (pass --no_ref_4bit for bfloat16)
|
| 11 |
+
Output dir : models
|
| 12 |
+
========================================
|
| 13 |
+
|
| 14 |
+
[dpo_train] Run : dpo_fft_LFM2.5-1.2B-Instruct_xinlai__Math-Step-DPO-10K_20260223_022854
|
| 15 |
+
[dpo_train] Output : models/dpo_fft_LFM2.5-1.2B-Instruct_xinlai__Math-Step-DPO-10K_20260223_022854
|
| 16 |
+
[dpo_train] Loading dataset: xinlai/Math-Step-DPO-10K split=train
|
| 17 |
+
[dpo_train] Full size : 10,795 rows | columns: ['dataset', 'prompt', 'initial_reason_steps', 'chosen', 'rejected', 'full_chosen', 'full_rejected', 'answer']
|
| 18 |
+
[dpo_train] Columns : instruction='initial_reason_steps' chosen='chosen' rejected='rejected'
|
| 19 |
+
[dpo_train] After cleaning: 10,795 rows
|
| 20 |
+
[dpo_train] Train: 10,255 Eval: 540
|
| 21 |
+
[dpo_train] Loading policy model (bfloat16, trainable) …
|
| 22 |
+
[dpo_train] Loading reference model (bfloat16, frozen) …
|
| 23 |
+
[dpo_train] Policy params : 1170M (all trainable)
|
| 24 |
+
|
| 25 |
+
[dpo_train] Starting DPO full fine-tuning (epochs=1 eff_batch=16) …
|
| 26 |
+
|
| 27 |
+
{'loss': '0.6923', 'grad_norm': '54.5', 'learning_rate': '2.769e-07', 'rewards/chosen': '0.009294', 'rewards/rejected': '0.004497', 'rewards/accuracies': '0.4187', 'rewards/margins': '0.004797', 'logps/chosen': '-129.8', 'logps/rejected': '-139.6', 'logits/chosen': '-0.9724', 'logits/rejected': '-0.9586', 'epoch': '0.0156'}
|
| 28 |
+
{'loss': '0.6889', 'grad_norm': '42.75', 'learning_rate': '5.846e-07', 'rewards/chosen': '-0.0111', 'rewards/rejected': '-0.02361', 'rewards/accuracies': '0.5375', 'rewards/margins': '0.01251', 'logps/chosen': '-132.7', 'logps/rejected': '-141.9', 'logits/chosen': '-0.9568', 'logits/rejected': '-0.9363', 'epoch': '0.0312'}
|
| 29 |
+
{'loss': '0.672', 'grad_norm': '52.25', 'learning_rate': '8.923e-07', 'rewards/chosen': '-0.1026', 'rewards/rejected': '-0.1517', 'rewards/accuracies': '0.6313', 'rewards/margins': '0.04918', 'logps/chosen': '-139.9', 'logps/rejected': '-141.5', 'logits/chosen': '-0.95', 'logits/rejected': '-0.9532', 'epoch': '0.0468'}
|
| 30 |
+
{'loss': '0.6529', 'grad_norm': '49.5', 'learning_rate': '1.2e-06', 'rewards/chosen': '-0.2349', 'rewards/rejected': '-0.3385', 'rewards/accuracies': '0.6375', 'rewards/margins': '0.1035', 'logps/chosen': '-139.2', 'logps/rejected': '-149.2', 'logits/chosen': '-1', 'logits/rejected': '-1.021', 'epoch': '0.0624'}
|
| 31 |
+
{'loss': '0.6221', 'grad_norm': '42.75', 'learning_rate': '1.508e-06', 'rewards/chosen': '-0.4437', 'rewards/rejected': '-0.6489', 'rewards/accuracies': '0.6687', 'rewards/margins': '0.2052', 'logps/chosen': '-129.3', 'logps/rejected': '-140.3', 'logits/chosen': '-0.999', 'logits/rejected': '-1.017', 'epoch': '0.078'}
|
| 32 |
+
{'loss': '0.6159', 'grad_norm': '39.5', 'learning_rate': '1.815e-06', 'rewards/chosen': '-0.5384', 'rewards/rejected': '-0.8259', 'rewards/accuracies': '0.6062', 'rewards/margins': '0.2875', 'logps/chosen': '-141.1', 'logps/rejected': '-143.8', 'logits/chosen': '-1.001', 'logits/rejected': '-0.978', 'epoch': '0.0936'}
|
| 33 |
+
{'loss': '0.5934', 'grad_norm': '47', 'learning_rate': '2e-06', 'rewards/chosen': '-0.2281', 'rewards/rejected': '-0.5834', 'rewards/accuracies': '0.6562', 'rewards/margins': '0.3553', 'logps/chosen': '-135.7', 'logps/rejected': '-145.3', 'logits/chosen': '-0.9597', 'logits/rejected': '-0.9586', 'epoch': '0.1092'}
|
| 34 |
+
{'loss': '0.5266', 'grad_norm': '40.75', 'learning_rate': '1.997e-06', 'rewards/chosen': '-0.2324', 'rewards/rejected': '-0.9226', 'rewards/accuracies': '0.6375', 'rewards/margins': '0.6902', 'logps/chosen': '-135', 'logps/rejected': '-149.8', 'logits/chosen': '-0.9169', 'logits/rejected': '-0.958', 'epoch': '0.1248'}
|
| 35 |
+
{'loss': '0.5594', 'grad_norm': '39.75', 'learning_rate': '1.991e-06', 'rewards/chosen': '-0.4102', 'rewards/rejected': '-1.067', 'rewards/accuracies': '0.6875', 'rewards/margins': '0.6563', 'logps/chosen': '-140.4', 'logps/rejected': '-161.5', 'logits/chosen': '-0.9946', 'logits/rejected': '-1.007', 'epoch': '0.1404'}
|
| 36 |
+
{'loss': '0.5212', 'grad_norm': '42', 'learning_rate': '1.983e-06', 'rewards/chosen': '-0.4026', 'rewards/rejected': '-1.258', 'rewards/accuracies': '0.7125', 'rewards/margins': '0.8554', 'logps/chosen': '-140.5', 'logps/rejected': '-151.5', 'logits/chosen': '-0.9036', 'logits/rejected': '-0.9337', 'epoch': '0.156'}
|
| 37 |
+
{'eval_loss': '0.5339', 'eval_runtime': '40.28', 'eval_samples_per_second': '13.41', 'eval_steps_per_second': '3.351', 'eval_rewards/chosen': '-0.4095', 'eval_rewards/rejected': '-1.24', 'eval_rewards/accuracies': '0.6593', 'eval_rewards/margins': '0.8309', 'eval_logps/chosen': '-141', 'eval_logps/rejected': '-154.7', 'eval_logits/chosen': '-0.9475', 'eval_logits/rejected': '-0.9603', 'epoch': '0.156'}
|
| 38 |
+
{'loss': '0.58', 'grad_norm': '55.5', 'learning_rate': '1.971e-06', 'rewards/chosen': '-0.3429', 'rewards/rejected': '-0.9838', 'rewards/accuracies': '0.6438', 'rewards/margins': '0.6409', 'logps/chosen': '-148.6', 'logps/rejected': '-154.7', 'logits/chosen': '-0.9508', 'logits/rejected': '-0.9895', 'epoch': '0.1716'}
|
| 39 |
+
{'loss': '0.5932', 'grad_norm': '50.5', 'learning_rate': '1.957e-06', 'rewards/chosen': '-0.4143', 'rewards/rejected': '-1.028', 'rewards/accuracies': '0.6187', 'rewards/margins': '0.6136', 'logps/chosen': '-135.8', 'logps/rejected': '-151.8', 'logits/chosen': '-0.9729', 'logits/rejected': '-0.9898', 'epoch': '0.1872'}
|
| 40 |
+
{'loss': '0.5053', 'grad_norm': '51', 'learning_rate': '1.94e-06', 'rewards/chosen': '-0.538', 'rewards/rejected': '-1.577', 'rewards/accuracies': '0.675', 'rewards/margins': '1.039', 'logps/chosen': '-137.6', 'logps/rejected': '-160.1', 'logits/chosen': '-0.9419', 'logits/rejected': '-0.9662', 'epoch': '0.2028'}
|
| 41 |
+
{'loss': '0.4892', 'grad_norm': '33', 'learning_rate': '1.92e-06', 'rewards/chosen': '-0.5704', 'rewards/rejected': '-1.761', 'rewards/accuracies': '0.6875', 'rewards/margins': '1.191', 'logps/chosen': '-139.2', 'logps/rejected': '-153.3', 'logits/chosen': '-0.9681', 'logits/rejected': '-1.009', 'epoch': '0.2184'}
|
| 42 |
+
{'loss': '0.5108', 'grad_norm': '36.5', 'learning_rate': '1.897e-06', 'rewards/chosen': '-0.7801', 'rewards/rejected': '-1.915', 'rewards/accuracies': '0.6938', 'rewards/margins': '1.134', 'logps/chosen': '-146.1', 'logps/rejected': '-160.9', 'logits/chosen': '-0.9884', 'logits/rejected': '-1.018', 'epoch': '0.234'}
|
| 43 |
+
{'loss': '0.5325', 'grad_norm': '40.75', 'learning_rate': '1.871e-06', 'rewards/chosen': '-0.6749', 'rewards/rejected': '-1.703', 'rewards/accuracies': '0.6625', 'rewards/margins': '1.028', 'logps/chosen': '-142.7', 'logps/rejected': '-156.6', 'logits/chosen': '-0.9584', 'logits/rejected': '-0.9916', 'epoch': '0.2496'}
|
| 44 |
+
{'loss': '0.4619', 'grad_norm': '40.25', 'learning_rate': '1.843e-06', 'rewards/chosen': '-0.3065', 'rewards/rejected': '-1.582', 'rewards/accuracies': '0.7312', 'rewards/margins': '1.276', 'logps/chosen': '-138.2', 'logps/rejected': '-153.2', 'logits/chosen': '-0.9471', 'logits/rejected': '-0.9452', 'epoch': '0.2652'}
|
| 45 |
+
{'loss': '0.542', 'grad_norm': '51', 'learning_rate': '1.813e-06', 'rewards/chosen': '-0.2091', 'rewards/rejected': '-1.171', 'rewards/accuracies': '0.6375', 'rewards/margins': '0.9614', 'logps/chosen': '-143', 'logps/rejected': '-152.9', 'logits/chosen': '-0.9258', 'logits/rejected': '-0.9312', 'epoch': '0.2808'}
|
| 46 |
+
{'loss': '0.5062', 'grad_norm': '44.5', 'learning_rate': '1.78e-06', 'rewards/chosen': '-0.1293', 'rewards/rejected': '-1.125', 'rewards/accuracies': '0.725', 'rewards/margins': '0.9959', 'logps/chosen': '-136', 'logps/rejected': '-146.1', 'logits/chosen': '-0.9742', 'logits/rejected': '-0.9909', 'epoch': '0.2964'}
|
| 47 |
+
{'loss': '0.5246', 'grad_norm': '40.25', 'learning_rate': '1.745e-06', 'rewards/chosen': '-0.1886', 'rewards/rejected': '-1.127', 'rewards/accuracies': '0.675', 'rewards/margins': '0.9379', 'logps/chosen': '-142', 'logps/rejected': '-156.9', 'logits/chosen': '-0.9913', 'logits/rejected': '-0.9865', 'epoch': '0.312'}
|
| 48 |
+
{'eval_loss': '0.5121', 'eval_runtime': '40.17', 'eval_samples_per_second': '13.44', 'eval_steps_per_second': '3.36', 'eval_rewards/chosen': '-0.2147', 'eval_rewards/rejected': '-1.269', 'eval_rewards/accuracies': '0.687', 'eval_rewards/margins': '1.055', 'eval_logps/chosen': '-140', 'eval_logps/rejected': '-154.8', 'eval_logits/chosen': '-0.955', 'eval_logits/rejected': '-0.9694', 'epoch': '0.312'}
|
| 49 |
+
{'loss': '0.5359', 'grad_norm': '41', 'learning_rate': '1.707e-06', 'rewards/chosen': '-0.3423', 'rewards/rejected': '-1.273', 'rewards/accuracies': '0.6562', 'rewards/margins': '0.9307', 'logps/chosen': '-140.6', 'logps/rejected': '-148.6', 'logits/chosen': '-1.003', 'logits/rejected': '-1.016', 'epoch': '0.3276'}
|
| 50 |
+
{'loss': '0.5775', 'grad_norm': '49', 'learning_rate': '1.668e-06', 'rewards/chosen': '-0.2953', 'rewards/rejected': '-1.016', 'rewards/accuracies': '0.6125', 'rewards/margins': '0.721', 'logps/chosen': '-135.6', 'logps/rejected': '-144.4', 'logits/chosen': '-0.9878', 'logits/rejected': '-0.9839', 'epoch': '0.3432'}
|
| 51 |
+
{'loss': '0.5026', 'grad_norm': '43.5', 'learning_rate': '1.626e-06', 'rewards/chosen': '-0.1806', 'rewards/rejected': '-1.201', 'rewards/accuracies': '0.7063', 'rewards/margins': '1.02', 'logps/chosen': '-143.2', 'logps/rejected': '-153.5', 'logits/chosen': '-0.9418', 'logits/rejected': '-0.9669', 'epoch': '0.3588'}
|
| 52 |
+
{'loss': '0.5222', 'grad_norm': '44.25', 'learning_rate': '1.582e-06', 'rewards/chosen': '0.05273', 'rewards/rejected': '-0.954', 'rewards/accuracies': '0.6687', 'rewards/margins': '1.007', 'logps/chosen': '-136.3', 'logps/rejected': '-148.8', 'logits/chosen': '-0.9431', 'logits/rejected': '-0.9376', 'epoch': '0.3744'}
|
| 53 |
+
{'loss': '0.4862', 'grad_norm': '41.5', 'learning_rate': '1.537e-06', 'rewards/chosen': '0.05254', 'rewards/rejected': '-1.19', 'rewards/accuracies': '0.6687', 'rewards/margins': '1.243', 'logps/chosen': '-135.3', 'logps/rejected': '-151', 'logits/chosen': '-0.9207', 'logits/rejected': '-0.9492', 'epoch': '0.39'}
|
| 54 |
+
{'loss': '0.5452', 'grad_norm': '66', 'learning_rate': '1.491e-06', 'rewards/chosen': '-0.02821', 'rewards/rejected': '-0.986', 'rewards/accuracies': '0.625', 'rewards/margins': '0.9578', 'logps/chosen': '-136.3', 'logps/rejected': '-151.1', 'logits/chosen': '-0.9681', 'logits/rejected': '-1.002', 'epoch': '0.4056'}
|
| 55 |
+
{'loss': '0.5194', 'grad_norm': '36.75', 'learning_rate': '1.442e-06', 'rewards/chosen': '-0.1964', 'rewards/rejected': '-1.143', 'rewards/accuracies': '0.7063', 'rewards/margins': '0.9462', 'logps/chosen': '-144', 'logps/rejected': '-149.6', 'logits/chosen': '-0.9456', 'logits/rejected': '-0.9882', 'epoch': '0.4212'}
|
| 56 |
+
{'loss': '0.4679', 'grad_norm': '36.5', 'learning_rate': '1.393e-06', 'rewards/chosen': '-0.03599', 'rewards/rejected': '-1.404', 'rewards/accuracies': '0.7437', 'rewards/margins': '1.368', 'logps/chosen': '-143.2', 'logps/rejected': '-149.7', 'logits/chosen': '-0.9715', 'logits/rejected': '-1', 'epoch': '0.4368'}
|
| 57 |
+
{'loss': '0.5053', 'grad_norm': '50.75', 'learning_rate': '1.342e-06', 'rewards/chosen': '0.1214', 'rewards/rejected': '-0.8408', 'rewards/accuracies': '0.7', 'rewards/margins': '0.9622', 'logps/chosen': '-137.8', 'logps/rejected': '-144.2', 'logits/chosen': '-1.024', 'logits/rejected': '-0.9874', 'epoch': '0.4524'}
|
| 58 |
+
{'loss': '0.4918', 'grad_norm': '41.5', 'learning_rate': '1.29e-06', 'rewards/chosen': '0.08779', 'rewards/rejected': '-0.99', 'rewards/accuracies': '0.7437', 'rewards/margins': '1.078', 'logps/chosen': '-136.1', 'logps/rejected': '-149.7', 'logits/chosen': '-0.9538', 'logits/rejected': '-0.991', 'epoch': '0.468'}
|
| 59 |
+
{'eval_loss': '0.5046', 'eval_runtime': '40.13', 'eval_samples_per_second': '13.46', 'eval_steps_per_second': '3.364', 'eval_rewards/chosen': '0.1132', 'eval_rewards/rejected': '-1.029', 'eval_rewards/accuracies': '0.6741', 'eval_rewards/margins': '1.142', 'eval_logps/chosen': '-138.4', 'eval_logps/rejected': '-153.6', 'eval_logits/chosen': '-0.9587', 'eval_logits/rejected': '-0.9758', 'epoch': '0.468'}
|
| 60 |
+
{'loss': '0.4804', 'grad_norm': '41.25', 'learning_rate': '1.238e-06', 'rewards/chosen': '0.05371', 'rewards/rejected': '-1.108', 'rewards/accuracies': '0.7875', 'rewards/margins': '1.162', 'logps/chosen': '-132.4', 'logps/rejected': '-152.8', 'logits/chosen': '-0.9403', 'logits/rejected': '-0.9417', 'epoch': '0.4836'}
|
| 61 |
+
{'loss': '0.5502', 'grad_norm': '54.5', 'learning_rate': '1.184e-06', 'rewards/chosen': '0.02032', 'rewards/rejected': '-0.762', 'rewards/accuracies': '0.7125', 'rewards/margins': '0.7824', 'logps/chosen': '-139.8', 'logps/rejected': '-155.8', 'logits/chosen': '-1.034', 'logits/rejected': '-0.9944', 'epoch': '0.4992'}
|
| 62 |
+
{'loss': '0.5258', 'grad_norm': '36.25', 'learning_rate': '1.131e-06', 'rewards/chosen': '0.01367', 'rewards/rejected': '-1.062', 'rewards/accuracies': '0.6562', 'rewards/margins': '1.076', 'logps/chosen': '-139.7', 'logps/rejected': '-149', 'logits/chosen': '-1.02', 'logits/rejected': '-1.025', 'epoch': '0.5148'}
|
| 63 |
+
{'loss': '0.513', 'grad_norm': '73.5', 'learning_rate': '1.076e-06', 'rewards/chosen': '-0.08328', 'rewards/rejected': '-1.046', 'rewards/accuracies': '0.7312', 'rewards/margins': '0.9629', 'logps/chosen': '-145', 'logps/rejected': '-151.1', 'logits/chosen': '-0.9922', 'logits/rejected': '-1.009', 'epoch': '0.5304'}
|
| 64 |
+
{'loss': '0.5549', 'grad_norm': '47.5', 'learning_rate': '1.022e-06', 'rewards/chosen': '-0.1006', 'rewards/rejected': '-0.9635', 'rewards/accuracies': '0.7', 'rewards/margins': '0.8629', 'logps/chosen': '-138.2', 'logps/rejected': '-153.4', 'logits/chosen': '-0.9654', 'logits/rejected': '-0.9779', 'epoch': '0.546'}
|
| 65 |
+
{'loss': '0.5152', 'grad_norm': '37.5', 'learning_rate': '9.673e-07', 'rewards/chosen': '-0.1695', 'rewards/rejected': '-1.247', 'rewards/accuracies': '0.6438', 'rewards/margins': '1.077', 'logps/chosen': '-132', 'logps/rejected': '-147.7', 'logits/chosen': '-0.9814', 'logits/rejected': '-0.9655', 'epoch': '0.5616'}
|
| 66 |
+
{'loss': '0.5521', 'grad_norm': '35.25', 'learning_rate': '9.128e-07', 'rewards/chosen': '-0.1743', 'rewards/rejected': '-0.9556', 'rewards/accuracies': '0.7', 'rewards/margins': '0.7813', 'logps/chosen': '-133.1', 'logps/rejected': '-141.6', 'logits/chosen': '-0.9953', 'logits/rejected': '-1.015', 'epoch': '0.5772'}
|
| 67 |
+
{'loss': '0.5144', 'grad_norm': '48.5', 'learning_rate': '8.587e-07', 'rewards/chosen': '-0.191', 'rewards/rejected': '-1.271', 'rewards/accuracies': '0.6625', 'rewards/margins': '1.08', 'logps/chosen': '-138.5', 'logps/rejected': '-154.2', 'logits/chosen': '-1.013', 'logits/rejected': '-1.014', 'epoch': '0.5928'}
|
| 68 |
+
{'loss': '0.5276', 'grad_norm': '45.5', 'learning_rate': '8.049e-07', 'rewards/chosen': '-0.1877', 'rewards/rejected': '-1.247', 'rewards/accuracies': '0.6875', 'rewards/margins': '1.059', 'logps/chosen': '-139.2', 'logps/rejected': '-144.6', 'logits/chosen': '-0.9666', 'logits/rejected': '-0.9784', 'epoch': '0.6084'}
|
| 69 |
+
{'loss': '0.5085', 'grad_norm': '38.75', 'learning_rate': '7.517e-07', 'rewards/chosen': '-0.2559', 'rewards/rejected': '-1.253', 'rewards/accuracies': '0.725', 'rewards/margins': '0.9974', 'logps/chosen': '-139.7', 'logps/rejected': '-151.4', 'logits/chosen': '-0.9496', 'logits/rejected': '-0.9449', 'epoch': '0.624'}
|
| 70 |
+
{'eval_loss': '0.5048', 'eval_runtime': '40.12', 'eval_samples_per_second': '13.46', 'eval_steps_per_second': '3.365', 'eval_rewards/chosen': '-0.2371', 'eval_rewards/rejected': '-1.434', 'eval_rewards/accuracies': '0.687', 'eval_rewards/margins': '1.197', 'eval_logps/chosen': '-140.1', 'eval_logps/rejected': '-155.6', 'eval_logits/chosen': '-0.9802', 'eval_logits/rejected': '-0.9983', 'epoch': '0.624'}
|
| 71 |
+
{'loss': '0.5101', 'grad_norm': '39.75', 'learning_rate': '6.993e-07', 'rewards/chosen': '-0.2794', 'rewards/rejected': '-1.527', 'rewards/accuracies': '0.7312', 'rewards/margins': '1.247', 'logps/chosen': '-136.8', 'logps/rejected': '-152', 'logits/chosen': '-0.9347', 'logits/rejected': '-0.9738', 'epoch': '0.6396'}
|
| 72 |
+
{'loss': '0.5132', 'grad_norm': '47', 'learning_rate': '6.477e-07', 'rewards/chosen': '-0.2584', 'rewards/rejected': '-1.329', 'rewards/accuracies': '0.7125', 'rewards/margins': '1.071', 'logps/chosen': '-132.8', 'logps/rejected': '-148.7', 'logits/chosen': '-0.951', 'logits/rejected': '-0.9631', 'epoch': '0.6552'}
|
| 73 |
+
{'loss': '0.5442', 'grad_norm': '57', 'learning_rate': '5.973e-07', 'rewards/chosen': '-0.354', 'rewards/rejected': '-1.106', 'rewards/accuracies': '0.7188', 'rewards/margins': '0.7517', 'logps/chosen': '-138.7', 'logps/rejected': '-154.1', 'logits/chosen': '-1.014', 'logits/rejected': '-1.03', 'epoch': '0.6708'}
|
| 74 |
+
{'loss': '0.4362', 'grad_norm': '33.25', 'learning_rate': '5.48e-07', 'rewards/chosen': '-0.1957', 'rewards/rejected': '-1.659', 'rewards/accuracies': '0.75', 'rewards/margins': '1.464', 'logps/chosen': '-133.2', 'logps/rejected': '-154.8', 'logits/chosen': '-0.999', 'logits/rejected': '-0.9674', 'epoch': '0.6864'}
|
| 75 |
+
{'loss': '0.4756', 'grad_norm': '39', 'learning_rate': '5e-07', 'rewards/chosen': '-0.24', 'rewards/rejected': '-1.591', 'rewards/accuracies': '0.7875', 'rewards/margins': '1.351', 'logps/chosen': '-146.1', 'logps/rejected': '-159.7', 'logits/chosen': '-0.9821', 'logits/rejected': '-0.9917', 'epoch': '0.702'}
|
| 76 |
+
{'loss': '0.5326', 'grad_norm': '50.25', 'learning_rate': '4.535e-07', 'rewards/chosen': '-0.2618', 'rewards/rejected': '-1.15', 'rewards/accuracies': '0.7', 'rewards/margins': '0.8884', 'logps/chosen': '-138.5', 'logps/rejected': '-143.4', 'logits/chosen': '-0.9793', 'logits/rejected': '-1.013', 'epoch': '0.7176'}
|
| 77 |
+
{'loss': '0.5539', 'grad_norm': '47.5', 'learning_rate': '4.087e-07', 'rewards/chosen': '-0.2694', 'rewards/rejected': '-1.051', 'rewards/accuracies': '0.675', 'rewards/margins': '0.7815', 'logps/chosen': '-133.2', 'logps/rejected': '-145.2', 'logits/chosen': '-0.9868', 'logits/rejected': '-1.012', 'epoch': '0.7332'}
|
| 78 |
+
{'loss': '0.4758', 'grad_norm': '34.25', 'learning_rate': '3.656e-07', 'rewards/chosen': '-0.2661', 'rewards/rejected': '-1.672', 'rewards/accuracies': '0.7437', 'rewards/margins': '1.406', 'logps/chosen': '-138.3', 'logps/rejected': '-152.5', 'logits/chosen': '-0.9374', 'logits/rejected': '-0.9612', 'epoch': '0.7488'}
|
| 79 |
+
{'loss': '0.5411', 'grad_norm': '48.75', 'learning_rate': '3.244e-07', 'rewards/chosen': '-0.1937', 'rewards/rejected': '-1.203', 'rewards/accuracies': '0.675', 'rewards/margins': '1.009', 'logps/chosen': '-139.5', 'logps/rejected': '-150.4', 'logits/chosen': '-1.003', 'logits/rejected': '-1.018', 'epoch': '0.7644'}
|
| 80 |
+
{'loss': '0.4845', 'grad_norm': '40.75', 'learning_rate': '2.852e-07', 'rewards/chosen': '-0.2131', 'rewards/rejected': '-1.448', 'rewards/accuracies': '0.7312', 'rewards/margins': '1.235', 'logps/chosen': '-137.3', 'logps/rejected': '-154.6', 'logits/chosen': '-0.9683', 'logits/rejected': '-0.9468', 'epoch': '0.78'}
|
| 81 |
+
{'eval_loss': '0.4993', 'eval_runtime': '40.18', 'eval_samples_per_second': '13.44', 'eval_steps_per_second': '3.36', 'eval_rewards/chosen': '-0.2173', 'eval_rewards/rejected': '-1.439', 'eval_rewards/accuracies': '0.7', 'eval_rewards/margins': '1.222', 'eval_logps/chosen': '-140', 'eval_logps/rejected': '-155.7', 'eval_logits/chosen': '-0.9807', 'eval_logits/rejected': '-0.9991', 'epoch': '0.78'}
|
| 82 |
+
{'loss': '0.4628', 'grad_norm': '46', 'learning_rate': '2.482e-07', 'rewards/chosen': '-0.1764', 'rewards/rejected': '-1.582', 'rewards/accuracies': '0.7375', 'rewards/margins': '1.406', 'logps/chosen': '-135.9', 'logps/rejected': '-150.9', 'logits/chosen': '-0.9796', 'logits/rejected': '-1.017', 'epoch': '0.7956'}
|
| 83 |
+
{'loss': '0.4962', 'grad_norm': '52.25', 'learning_rate': '2.133e-07', 'rewards/chosen': '-0.2718', 'rewards/rejected': '-1.503', 'rewards/accuracies': '0.7375', 'rewards/margins': '1.231', 'logps/chosen': '-139.9', 'logps/rejected': '-151.5', 'logits/chosen': '-0.9783', 'logits/rejected': '-0.9835', 'epoch': '0.8112'}
|
| 84 |
+
{'loss': '0.5023', 'grad_norm': '36.5', 'learning_rate': '1.808e-07', 'rewards/chosen': '-0.1669', 'rewards/rejected': '-1.383', 'rewards/accuracies': '0.6938', 'rewards/margins': '1.216', 'logps/chosen': '-143', 'logps/rejected': '-156.3', 'logits/chosen': '-0.9845', 'logits/rejected': '-1.014', 'epoch': '0.8268'}
|
| 85 |
+
{'loss': '0.489', 'grad_norm': '49.5', 'learning_rate': '1.508e-07', 'rewards/chosen': '-0.1592', 'rewards/rejected': '-1.255', 'rewards/accuracies': '0.8', 'rewards/margins': '1.095', 'logps/chosen': '-138.2', 'logps/rejected': '-147.7', 'logits/chosen': '-1.021', 'logits/rejected': '-1.025', 'epoch': '0.8424'}
|
| 86 |
+
{'loss': '0.5108', 'grad_norm': '40.5', 'learning_rate': '1.233e-07', 'rewards/chosen': '-0.1103', 'rewards/rejected': '-1.092', 'rewards/accuracies': '0.7', 'rewards/margins': '0.982', 'logps/chosen': '-135.1', 'logps/rejected': '-144.9', 'logits/chosen': '-0.9653', 'logits/rejected': '-0.9778', 'epoch': '0.858'}
|
| 87 |
+
{'loss': '0.4898', 'grad_norm': '40.25', 'learning_rate': '9.836e-08', 'rewards/chosen': '-0.1521', 'rewards/rejected': '-1.389', 'rewards/accuracies': '0.725', 'rewards/margins': '1.237', 'logps/chosen': '-132.6', 'logps/rejected': '-144.8', 'logits/chosen': '-0.9742', 'logits/rejected': '-1.003', 'epoch': '0.8736'}
|
| 88 |
+
{'loss': '0.5178', 'grad_norm': '44.25', 'learning_rate': '7.612e-08', 'rewards/chosen': '-0.2263', 'rewards/rejected': '-1.307', 'rewards/accuracies': '0.6687', 'rewards/margins': '1.081', 'logps/chosen': '-137.1', 'logps/rejected': '-151.2', 'logits/chosen': '-1.009', 'logits/rejected': '-1.01', 'epoch': '0.8892'}
|
| 89 |
+
{'loss': '0.5001', 'grad_norm': '29.38', 'learning_rate': '5.663e-08', 'rewards/chosen': '-0.196', 'rewards/rejected': '-1.202', 'rewards/accuracies': '0.7188', 'rewards/margins': '1.006', 'logps/chosen': '-137.1', 'logps/rejected': '-147.7', 'logits/chosen': '-0.9683', 'logits/rejected': '-1.018', 'epoch': '0.9048'}
|
| 90 |
+
{'loss': '0.4811', 'grad_norm': '44.5', 'learning_rate': '3.995e-08', 'rewards/chosen': '-0.2124', 'rewards/rejected': '-1.397', 'rewards/accuracies': '0.7437', 'rewards/margins': '1.184', 'logps/chosen': '-136.5', 'logps/rejected': '-152.3', 'logits/chosen': '-1.023', 'logits/rejected': '-1.046', 'epoch': '0.9204'}
|
| 91 |
+
{'loss': '0.5045', 'grad_norm': '27.75', 'learning_rate': '2.612e-08', 'rewards/chosen': '-0.2044', 'rewards/rejected': '-1.235', 'rewards/accuracies': '0.7437', 'rewards/margins': '1.03', 'logps/chosen': '-134.6', 'logps/rejected': '-151.1', 'logits/chosen': '-0.9686', 'logits/rejected': '-0.9986', 'epoch': '0.936'}
|
| 92 |
+
{'eval_loss': '0.5009', 'eval_runtime': '40.15', 'eval_samples_per_second': '13.45', 'eval_steps_per_second': '3.362', 'eval_rewards/chosen': '-0.2003', 'eval_rewards/rejected': '-1.422', 'eval_rewards/accuracies': '0.7148', 'eval_rewards/margins': '1.221', 'eval_logps/chosen': '-140', 'eval_logps/rejected': '-155.6', 'eval_logits/chosen': '-0.9798', 'eval_logits/rejected': '-0.9978', 'epoch': '0.936'}
|
| 93 |
+
{'loss': '0.5156', 'grad_norm': '43', 'learning_rate': '1.519e-08', 'rewards/chosen': '-0.2099', 'rewards/rejected': '-1.355', 'rewards/accuracies': '0.6938', 'rewards/margins': '1.145', 'logps/chosen': '-135.9', 'logps/rejected': '-156.8', 'logits/chosen': '-0.9927', 'logits/rejected': '-1.001', 'epoch': '0.9516'}
|
| 94 |
+
{'loss': '0.5259', 'grad_norm': '29.12', 'learning_rate': '7.19e-09', 'rewards/chosen': '-0.2067', 'rewards/rejected': '-1.218', 'rewards/accuracies': '0.6625', 'rewards/margins': '1.011', 'logps/chosen': '-143.6', 'logps/rejected': '-154.5', 'logits/chosen': '-1.017', 'logits/rejected': '-1.012', 'epoch': '0.9672'}
|
| 95 |
+
{'loss': '0.4977', 'grad_norm': '41', 'learning_rate': '2.141e-09', 'rewards/chosen': '-0.2147', 'rewards/rejected': '-1.301', 'rewards/accuracies': '0.7563', 'rewards/margins': '1.087', 'logps/chosen': '-142.9', 'logps/rejected': '-158.3', 'logits/chosen': '-0.9882', 'logits/rejected': '-0.9938', 'epoch': '0.9828'}
|
| 96 |
+
{'loss': '0.5042', 'grad_norm': '50.5', 'learning_rate': '5.949e-11', 'rewards/chosen': '-0.1956', 'rewards/rejected': '-1.333', 'rewards/accuracies': '0.7437', 'rewards/margins': '1.138', 'logps/chosen': '-136.6', 'logps/rejected': '-152.5', 'logits/chosen': '-0.9518', 'logits/rejected': '-0.9941', 'epoch': '0.9984'}
|
| 97 |
+
{'train_runtime': '2398', 'train_samples_per_second': '4.276', 'train_steps_per_second': '0.267', 'train_loss': '0.529', 'epoch': '1'}
|
| 98 |
+
[dpo_train] Final model saved → models/dpo_fft_LFM2.5-1.2B-Instruct_xinlai__Math-Step-DPO-10K_20260223_022854/final_model
|
| 99 |
+
[dpo_train] Run metadata → models/dpo_fft_LFM2.5-1.2B-Instruct_xinlai__Math-Step-DPO-10K_20260223_022854/run_meta.json
|
| 100 |
+
|
| 101 |
+
[dpo_train] Done.
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c937a92854585ec95fb58489fcc514bfc02871c7b725449e33dbfcf1c2ffc49d
|
| 3 |
+
size 6225
|