jtmaxsoft committed on
Commit
f948a22
·
verified ·
1 Parent(s): fc417aa

Upload bf16 - Migration QLoRA DPO

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - ja
4
+ - ko
5
+ - en
6
+ license: apache-2.0
7
+ base_model: Qwen/Qwen3.5-9B
8
+ tags:
9
+ - migration
10
+ - mainframe
11
+ - cobol
12
+ - jcl
13
+ - assembler
14
+ - qlora
15
+ - dpo
16
+ - ofkms
17
+ library_name: transformers
18
+ pipeline_tag: text-generation
19
+ ---
20
+
21
+ # OFKMS Migration Design - Qwen3.5-9B DPO (bf16)
22
+
23
+ A model specialized for mainframe migration design, fine-tuned from Qwen3.5-9B.
24
+
25
+ ## Model Description
26
+
27
+ This model is fine-tuned for **COBOL/JCL/Assembler migration design** tasks,
28
+ trained on TmaxSoft Japan's proprietary migration knowledge base.
29
+
30
+ - **Base Model**: Qwen3.5-9B
31
+ - **Fine-tuning**: QLoRA (DPO)
32
+ - **Training Data**: 1,288 SFT entries + 1,288 DPO pairs
33
+ - **Languages**: Japanese (primary), Korean, English
34
+ - **Variant**: bf16
35
+
36
+ ## Training Details
37
+
38
+ - **Method**: QLoRA (rank=64, alpha=128)
39
+ - **Trainable params**: 174M / 8.4B (2.09%)
40
+ - **Epochs**: 3 (note: the DPO run recorded in `trainer_state.json` reports `num_train_epochs: 2`)
41
+ - **Batch size**: 4 (gradient accumulation: 16, effective: 64)
42
+ - **Learning rate**: 2e-5 (cosine schedule; note: the DPO run logged in `trainer_state.json` peaks at about 5e-6)
43
+ - **Hardware**: NVIDIA A100 40GB
44
+
45
+ ## Supported Tasks
46
+
47
+ - COBOL source pattern analysis and conversion rules
48
+ - JCL to OpenFrame JCL migration
49
+ - Assembler to C/OFASM migration
50
+ - Migration design document generation
51
+ - Error pattern diagnosis (ABEND codes, JES messages)
52
+
53
+ ## Usage
54
+
55
+ ```python
56
+ from transformers import AutoModelForCausalLM, AutoTokenizer
57
+
58
+ model = AutoModelForCausalLM.from_pretrained("jtmaxsoft/OFKMS-Migration-Qwen3.5-9B-DPO")
59
+ tokenizer = AutoTokenizer.from_pretrained("jtmaxsoft/OFKMS-Migration-Qwen3.5-9B-DPO")
60
+
61
+ prompt = "COBOL PERFORM statement OpenFrame migration pattern"
62
+ inputs = tokenizer(prompt, return_tensors="pt")
63
+ outputs = model.generate(**inputs, max_new_tokens=512)
64
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
65
+ ```
66
+
67
+ ## Organization
68
+
69
+ [TmaxSoft Japan](https://huggingface.co/jtmaxsoft)
chat_template.jinja ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if message.content is string %}
27
+ {%- set content = message.content %}
28
+ {%- else %}
29
+ {%- set content = '' %}
30
+ {%- endif %}
31
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
32
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
33
+ {%- elif message.role == "assistant" %}
34
+ {%- set reasoning_content = '' %}
35
+ {%- if message.reasoning_content is string %}
36
+ {%- set reasoning_content = message.reasoning_content %}
37
+ {%- else %}
38
+ {%- if '</think>' in content %}
39
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
40
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
41
+ {%- endif %}
42
+ {%- endif %}
43
+ {%- if loop.index0 > ns.last_query_index %}
44
+ {%- if loop.last or (not loop.last and reasoning_content) %}
45
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
46
+ {%- else %}
47
+ {{- '<|im_start|>' + message.role + '\n' + content }}
48
+ {%- endif %}
49
+ {%- else %}
50
+ {{- '<|im_start|>' + message.role + '\n' + content }}
51
+ {%- endif %}
52
+ {%- if message.tool_calls %}
53
+ {%- for tool_call in message.tool_calls %}
54
+ {%- if (loop.first and content) or (not loop.first) %}
55
+ {{- '\n' }}
56
+ {%- endif %}
57
+ {%- if tool_call.function %}
58
+ {%- set tool_call = tool_call.function %}
59
+ {%- endif %}
60
+ {{- '<tool_call>\n{"name": "' }}
61
+ {{- tool_call.name }}
62
+ {{- '", "arguments": ' }}
63
+ {%- if tool_call.arguments is string %}
64
+ {{- tool_call.arguments }}
65
+ {%- else %}
66
+ {{- tool_call.arguments | tojson }}
67
+ {%- endif %}
68
+ {{- '}\n</tool_call>' }}
69
+ {%- endfor %}
70
+ {%- endif %}
71
+ {{- '<|im_end|>\n' }}
72
+ {%- elif message.role == "tool" %}
73
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
74
+ {{- '<|im_start|>user' }}
75
+ {%- endif %}
76
+ {{- '\n<tool_response>\n' }}
77
+ {{- content }}
78
+ {{- '\n</tool_response>' }}
79
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
80
+ {{- '<|im_end|>\n' }}
81
+ {%- endif %}
82
+ {%- endif %}
83
+ {%- endfor %}
84
+ {%- if add_generation_prompt %}
85
+ {{- '<|im_start|>assistant\n' }}
86
+ {%- if enable_thinking is defined and enable_thinking is false %}
87
+ {{- '<think>\n\n</think>\n\n' }}
88
+ {%- endif %}
89
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": 151645,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 12288,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention"
52
+ ],
53
+ "max_position_embeddings": 40960,
54
+ "max_window_layers": 36,
55
+ "model_type": "qwen3",
56
+ "num_attention_heads": 32,
57
+ "num_hidden_layers": 36,
58
+ "num_key_value_heads": 8,
59
+ "pad_token_id": null,
60
+ "rms_norm_eps": 1e-06,
61
+ "rope_parameters": {
62
+ "rope_theta": 1000000,
63
+ "rope_type": "default"
64
+ },
65
+ "sliding_window": null,
66
+ "tie_word_embeddings": false,
67
+ "transformers_version": "5.3.0",
68
+ "use_cache": true,
69
+ "use_sliding_window": false,
70
+ "vocab_size": 151936
71
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "temperature": 0.6,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
+ "transformers_version": "5.3.0"
13
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b13ab529495892871ef1e774d60099b3cc4ced5fd5e36116557309de23e3a4fd
3
+ size 16381517208
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
3
+ size 11422650
tokenizer_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": null,
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|im_end|>",
7
+ "errors": "replace",
8
+ "extra_special_tokens": [
9
+ "<|im_start|>",
10
+ "<|im_end|>",
11
+ "<|object_ref_start|>",
12
+ "<|object_ref_end|>",
13
+ "<|box_start|>",
14
+ "<|box_end|>",
15
+ "<|quad_start|>",
16
+ "<|quad_end|>",
17
+ "<|vision_start|>",
18
+ "<|vision_end|>",
19
+ "<|vision_pad|>",
20
+ "<|image_pad|>",
21
+ "<|video_pad|>"
22
+ ],
23
+ "is_local": true,
24
+ "model_max_length": 131072,
25
+ "pad_token": "<|endoftext|>",
26
+ "split_special_tokens": false,
27
+ "tokenizer_class": "Qwen2Tokenizer",
28
+ "unk_token": null
29
+ }
trainer_state.json ADDED
@@ -0,0 +1,514 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 322,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.062111801242236024,
14
+ "grad_norm": 2.031649589538574,
15
+ "learning_rate": 1.3636363636363636e-06,
16
+ "logits/chosen": -1.1706674098968506,
17
+ "logits/rejected": -1.0306828022003174,
18
+ "logps/chosen": -564.5164794921875,
19
+ "logps/rejected": -463.99603271484375,
20
+ "loss": 0.6916566371917725,
21
+ "rewards/accuracies": 0.4375,
22
+ "rewards/chosen": 0.010852223262190819,
23
+ "rewards/margins": 0.004624844063073397,
24
+ "rewards/rejected": 0.006227378733456135,
25
+ "step": 10
26
+ },
27
+ {
28
+ "epoch": 0.12422360248447205,
29
+ "grad_norm": 2.3162496089935303,
30
+ "learning_rate": 2.8787878787878793e-06,
31
+ "logits/chosen": -1.0546079874038696,
32
+ "logits/rejected": -0.9307195544242859,
33
+ "logps/chosen": -475.3399963378906,
34
+ "logps/rejected": -421.66693115234375,
35
+ "loss": 0.6910689353942872,
36
+ "rewards/accuracies": 0.5249999761581421,
37
+ "rewards/chosen": 0.020578766241669655,
38
+ "rewards/margins": 0.0066573722288012505,
39
+ "rewards/rejected": 0.013921394944190979,
40
+ "step": 20
41
+ },
42
+ {
43
+ "epoch": 0.18633540372670807,
44
+ "grad_norm": 2.447273015975952,
45
+ "learning_rate": 4.393939393939394e-06,
46
+ "logits/chosen": -1.1174654960632324,
47
+ "logits/rejected": -1.0690593719482422,
48
+ "logps/chosen": -606.117919921875,
49
+ "logps/rejected": -445.750244140625,
50
+ "loss": 0.6635114669799804,
51
+ "rewards/accuracies": 0.7250000238418579,
52
+ "rewards/chosen": 0.11121845245361328,
53
+ "rewards/margins": 0.06357013434171677,
54
+ "rewards/rejected": 0.047648314386606216,
55
+ "step": 30
56
+ },
57
+ {
58
+ "epoch": 0.2484472049689441,
59
+ "grad_norm": 2.597184896469116,
60
+ "learning_rate": 4.994684273748881e-06,
61
+ "logits/chosen": -1.0191378593444824,
62
+ "logits/rejected": -1.0478084087371826,
63
+ "logps/chosen": -552.3709716796875,
64
+ "logps/rejected": -423.89892578125,
65
+ "loss": 0.6465987205505371,
66
+ "rewards/accuracies": 0.737500011920929,
67
+ "rewards/chosen": 0.2686358094215393,
68
+ "rewards/margins": 0.10441793501377106,
69
+ "rewards/rejected": 0.16421787440776825,
70
+ "step": 40
71
+ },
72
+ {
73
+ "epoch": 0.3105590062111801,
74
+ "grad_norm": 1.94443941116333,
75
+ "learning_rate": 4.962281107627828e-06,
76
+ "logits/chosen": -1.295013427734375,
77
+ "logits/rejected": -0.8611756563186646,
78
+ "logps/chosen": -545.5277709960938,
79
+ "logps/rejected": -374.797119140625,
80
+ "loss": 0.5243598937988281,
81
+ "rewards/accuracies": 0.887499988079071,
82
+ "rewards/chosen": 0.5271034836769104,
83
+ "rewards/margins": 0.4082818925380707,
84
+ "rewards/rejected": 0.11882160604000092,
85
+ "step": 50
86
+ },
87
+ {
88
+ "epoch": 0.37267080745341613,
89
+ "grad_norm": 1.8612565994262695,
90
+ "learning_rate": 4.900810019766151e-06,
91
+ "logits/chosen": -1.106858253479004,
92
+ "logits/rejected": -0.8895740509033203,
93
+ "logps/chosen": -574.3599853515625,
94
+ "logps/rejected": -398.66595458984375,
95
+ "loss": 0.46582393646240233,
96
+ "rewards/accuracies": 0.8374999761581421,
97
+ "rewards/chosen": 1.0713833570480347,
98
+ "rewards/margins": 0.6883617639541626,
99
+ "rewards/rejected": 0.38302165269851685,
100
+ "step": 60
101
+ },
102
+ {
103
+ "epoch": 0.43478260869565216,
104
+ "grad_norm": 2.610835313796997,
105
+ "learning_rate": 4.8109966936634885e-06,
106
+ "logits/chosen": -0.9745047688484192,
107
+ "logits/rejected": -0.8878741264343262,
108
+ "logps/chosen": -603.6141357421875,
109
+ "logps/rejected": -422.431396484375,
110
+ "loss": 0.3722702980041504,
111
+ "rewards/accuracies": 0.862500011920929,
112
+ "rewards/chosen": 1.5592485666275024,
113
+ "rewards/margins": 1.0986794233322144,
114
+ "rewards/rejected": 0.46056899428367615,
115
+ "step": 70
116
+ },
117
+ {
118
+ "epoch": 0.4968944099378882,
119
+ "grad_norm": 1.7999701499938965,
120
+ "learning_rate": 4.693901400921782e-06,
121
+ "logits/chosen": -0.9704159498214722,
122
+ "logits/rejected": -0.8450535535812378,
123
+ "logps/chosen": -521.1560668945312,
124
+ "logps/rejected": -403.3153076171875,
125
+ "loss": 0.36702446937561034,
126
+ "rewards/accuracies": 0.875,
127
+ "rewards/chosen": 1.462435245513916,
128
+ "rewards/margins": 1.3280738592147827,
129
+ "rewards/rejected": 0.13436131179332733,
130
+ "step": 80
131
+ },
132
+ {
133
+ "epoch": 0.5590062111801242,
134
+ "grad_norm": 1.2426642179489136,
135
+ "learning_rate": 4.550906484440495e-06,
136
+ "logits/chosen": -0.9953195452690125,
137
+ "logits/rejected": -0.8482304811477661,
138
+ "logps/chosen": -531.6632080078125,
139
+ "logps/rejected": -381.96258544921875,
140
+ "loss": 0.28965752124786376,
141
+ "rewards/accuracies": 0.9375,
142
+ "rewards/chosen": 1.4531519412994385,
143
+ "rewards/margins": 1.6451427936553955,
144
+ "rewards/rejected": -0.19199064373970032,
145
+ "step": 90
146
+ },
147
+ {
148
+ "epoch": 0.6211180124223602,
149
+ "grad_norm": 0.7566975951194763,
150
+ "learning_rate": 4.38370003946948e-06,
151
+ "logits/chosen": -1.0843112468719482,
152
+ "logits/rejected": -0.9646870493888855,
153
+ "logps/chosen": -541.9859619140625,
154
+ "logps/rejected": -427.3182067871094,
155
+ "loss": 0.2813025951385498,
156
+ "rewards/accuracies": 0.9125000238418579,
157
+ "rewards/chosen": 1.0933747291564941,
158
+ "rewards/margins": 1.867444634437561,
159
+ "rewards/rejected": -0.7740699052810669,
160
+ "step": 100
161
+ },
162
+ {
163
+ "epoch": 0.6832298136645962,
164
+ "grad_norm": 1.8626788854599,
165
+ "learning_rate": 4.194255985169259e-06,
166
+ "logits/chosen": -1.16153883934021,
167
+ "logits/rejected": -1.1651047468185425,
168
+ "logps/chosen": -602.457763671875,
169
+ "logps/rejected": -449.3597106933594,
170
+ "loss": 0.1985023260116577,
171
+ "rewards/accuracies": 0.949999988079071,
172
+ "rewards/chosen": 0.8817703127861023,
173
+ "rewards/margins": 2.40962815284729,
174
+ "rewards/rejected": -1.5278576612472534,
175
+ "step": 110
176
+ },
177
+ {
178
+ "epoch": 0.7453416149068323,
179
+ "grad_norm": 1.6138923168182373,
180
+ "learning_rate": 3.984810761939578e-06,
181
+ "logits/chosen": -1.2249107360839844,
182
+ "logits/rejected": -1.1240571737289429,
183
+ "logps/chosen": -563.8658447265625,
184
+ "logps/rejected": -429.28021240234375,
185
+ "loss": 0.14866907596588136,
186
+ "rewards/accuracies": 0.9750000238418579,
187
+ "rewards/chosen": 0.36940866708755493,
188
+ "rewards/margins": 2.9657340049743652,
189
+ "rewards/rejected": -2.596325397491455,
190
+ "step": 120
191
+ },
192
+ {
193
+ "epoch": 0.8074534161490683,
194
+ "grad_norm": 1.6024304628372192,
195
+ "learning_rate": 3.757836929610803e-06,
196
+ "logits/chosen": -1.2749364376068115,
197
+ "logits/rejected": -1.2431113719940186,
198
+ "logps/chosen": -653.6280517578125,
199
+ "logps/rejected": -399.4922790527344,
200
+ "loss": 0.1276548385620117,
201
+ "rewards/accuracies": 0.9624999761581421,
202
+ "rewards/chosen": 0.43943363428115845,
203
+ "rewards/margins": 3.6862716674804688,
204
+ "rewards/rejected": -3.246838331222534,
205
+ "step": 130
206
+ },
207
+ {
208
+ "epoch": 0.8695652173913043,
209
+ "grad_norm": 1.489258885383606,
210
+ "learning_rate": 3.5160139781789615e-06,
211
+ "logits/chosen": -1.1981886625289917,
212
+ "logits/rejected": -1.1743550300598145,
213
+ "logps/chosen": -630.0133056640625,
214
+ "logps/rejected": -453.70233154296875,
215
+ "loss": 0.13757349252700807,
216
+ "rewards/accuracies": 0.9750000238418579,
217
+ "rewards/chosen": 1.116315484046936,
218
+ "rewards/margins": 3.7938003540039062,
219
+ "rewards/rejected": -2.6774849891662598,
220
+ "step": 140
221
+ },
222
+ {
223
+ "epoch": 0.9316770186335404,
224
+ "grad_norm": 1.529561996459961,
225
+ "learning_rate": 3.2621966956719265e-06,
226
+ "logits/chosen": -1.1383417844772339,
227
+ "logits/rejected": -1.1288671493530273,
228
+ "logps/chosen": -605.6137084960938,
229
+ "logps/rejected": -464.63616943359375,
230
+ "loss": 0.1510193705558777,
231
+ "rewards/accuracies": 0.9375,
232
+ "rewards/chosen": 1.3723418712615967,
233
+ "rewards/margins": 3.875206708908081,
234
+ "rewards/rejected": -2.5028648376464844,
235
+ "step": 150
236
+ },
237
+ {
238
+ "epoch": 0.9937888198757764,
239
+ "grad_norm": 1.2801965475082397,
240
+ "learning_rate": 2.9993814665729605e-06,
241
+ "logits/chosen": -1.1155999898910522,
242
+ "logits/rejected": -1.1486127376556396,
243
+ "logps/chosen": -607.6810913085938,
244
+ "logps/rejected": -428.43902587890625,
245
+ "loss": 0.14707612991333008,
246
+ "rewards/accuracies": 0.949999988079071,
247
+ "rewards/chosen": 1.2416237592697144,
248
+ "rewards/margins": 4.007672309875488,
249
+ "rewards/rejected": -2.7660484313964844,
250
+ "step": 160
251
+ },
252
+ {
253
+ "epoch": 1.0559006211180124,
254
+ "grad_norm": 0.6223127245903015,
255
+ "learning_rate": 2.730670898658255e-06,
256
+ "logits/chosen": -1.2678742408752441,
257
+ "logits/rejected": -1.2089016437530518,
258
+ "logps/chosen": -546.5106811523438,
259
+ "logps/rejected": -435.56201171875,
260
+ "loss": 0.07639291286468505,
261
+ "rewards/accuracies": 0.987500011920929,
262
+ "rewards/chosen": 1.2402414083480835,
263
+ "rewards/margins": 4.630614280700684,
264
+ "rewards/rejected": -3.3903732299804688,
265
+ "step": 170
266
+ },
267
+ {
268
+ "epoch": 1.1180124223602483,
269
+ "grad_norm": 0.27513387799263,
270
+ "learning_rate": 2.459237195838577e-06,
271
+ "logits/chosen": -1.1183346509933472,
272
+ "logits/rejected": -1.2728922367095947,
273
+ "logps/chosen": -560.7864990234375,
274
+ "logps/rejected": -461.84332275390625,
275
+ "loss": 0.06406531929969787,
276
+ "rewards/accuracies": 0.987500011920929,
277
+ "rewards/chosen": 1.1345335245132446,
278
+ "rewards/margins": 5.346749305725098,
279
+ "rewards/rejected": -4.212216377258301,
280
+ "step": 180
281
+ },
282
+ {
283
+ "epoch": 1.1801242236024845,
284
+ "grad_norm": 0.5560200810432434,
285
+ "learning_rate": 2.1882847093989544e-06,
286
+ "logits/chosen": -1.2576462030410767,
287
+ "logits/rejected": -1.2605979442596436,
288
+ "logps/chosen": -598.9000854492188,
289
+ "logps/rejected": -403.6759033203125,
290
+ "loss": 0.07317939400672913,
291
+ "rewards/accuracies": 0.987500011920929,
292
+ "rewards/chosen": 1.0486507415771484,
293
+ "rewards/margins": 4.786526203155518,
294
+ "rewards/rejected": -3.737874984741211,
295
+ "step": 190
296
+ },
297
+ {
298
+ "epoch": 1.2422360248447206,
299
+ "grad_norm": 3.307779312133789,
300
+ "learning_rate": 1.921012109729562e-06,
301
+ "logits/chosen": -1.1867707967758179,
302
+ "logits/rejected": -1.1512377262115479,
303
+ "logps/chosen": -564.1589965820312,
304
+ "logps/rejected": -499.6703186035156,
305
+ "loss": 0.07128837704658508,
306
+ "rewards/accuracies": 0.987500011920929,
307
+ "rewards/chosen": 1.1387567520141602,
308
+ "rewards/margins": 4.9200215339660645,
309
+ "rewards/rejected": -3.781264543533325,
310
+ "step": 200
311
+ },
312
+ {
313
+ "epoch": 1.3043478260869565,
314
+ "grad_norm": 0.951438307762146,
315
+ "learning_rate": 1.6605746251211642e-06,
316
+ "logits/chosen": -1.3087455034255981,
317
+ "logits/rejected": -1.4264456033706665,
318
+ "logps/chosen": -574.0267333984375,
319
+ "logps/rejected": -467.0670471191406,
320
+ "loss": 0.05501532554626465,
321
+ "rewards/accuracies": 0.987500011920929,
322
+ "rewards/chosen": 1.2620090246200562,
323
+ "rewards/margins": 5.137342929840088,
324
+ "rewards/rejected": -3.875333309173584,
325
+ "step": 210
326
+ },
327
+ {
328
+ "epoch": 1.3664596273291925,
329
+ "grad_norm": 0.4313580095767975,
330
+ "learning_rate": 1.4100467934067775e-06,
331
+ "logits/chosen": -1.2540130615234375,
332
+ "logits/rejected": -1.2541478872299194,
333
+ "logps/chosen": -557.1004028320312,
334
+ "logps/rejected": -483.9710388183594,
335
+ "loss": 0.08154834508895874,
336
+ "rewards/accuracies": 0.9750000238418579,
337
+ "rewards/chosen": 1.3047633171081543,
338
+ "rewards/margins": 5.482510566711426,
339
+ "rewards/rejected": -4.1777472496032715,
340
+ "step": 220
341
+ },
342
+ {
343
+ "epoch": 1.4285714285714286,
344
+ "grad_norm": 0.3101217746734619,
345
+ "learning_rate": 1.1723861661769e-06,
346
+ "logits/chosen": -1.071692705154419,
347
+ "logits/rejected": -1.244997262954712,
348
+ "logps/chosen": -586.4631958007812,
349
+ "logps/rejected": -464.3282165527344,
350
+ "loss": 0.05381497144699097,
351
+ "rewards/accuracies": 1.0,
352
+ "rewards/chosen": 1.0372705459594727,
353
+ "rewards/margins": 4.841385841369629,
354
+ "rewards/rejected": -3.8041152954101562,
355
+ "step": 230
356
+ },
357
+ {
358
+ "epoch": 1.4906832298136645,
359
+ "grad_norm": 0.4230075180530548,
360
+ "learning_rate": 9.503983940502795e-07,
361
+ "logits/chosen": -1.3364367485046387,
362
+ "logits/rejected": -1.3070945739746094,
363
+ "logps/chosen": -579.2850341796875,
364
+ "logps/rejected": -420.6127014160156,
365
+ "loss": 0.08604136109352112,
366
+ "rewards/accuracies": 0.987500011920929,
367
+ "rewards/chosen": 0.7024589776992798,
368
+ "rewards/margins": 4.885396480560303,
369
+ "rewards/rejected": -4.1829376220703125,
370
+ "step": 240
371
+ },
372
+ {
373
+ "epoch": 1.5527950310559007,
374
+ "grad_norm": 0.31521838903427124,
375
+ "learning_rate": 7.467041051784155e-07,
376
+ "logits/chosen": -1.3780875205993652,
377
+ "logits/rejected": -1.4410358667373657,
378
+ "logps/chosen": -569.69189453125,
379
+ "logps/rejected": -491.8089904785156,
380
+ "loss": 0.09177066087722778,
381
+ "rewards/accuracies": 0.9750000238418579,
382
+ "rewards/chosen": 0.8250600099563599,
383
+ "rewards/margins": 6.094225883483887,
384
+ "rewards/rejected": -5.269165992736816,
385
+ "step": 250
386
+ },
387
+ {
388
+ "epoch": 1.6149068322981366,
389
+ "grad_norm": 0.15115521848201752,
390
+ "learning_rate": 5.637079679923796e-07,
391
+ "logits/chosen": -1.3521003723144531,
392
+ "logits/rejected": -1.344463586807251,
393
+ "logps/chosen": -596.4379272460938,
394
+ "logps/rejected": -428.6333923339844,
395
+ "loss": 0.048569518327713015,
396
+ "rewards/accuracies": 1.0,
397
+ "rewards/chosen": 0.9481250047683716,
398
+ "rewards/margins": 6.150216102600098,
399
+ "rewards/rejected": -5.202090263366699,
400
+ "step": 260
401
+ },
402
+ {
403
+ "epoch": 1.6770186335403725,
404
+ "grad_norm": 0.23105435073375702,
405
+ "learning_rate": 4.035703034149463e-07,
406
+ "logits/chosen": -1.19241464138031,
407
+ "logits/rejected": -1.4326884746551514,
408
+ "logps/chosen": -554.9596557617188,
409
+ "logps/rejected": -461.3987731933594,
410
+ "loss": 0.05657889246940613,
411
+ "rewards/accuracies": 1.0,
412
+ "rewards/chosen": 0.8795555233955383,
413
+ "rewards/margins": 5.471189498901367,
414
+ "rewards/rejected": -4.5916337966918945,
415
+ "step": 270
416
+ },
417
+ {
418
+ "epoch": 1.7391304347826086,
419
+ "grad_norm": 0.14941351115703583,
420
+ "learning_rate": 2.681815816638503e-07,
421
+ "logits/chosen": -1.240888237953186,
422
+ "logits/rejected": -1.4535605907440186,
423
+ "logps/chosen": -582.7335205078125,
424
+ "logps/rejected": -481.8042907714844,
425
+ "loss": 0.043333661556243894,
426
+ "rewards/accuracies": 1.0,
427
+ "rewards/chosen": 0.9755797386169434,
428
+ "rewards/margins": 6.308679580688477,
429
+ "rewards/rejected": -5.333099365234375,
430
+ "step": 280
431
+ },
432
+ {
433
+ "epoch": 1.8012422360248448,
434
+ "grad_norm": 0.7390910983085632,
435
+ "learning_rate": 1.5914010471859947e-07,
436
+ "logits/chosen": -1.3577600717544556,
437
+ "logits/rejected": -1.4665708541870117,
438
+ "logps/chosen": -542.1375732421875,
439
+ "logps/rejected": -501.33306884765625,
440
+ "loss": 0.051105821132659913,
441
+ "rewards/accuracies": 1.0,
442
+ "rewards/chosen": 0.9414765238761902,
443
+ "rewards/margins": 6.377541542053223,
444
+ "rewards/rejected": -5.436064720153809,
445
+ "step": 290
446
+ },
447
+ {
448
+ "epoch": 1.8633540372670807,
449
+ "grad_norm": 0.29464995861053467,
450
+ "learning_rate": 7.773313791559545e-08,
451
+ "logits/chosen": -1.350490689277649,
452
+ "logits/rejected": -1.069253921508789,
453
+ "logps/chosen": -596.630859375,
454
+ "logps/rejected": -484.7491149902344,
455
+ "loss": 0.07570538520812989,
456
+ "rewards/accuracies": 0.987500011920929,
457
+ "rewards/chosen": 0.8024793863296509,
458
+ "rewards/margins": 5.369864463806152,
459
+ "rewards/rejected": -4.567385196685791,
460
+ "step": 300
461
+ },
462
+ {
463
+ "epoch": 1.9254658385093166,
464
+ "grad_norm": 0.14690952003002167,
465
+ "learning_rate": 2.492171341840405e-08,
466
+ "logits/chosen": -1.299762487411499,
467
+ "logits/rejected": -1.3709434270858765,
468
+ "logps/chosen": -577.23974609375,
469
+ "logps/rejected": -418.92852783203125,
470
+ "loss": 0.026452887058258056,
471
+ "rewards/accuracies": 1.0,
472
+ "rewards/chosen": 0.9688733816146851,
473
+ "rewards/margins": 5.997436046600342,
474
+ "rewards/rejected": -5.028562545776367,
475
+ "step": 310
476
+ },
477
+ {
478
+ "epoch": 1.9875776397515528,
479
+ "grad_norm": 0.13554854691028595,
480
+ "learning_rate": 1.3292849624821357e-09,
481
+ "logits/chosen": -1.3074995279312134,
482
+ "logits/rejected": -1.5039985179901123,
483
+ "logps/chosen": -596.8470458984375,
484
+ "logps/rejected": -474.729248046875,
485
+ "loss": 0.02374868541955948,
486
+ "rewards/accuracies": 1.0,
487
+ "rewards/chosen": 0.9388774037361145,
488
+ "rewards/margins": 6.432289123535156,
489
+ "rewards/rejected": -5.493411064147949,
490
+ "step": 320
491
+ }
492
+ ],
493
+ "logging_steps": 10,
494
+ "max_steps": 322,
495
+ "num_input_tokens_seen": 0,
496
+ "num_train_epochs": 2,
497
+ "save_steps": 500,
498
+ "stateful_callbacks": {
499
+ "TrainerControl": {
500
+ "args": {
501
+ "should_epoch_stop": false,
502
+ "should_evaluate": false,
503
+ "should_log": false,
504
+ "should_save": true,
505
+ "should_training_stop": true
506
+ },
507
+ "attributes": {}
508
+ }
509
+ },
510
+ "total_flos": 0.0,
511
+ "train_batch_size": 2,
512
+ "trial_name": null,
513
+ "trial_params": null
514
+ }