ggbetz committed on
Commit
86b070a
·
verified ·
1 Parent(s): f76c208

Model save

Browse files
README.md ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: DebateLabKIT/Phi-4-Argunaut-1-SFT-dev1
3
+ library_name: transformers
4
+ model_name: Phi-4-Argunaut-1-SPIN-dev1
5
+ tags:
6
+ - generated_from_trainer
7
+ - dpo
8
+ - trl
9
+ licence: license
10
+ ---
11
+
12
+ # Model Card for Phi-4-Argunaut-1-SPIN-dev1
13
+
14
+ This model is a fine-tuned version of [DebateLabKIT/Phi-4-Argunaut-1-SFT-dev1](https://huggingface.co/DebateLabKIT/Phi-4-Argunaut-1-SFT-dev1).
15
+ It has been trained using [TRL](https://github.com/huggingface/trl).
16
+
17
+ ## Quick start
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+
22
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
23
+ generator = pipeline("text-generation", model="DebateLabKIT/Phi-4-Argunaut-1-SPIN-dev1", device="cuda")
24
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
25
+ print(output["generated_text"])
26
+ ```
27
+
28
+ ## Training procedure
29
+
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/ggbetz/argunauts-training/runs/g7wyrguq)
31
+
32
+
33
+ This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
34
+
35
+ ### Framework versions
36
+
37
+ - TRL: 0.19.1
38
+ - Transformers: 4.53.3
39
+ - PyTorch: 2.4.1
40
+ - Datasets: 3.1.0
41
+ - Tokenizers: 0.21.4
42
+
43
+ ## Citations
44
+
45
+ Cite DPO as:
46
+
47
+ ```bibtex
48
+ @inproceedings{rafailov2023direct,
49
+ title = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}},
50
+ author = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn},
51
+ year = 2023,
52
+ booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
53
+ url = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html},
54
+ editor = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
55
+ }
56
+ ```
57
+
58
+ Cite TRL as:
59
+
60
+ ```bibtex
61
+ @misc{vonwerra2022trl,
62
+ title = {{TRL: Transformer Reinforcement Learning}},
63
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
64
+ year = 2020,
65
+ journal = {GitHub repository},
66
+ publisher = {GitHub},
67
+ howpublished = {\url{https://github.com/huggingface/trl}}
68
+ }
69
+ ```
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.34146574610158015,
5
+ "train_runtime": 4832.1445,
6
+ "train_samples": 4838,
7
+ "train_samples_per_second": 2.002,
8
+ "train_steps_per_second": 0.063
9
+ }
chat_template.jinja ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system<|im_sep|>' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are Argunaut, created by DebateLab@KIT. You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system<|im_sep|>' + messages[0]['content'] + '<|im_end|>' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system<|im_sep|>You are Argunaut, created by DebateLab@KIT. You are a helpful assistant.<|im_end|>' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '<|im_sep|>' + message.content + '<|im_end|>' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role + '<|im_sep|>'}}
26
+ {%- if message.content %}
27
+ {{- message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} {{- '<|im_start|>user<|im_sep|>' }}
42
+ {%- endif %}
43
+ {{- '\n<tool_response>\n' }}
44
+ {{- message.content }}
45
+ {{- '\n</tool_response>' }}
46
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
47
+ {{- '<|im_end|>' }}
48
+ {%- endif %}
49
+ {%- endif %}
50
+ {%- endfor %}
51
+ {%- if add_generation_prompt %}
52
+ {{- '<|im_start|>assistant<|im_sep|>' }}
53
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.15,
7
+ "bos_token_id": 100257,
8
+ "eos_token_id": 100265,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 5120,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 17920,
14
+ "max_position_embeddings": 16384,
15
+ "mlp_bias": false,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 40,
18
+ "num_hidden_layers": 40,
19
+ "num_key_value_heads": 10,
20
+ "original_max_position_embeddings": 16384,
21
+ "pad_token_id": 100351,
22
+ "pretraining_tp": 1,
23
+ "rms_norm_eps": 1e-05,
24
+ "rope_scaling": null,
25
+ "rope_theta": 250000,
26
+ "tie_word_embeddings": false,
27
+ "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.53.3",
29
+ "use_cache": false,
30
+ "vocab_size": 100352
31
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 100257,
4
+ "eos_token_id": 100265,
5
+ "pad_token_id": 100351,
6
+ "transformers_version": "4.53.3"
7
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f80f16654bd69a9bf61ab58ff711594351aec75ae3a053fe72f250750cc2503a
3
+ size 4933658528
model-00002-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d47f10ad788f41a9f53b194f6ff88d25819d1fb5e212b750fd4424e71169413a
3
+ size 4954693112
model-00003-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aae514e2187be12286ac65679ff5bf8b43206430fa8032764ddd0ea27b88ce40
3
+ size 4902243992
model-00004-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27173e9497c7431cdad62f89289fd00782ca43904aa09270d9af5a65009aeb1f
3
+ size 4954672440
model-00005-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b43358b6067c8d8e3e613b515c9a3304beca7a0cbe1ff009909eec02f1023d7f
3
+ size 4954672432
model-00006-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf59e535ca829d332b689c97cf31875ad50aaa5798ebf4f02d614ebaa018412e
3
+ size 4619116224
model.safetensors.index.json ADDED
@@ -0,0 +1,371 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_parameters": 414720,
4
+ "total_size": 29319014400
5
+ },
6
+ "weight_map": {
7
+ "lm_head.weight": "model-00006-of-00006.safetensors",
8
+ "model.embed_tokens.weight": "model-00001-of-00006.safetensors",
9
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors",
10
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
11
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
12
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
13
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
14
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
15
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
16
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
17
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
18
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors",
19
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
20
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
21
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
22
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
23
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
24
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
25
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
26
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
27
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00006.safetensors",
28
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
29
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
30
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
31
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
32
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
33
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
34
+ "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
35
+ "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
36
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00006.safetensors",
37
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
38
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
39
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
40
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
41
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
42
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
43
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
44
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
45
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00006.safetensors",
46
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
47
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
48
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
49
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
50
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
51
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
52
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
53
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
54
+ "model.layers.13.input_layernorm.weight": "model-00003-of-00006.safetensors",
55
+ "model.layers.13.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
56
+ "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
57
+ "model.layers.13.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
58
+ "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
59
+ "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
60
+ "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
61
+ "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
62
+ "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
63
+ "model.layers.14.input_layernorm.weight": "model-00003-of-00006.safetensors",
64
+ "model.layers.14.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
65
+ "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
66
+ "model.layers.14.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
67
+ "model.layers.14.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
68
+ "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
69
+ "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
70
+ "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
71
+ "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
72
+ "model.layers.15.input_layernorm.weight": "model-00003-of-00006.safetensors",
73
+ "model.layers.15.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
74
+ "model.layers.15.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
75
+ "model.layers.15.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
76
+ "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
77
+ "model.layers.15.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
78
+ "model.layers.15.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
79
+ "model.layers.15.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
80
+ "model.layers.15.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
81
+ "model.layers.16.input_layernorm.weight": "model-00003-of-00006.safetensors",
82
+ "model.layers.16.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
83
+ "model.layers.16.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
84
+ "model.layers.16.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
85
+ "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
86
+ "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
87
+ "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
88
+ "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
89
+ "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
90
+ "model.layers.17.input_layernorm.weight": "model-00003-of-00006.safetensors",
91
+ "model.layers.17.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
92
+ "model.layers.17.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
93
+ "model.layers.17.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
94
+ "model.layers.17.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
95
+ "model.layers.17.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
96
+ "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
97
+ "model.layers.17.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
98
+ "model.layers.17.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
99
+ "model.layers.18.input_layernorm.weight": "model-00003-of-00006.safetensors",
100
+ "model.layers.18.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
101
+ "model.layers.18.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
102
+ "model.layers.18.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
103
+ "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
104
+ "model.layers.18.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
105
+ "model.layers.18.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
106
+ "model.layers.18.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
107
+ "model.layers.18.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
108
+ "model.layers.19.input_layernorm.weight": "model-00003-of-00006.safetensors",
109
+ "model.layers.19.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
110
+ "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
111
+ "model.layers.19.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
112
+ "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
113
+ "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
114
+ "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
115
+ "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
116
+ "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
117
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00006.safetensors",
118
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
119
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
120
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
121
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
122
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
123
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
124
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
125
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
126
+ "model.layers.20.input_layernorm.weight": "model-00004-of-00006.safetensors",
127
+ "model.layers.20.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
128
+ "model.layers.20.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
129
+ "model.layers.20.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
130
+ "model.layers.20.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
131
+ "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
132
+ "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
133
+ "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
134
+ "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
135
+ "model.layers.21.input_layernorm.weight": "model-00004-of-00006.safetensors",
136
+ "model.layers.21.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
137
+ "model.layers.21.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
138
+ "model.layers.21.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
139
+ "model.layers.21.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
140
+ "model.layers.21.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
141
+ "model.layers.21.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
142
+ "model.layers.21.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
143
+ "model.layers.21.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
144
+ "model.layers.22.input_layernorm.weight": "model-00004-of-00006.safetensors",
145
+ "model.layers.22.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
146
+ "model.layers.22.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
147
+ "model.layers.22.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
148
+ "model.layers.22.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
149
+ "model.layers.22.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
150
+ "model.layers.22.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
151
+ "model.layers.22.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
152
+ "model.layers.22.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
153
+ "model.layers.23.input_layernorm.weight": "model-00004-of-00006.safetensors",
154
+ "model.layers.23.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
155
+ "model.layers.23.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
156
+ "model.layers.23.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
157
+ "model.layers.23.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
158
+ "model.layers.23.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
159
+ "model.layers.23.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
160
+ "model.layers.23.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
161
+ "model.layers.23.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
162
+ "model.layers.24.input_layernorm.weight": "model-00004-of-00006.safetensors",
163
+ "model.layers.24.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
164
+ "model.layers.24.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
165
+ "model.layers.24.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
166
+ "model.layers.24.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
167
+ "model.layers.24.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
168
+ "model.layers.24.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
169
+ "model.layers.24.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
170
+ "model.layers.24.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
171
+ "model.layers.25.input_layernorm.weight": "model-00004-of-00006.safetensors",
172
+ "model.layers.25.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
173
+ "model.layers.25.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
174
+ "model.layers.25.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
175
+ "model.layers.25.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
176
+ "model.layers.25.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
177
+ "model.layers.25.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
178
+ "model.layers.25.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
179
+ "model.layers.25.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
180
+ "model.layers.26.input_layernorm.weight": "model-00004-of-00006.safetensors",
181
+ "model.layers.26.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
182
+ "model.layers.26.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
183
+ "model.layers.26.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
184
+ "model.layers.26.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
185
+ "model.layers.26.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
186
+ "model.layers.26.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
187
+ "model.layers.26.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
188
+ "model.layers.26.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
189
+ "model.layers.27.input_layernorm.weight": "model-00005-of-00006.safetensors",
190
+ "model.layers.27.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
191
+ "model.layers.27.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
192
+ "model.layers.27.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
193
+ "model.layers.27.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
194
+ "model.layers.27.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
195
+ "model.layers.27.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
196
+ "model.layers.27.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
197
+ "model.layers.27.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
198
+ "model.layers.28.input_layernorm.weight": "model-00005-of-00006.safetensors",
199
+ "model.layers.28.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
200
+ "model.layers.28.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
201
+ "model.layers.28.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
202
+ "model.layers.28.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
203
+ "model.layers.28.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
204
+ "model.layers.28.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
205
+ "model.layers.28.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
206
+ "model.layers.28.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
207
+ "model.layers.29.input_layernorm.weight": "model-00005-of-00006.safetensors",
208
+ "model.layers.29.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
209
+ "model.layers.29.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
210
+ "model.layers.29.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
211
+ "model.layers.29.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
212
+ "model.layers.29.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
213
+ "model.layers.29.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
214
+ "model.layers.29.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
215
+ "model.layers.29.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
216
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00006.safetensors",
217
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
218
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
219
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
220
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
221
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
222
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
223
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
224
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
225
+ "model.layers.30.input_layernorm.weight": "model-00005-of-00006.safetensors",
226
+ "model.layers.30.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
227
+ "model.layers.30.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
228
+ "model.layers.30.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
229
+ "model.layers.30.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
230
+ "model.layers.30.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
231
+ "model.layers.30.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
232
+ "model.layers.30.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
233
+ "model.layers.30.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
234
+ "model.layers.31.input_layernorm.weight": "model-00005-of-00006.safetensors",
235
+ "model.layers.31.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
236
+ "model.layers.31.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
237
+ "model.layers.31.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
238
+ "model.layers.31.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
239
+ "model.layers.31.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
240
+ "model.layers.31.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
241
+ "model.layers.31.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
242
+ "model.layers.31.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
243
+ "model.layers.32.input_layernorm.weight": "model-00005-of-00006.safetensors",
244
+ "model.layers.32.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
245
+ "model.layers.32.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
246
+ "model.layers.32.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
247
+ "model.layers.32.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
248
+ "model.layers.32.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
249
+ "model.layers.32.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
250
+ "model.layers.32.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
251
+ "model.layers.32.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
252
+ "model.layers.33.input_layernorm.weight": "model-00005-of-00006.safetensors",
253
+ "model.layers.33.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
254
+ "model.layers.33.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
255
+ "model.layers.33.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
256
+ "model.layers.33.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
257
+ "model.layers.33.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
258
+ "model.layers.33.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
259
+ "model.layers.33.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
260
+ "model.layers.33.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
261
+ "model.layers.34.input_layernorm.weight": "model-00006-of-00006.safetensors",
262
+ "model.layers.34.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
263
+ "model.layers.34.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
264
+ "model.layers.34.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
265
+ "model.layers.34.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
266
+ "model.layers.34.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
267
+ "model.layers.34.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
268
+ "model.layers.34.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
269
+ "model.layers.34.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
270
+ "model.layers.35.input_layernorm.weight": "model-00006-of-00006.safetensors",
271
+ "model.layers.35.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
272
+ "model.layers.35.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
273
+ "model.layers.35.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
274
+ "model.layers.35.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
275
+ "model.layers.35.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
276
+ "model.layers.35.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
277
+ "model.layers.35.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
278
+ "model.layers.35.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
279
+ "model.layers.36.input_layernorm.weight": "model-00006-of-00006.safetensors",
280
+ "model.layers.36.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
281
+ "model.layers.36.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
282
+ "model.layers.36.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
283
+ "model.layers.36.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
284
+ "model.layers.36.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
285
+ "model.layers.36.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
286
+ "model.layers.36.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
287
+ "model.layers.36.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
288
+ "model.layers.37.input_layernorm.weight": "model-00006-of-00006.safetensors",
289
+ "model.layers.37.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
290
+ "model.layers.37.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
291
+ "model.layers.37.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
292
+ "model.layers.37.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
293
+ "model.layers.37.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
294
+ "model.layers.37.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
295
+ "model.layers.37.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
296
+ "model.layers.37.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
297
+ "model.layers.38.input_layernorm.weight": "model-00006-of-00006.safetensors",
298
+ "model.layers.38.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
299
+ "model.layers.38.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
300
+ "model.layers.38.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
301
+ "model.layers.38.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
302
+ "model.layers.38.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
303
+ "model.layers.38.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
304
+ "model.layers.38.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
305
+ "model.layers.38.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
306
+ "model.layers.39.input_layernorm.weight": "model-00006-of-00006.safetensors",
307
+ "model.layers.39.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
308
+ "model.layers.39.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
309
+ "model.layers.39.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
310
+ "model.layers.39.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
311
+ "model.layers.39.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
312
+ "model.layers.39.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
313
+ "model.layers.39.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
314
+ "model.layers.39.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
315
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00006.safetensors",
316
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
317
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
318
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
319
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
320
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
321
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
322
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
323
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
324
+ "model.layers.5.input_layernorm.weight": "model-00002-of-00006.safetensors",
325
+ "model.layers.5.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
326
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
327
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
328
+ "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
329
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
330
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
331
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
332
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
333
+ "model.layers.6.input_layernorm.weight": "model-00002-of-00006.safetensors",
334
+ "model.layers.6.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
335
+ "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
336
+ "model.layers.6.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
337
+ "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
338
+ "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
339
+ "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
340
+ "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
341
+ "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
342
+ "model.layers.7.input_layernorm.weight": "model-00002-of-00006.safetensors",
343
+ "model.layers.7.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
344
+ "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
345
+ "model.layers.7.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
346
+ "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
347
+ "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
348
+ "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
349
+ "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
350
+ "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
351
+ "model.layers.8.input_layernorm.weight": "model-00002-of-00006.safetensors",
352
+ "model.layers.8.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
353
+ "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
354
+ "model.layers.8.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
355
+ "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
356
+ "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
357
+ "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
358
+ "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
359
+ "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
360
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00006.safetensors",
361
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
362
+ "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
363
+ "model.layers.9.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
364
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
365
+ "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
366
+ "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
367
+ "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
368
+ "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
369
+ "model.norm.weight": "model-00006-of-00006.safetensors"
370
+ }
371
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": true,
5
+ "normalized": false,
6
+ "rstrip": true,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|im_end|>",
11
+ "lstrip": true,
12
+ "normalized": false,
13
+ "rstrip": true,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|dummy_87|>",
18
+ "lstrip": true,
19
+ "normalized": false,
20
+ "rstrip": true,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "�",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,790 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "5809": {
5
+ "content": "�",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "100256": {
13
+ "content": "<|dummy_0|>",
14
+ "lstrip": true,
15
+ "normalized": false,
16
+ "rstrip": true,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "100257": {
21
+ "content": "<|endoftext|>",
22
+ "lstrip": true,
23
+ "normalized": false,
24
+ "rstrip": true,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "100258": {
29
+ "content": "<|fim_prefix|>",
30
+ "lstrip": true,
31
+ "normalized": false,
32
+ "rstrip": true,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "100259": {
37
+ "content": "<|fim_middle|>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": true,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "100260": {
45
+ "content": "<|fim_suffix|>",
46
+ "lstrip": true,
47
+ "normalized": false,
48
+ "rstrip": true,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "100261": {
53
+ "content": "<|dummy_1|>",
54
+ "lstrip": true,
55
+ "normalized": false,
56
+ "rstrip": true,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "100262": {
61
+ "content": "<|dummy_2|>",
62
+ "lstrip": true,
63
+ "normalized": false,
64
+ "rstrip": true,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "100263": {
69
+ "content": "<|dummy_3|>",
70
+ "lstrip": true,
71
+ "normalized": false,
72
+ "rstrip": true,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "100264": {
77
+ "content": "<|im_start|>",
78
+ "lstrip": true,
79
+ "normalized": false,
80
+ "rstrip": true,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "100265": {
85
+ "content": "<|im_end|>",
86
+ "lstrip": true,
87
+ "normalized": false,
88
+ "rstrip": true,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "100266": {
93
+ "content": "<|im_sep|>",
94
+ "lstrip": true,
95
+ "normalized": false,
96
+ "rstrip": true,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "100267": {
101
+ "content": "<|dummy_4|>",
102
+ "lstrip": true,
103
+ "normalized": false,
104
+ "rstrip": true,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "100268": {
109
+ "content": "<|dummy_5|>",
110
+ "lstrip": true,
111
+ "normalized": false,
112
+ "rstrip": true,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "100269": {
117
+ "content": "<|dummy_6|>",
118
+ "lstrip": true,
119
+ "normalized": false,
120
+ "rstrip": true,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "100270": {
125
+ "content": "<|dummy_7|>",
126
+ "lstrip": true,
127
+ "normalized": false,
128
+ "rstrip": true,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "100271": {
133
+ "content": "<|dummy_8|>",
134
+ "lstrip": true,
135
+ "normalized": false,
136
+ "rstrip": true,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "100272": {
141
+ "content": "<|dummy_9|>",
142
+ "lstrip": true,
143
+ "normalized": false,
144
+ "rstrip": true,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "100273": {
149
+ "content": "<|dummy_10|>",
150
+ "lstrip": true,
151
+ "normalized": false,
152
+ "rstrip": true,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "100274": {
157
+ "content": "<|dummy_11|>",
158
+ "lstrip": true,
159
+ "normalized": false,
160
+ "rstrip": true,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "100275": {
165
+ "content": "<|dummy_12|>",
166
+ "lstrip": true,
167
+ "normalized": false,
168
+ "rstrip": true,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "100276": {
173
+ "content": "<|endofprompt|>",
174
+ "lstrip": true,
175
+ "normalized": false,
176
+ "rstrip": true,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "100277": {
181
+ "content": "<|dummy_13|>",
182
+ "lstrip": true,
183
+ "normalized": false,
184
+ "rstrip": true,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "100278": {
189
+ "content": "<|dummy_14|>",
190
+ "lstrip": true,
191
+ "normalized": false,
192
+ "rstrip": true,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "100279": {
197
+ "content": "<|dummy_15|>",
198
+ "lstrip": true,
199
+ "normalized": false,
200
+ "rstrip": true,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "100280": {
205
+ "content": "<|dummy_16|>",
206
+ "lstrip": true,
207
+ "normalized": false,
208
+ "rstrip": true,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "100281": {
213
+ "content": "<|dummy_17|>",
214
+ "lstrip": true,
215
+ "normalized": false,
216
+ "rstrip": true,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "100282": {
221
+ "content": "<|dummy_18|>",
222
+ "lstrip": true,
223
+ "normalized": false,
224
+ "rstrip": true,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "100283": {
229
+ "content": "<|dummy_19|>",
230
+ "lstrip": true,
231
+ "normalized": false,
232
+ "rstrip": true,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "100284": {
237
+ "content": "<|dummy_20|>",
238
+ "lstrip": true,
239
+ "normalized": false,
240
+ "rstrip": true,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "100285": {
245
+ "content": "<|dummy_21|>",
246
+ "lstrip": true,
247
+ "normalized": false,
248
+ "rstrip": true,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "100286": {
253
+ "content": "<|dummy_22|>",
254
+ "lstrip": true,
255
+ "normalized": false,
256
+ "rstrip": true,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "100287": {
261
+ "content": "<|dummy_23|>",
262
+ "lstrip": true,
263
+ "normalized": false,
264
+ "rstrip": true,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "100288": {
269
+ "content": "<|dummy_24|>",
270
+ "lstrip": true,
271
+ "normalized": false,
272
+ "rstrip": true,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "100289": {
277
+ "content": "<|dummy_25|>",
278
+ "lstrip": true,
279
+ "normalized": false,
280
+ "rstrip": true,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "100290": {
285
+ "content": "<|dummy_26|>",
286
+ "lstrip": true,
287
+ "normalized": false,
288
+ "rstrip": true,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "100291": {
293
+ "content": "<|dummy_27|>",
294
+ "lstrip": true,
295
+ "normalized": false,
296
+ "rstrip": true,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "100292": {
301
+ "content": "<|dummy_28|>",
302
+ "lstrip": true,
303
+ "normalized": false,
304
+ "rstrip": true,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "100293": {
309
+ "content": "<|dummy_29|>",
310
+ "lstrip": true,
311
+ "normalized": false,
312
+ "rstrip": true,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "100294": {
317
+ "content": "<|dummy_30|>",
318
+ "lstrip": true,
319
+ "normalized": false,
320
+ "rstrip": true,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "100295": {
325
+ "content": "<|dummy_31|>",
326
+ "lstrip": true,
327
+ "normalized": false,
328
+ "rstrip": true,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "100296": {
333
+ "content": "<|dummy_32|>",
334
+ "lstrip": true,
335
+ "normalized": false,
336
+ "rstrip": true,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "100297": {
341
+ "content": "<|dummy_33|>",
342
+ "lstrip": true,
343
+ "normalized": false,
344
+ "rstrip": true,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "100298": {
349
+ "content": "<|dummy_34|>",
350
+ "lstrip": true,
351
+ "normalized": false,
352
+ "rstrip": true,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "100299": {
357
+ "content": "<|dummy_35|>",
358
+ "lstrip": true,
359
+ "normalized": false,
360
+ "rstrip": true,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "100300": {
365
+ "content": "<|dummy_36|>",
366
+ "lstrip": true,
367
+ "normalized": false,
368
+ "rstrip": true,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "100301": {
373
+ "content": "<|dummy_37|>",
374
+ "lstrip": true,
375
+ "normalized": false,
376
+ "rstrip": true,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "100302": {
381
+ "content": "<|dummy_38|>",
382
+ "lstrip": true,
383
+ "normalized": false,
384
+ "rstrip": true,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "100303": {
389
+ "content": "<|dummy_39|>",
390
+ "lstrip": true,
391
+ "normalized": false,
392
+ "rstrip": true,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "100304": {
397
+ "content": "<|dummy_40|>",
398
+ "lstrip": true,
399
+ "normalized": false,
400
+ "rstrip": true,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "100305": {
405
+ "content": "<|dummy_41|>",
406
+ "lstrip": true,
407
+ "normalized": false,
408
+ "rstrip": true,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "100306": {
413
+ "content": "<|dummy_42|>",
414
+ "lstrip": true,
415
+ "normalized": false,
416
+ "rstrip": true,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "100307": {
421
+ "content": "<|dummy_43|>",
422
+ "lstrip": true,
423
+ "normalized": false,
424
+ "rstrip": true,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "100308": {
429
+ "content": "<|dummy_44|>",
430
+ "lstrip": true,
431
+ "normalized": false,
432
+ "rstrip": true,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "100309": {
437
+ "content": "<|dummy_45|>",
438
+ "lstrip": true,
439
+ "normalized": false,
440
+ "rstrip": true,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "100310": {
445
+ "content": "<|dummy_46|>",
446
+ "lstrip": true,
447
+ "normalized": false,
448
+ "rstrip": true,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "100311": {
453
+ "content": "<|dummy_47|>",
454
+ "lstrip": true,
455
+ "normalized": false,
456
+ "rstrip": true,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "100312": {
461
+ "content": "<|dummy_48|>",
462
+ "lstrip": true,
463
+ "normalized": false,
464
+ "rstrip": true,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "100313": {
469
+ "content": "<|dummy_49|>",
470
+ "lstrip": true,
471
+ "normalized": false,
472
+ "rstrip": true,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "100314": {
477
+ "content": "<|dummy_50|>",
478
+ "lstrip": true,
479
+ "normalized": false,
480
+ "rstrip": true,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "100315": {
485
+ "content": "<|dummy_51|>",
486
+ "lstrip": true,
487
+ "normalized": false,
488
+ "rstrip": true,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "100316": {
493
+ "content": "<|dummy_52|>",
494
+ "lstrip": true,
495
+ "normalized": false,
496
+ "rstrip": true,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "100317": {
501
+ "content": "<|dummy_53|>",
502
+ "lstrip": true,
503
+ "normalized": false,
504
+ "rstrip": true,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "100318": {
509
+ "content": "<|dummy_54|>",
510
+ "lstrip": true,
511
+ "normalized": false,
512
+ "rstrip": true,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "100319": {
517
+ "content": "<|dummy_55|>",
518
+ "lstrip": true,
519
+ "normalized": false,
520
+ "rstrip": true,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "100320": {
525
+ "content": "<|dummy_56|>",
526
+ "lstrip": true,
527
+ "normalized": false,
528
+ "rstrip": true,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "100321": {
533
+ "content": "<|dummy_57|>",
534
+ "lstrip": true,
535
+ "normalized": false,
536
+ "rstrip": true,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "100322": {
541
+ "content": "<|dummy_58|>",
542
+ "lstrip": true,
543
+ "normalized": false,
544
+ "rstrip": true,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "100323": {
549
+ "content": "<|dummy_59|>",
550
+ "lstrip": true,
551
+ "normalized": false,
552
+ "rstrip": true,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "100324": {
557
+ "content": "<|dummy_60|>",
558
+ "lstrip": true,
559
+ "normalized": false,
560
+ "rstrip": true,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "100325": {
565
+ "content": "<|dummy_61|>",
566
+ "lstrip": true,
567
+ "normalized": false,
568
+ "rstrip": true,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "100326": {
573
+ "content": "<|dummy_62|>",
574
+ "lstrip": true,
575
+ "normalized": false,
576
+ "rstrip": true,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "100327": {
581
+ "content": "<|dummy_63|>",
582
+ "lstrip": true,
583
+ "normalized": false,
584
+ "rstrip": true,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "100328": {
589
+ "content": "<|dummy_64|>",
590
+ "lstrip": true,
591
+ "normalized": false,
592
+ "rstrip": true,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "100329": {
597
+ "content": "<|dummy_65|>",
598
+ "lstrip": true,
599
+ "normalized": false,
600
+ "rstrip": true,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "100330": {
605
+ "content": "<|dummy_66|>",
606
+ "lstrip": true,
607
+ "normalized": false,
608
+ "rstrip": true,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "100331": {
613
+ "content": "<|dummy_67|>",
614
+ "lstrip": true,
615
+ "normalized": false,
616
+ "rstrip": true,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "100332": {
621
+ "content": "<|dummy_68|>",
622
+ "lstrip": true,
623
+ "normalized": false,
624
+ "rstrip": true,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "100333": {
629
+ "content": "<|dummy_69|>",
630
+ "lstrip": true,
631
+ "normalized": false,
632
+ "rstrip": true,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "100334": {
637
+ "content": "<|dummy_70|>",
638
+ "lstrip": true,
639
+ "normalized": false,
640
+ "rstrip": true,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "100335": {
645
+ "content": "<|dummy_71|>",
646
+ "lstrip": true,
647
+ "normalized": false,
648
+ "rstrip": true,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "100336": {
653
+ "content": "<|dummy_72|>",
654
+ "lstrip": true,
655
+ "normalized": false,
656
+ "rstrip": true,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "100337": {
661
+ "content": "<|dummy_73|>",
662
+ "lstrip": true,
663
+ "normalized": false,
664
+ "rstrip": true,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "100338": {
669
+ "content": "<|dummy_74|>",
670
+ "lstrip": true,
671
+ "normalized": false,
672
+ "rstrip": true,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "100339": {
677
+ "content": "<|dummy_75|>",
678
+ "lstrip": true,
679
+ "normalized": false,
680
+ "rstrip": true,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "100340": {
685
+ "content": "<|dummy_76|>",
686
+ "lstrip": true,
687
+ "normalized": false,
688
+ "rstrip": true,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "100341": {
693
+ "content": "<|dummy_77|>",
694
+ "lstrip": true,
695
+ "normalized": false,
696
+ "rstrip": true,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "100342": {
701
+ "content": "<|dummy_78|>",
702
+ "lstrip": true,
703
+ "normalized": false,
704
+ "rstrip": true,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "100343": {
709
+ "content": "<|dummy_79|>",
710
+ "lstrip": true,
711
+ "normalized": false,
712
+ "rstrip": true,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "100344": {
717
+ "content": "<|dummy_80|>",
718
+ "lstrip": true,
719
+ "normalized": false,
720
+ "rstrip": true,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "100345": {
725
+ "content": "<|dummy_81|>",
726
+ "lstrip": true,
727
+ "normalized": false,
728
+ "rstrip": true,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "100346": {
733
+ "content": "<|dummy_82|>",
734
+ "lstrip": true,
735
+ "normalized": false,
736
+ "rstrip": true,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "100347": {
741
+ "content": "<|dummy_83|>",
742
+ "lstrip": true,
743
+ "normalized": false,
744
+ "rstrip": true,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "100348": {
749
+ "content": "<|dummy_84|>",
750
+ "lstrip": true,
751
+ "normalized": false,
752
+ "rstrip": true,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "100349": {
757
+ "content": "<|dummy_85|>",
758
+ "lstrip": true,
759
+ "normalized": false,
760
+ "rstrip": true,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "100350": {
765
+ "content": "<|dummy_86|>",
766
+ "lstrip": true,
767
+ "normalized": false,
768
+ "rstrip": true,
769
+ "single_word": false,
770
+ "special": true
771
+ },
772
+ "100351": {
773
+ "content": "<|dummy_87|>",
774
+ "lstrip": true,
775
+ "normalized": false,
776
+ "rstrip": true,
777
+ "single_word": false,
778
+ "special": true
779
+ }
780
+ },
781
+ "bos_token": "<|endoftext|>",
782
+ "clean_up_tokenization_spaces": false,
783
+ "eos_token": "<|im_end|>",
784
+ "extra_special_tokens": {},
785
+ "model_max_length": 16384,
786
+ "pad_token": "<|dummy_87|>",
787
+ "padding_side": "left",
788
+ "tokenizer_class": "GPT2Tokenizer",
789
+ "unk_token": "�"
790
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.34146574610158015,
5
+ "train_runtime": 4832.1445,
6
+ "train_samples": 4838,
7
+ "train_samples_per_second": 2.002,
8
+ "train_steps_per_second": 0.063
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,943 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 304,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.03305785123966942,
14
+ "grad_norm": 20.384813053174184,
15
+ "learning_rate": 1.25e-07,
16
+ "logits/chosen": -1.9736328125,
17
+ "logits/rejected": -2.0150389671325684,
18
+ "logps/chosen": -101.9375,
19
+ "logps/rejected": -134.6875,
20
+ "loss": 0.7609,
21
+ "rewards/accuracies": 0.5562499761581421,
22
+ "rewards/chosen": -0.05767517164349556,
23
+ "rewards/margins": -0.02346496656537056,
24
+ "rewards/rejected": -0.03435821458697319,
25
+ "step": 5
26
+ },
27
+ {
28
+ "epoch": 0.06611570247933884,
29
+ "grad_norm": 94.01960102254296,
30
+ "learning_rate": 2.8125e-07,
31
+ "logits/chosen": -1.9695312976837158,
32
+ "logits/rejected": -1.9580078125,
33
+ "logps/chosen": -188.0031280517578,
34
+ "logps/rejected": -213.8312530517578,
35
+ "loss": 0.7023,
36
+ "rewards/accuracies": 0.5062500238418579,
37
+ "rewards/chosen": -0.0018112182151526213,
38
+ "rewards/margins": 0.08289947360754013,
39
+ "rewards/rejected": -0.08440055698156357,
40
+ "step": 10
41
+ },
42
+ {
43
+ "epoch": 0.09917355371900827,
44
+ "grad_norm": 21.211718326010597,
45
+ "learning_rate": 4.375e-07,
46
+ "logits/chosen": -1.9630858898162842,
47
+ "logits/rejected": -2.010546922683716,
48
+ "logps/chosen": -121.1937484741211,
49
+ "logps/rejected": -157.6640625,
50
+ "loss": 0.7329,
51
+ "rewards/accuracies": 0.48750001192092896,
52
+ "rewards/chosen": -0.034603118896484375,
53
+ "rewards/margins": -0.02179870568215847,
54
+ "rewards/rejected": -0.01271667517721653,
55
+ "step": 15
56
+ },
57
+ {
58
+ "epoch": 0.1322314049586777,
59
+ "grad_norm": 20.70329807308497,
60
+ "learning_rate": 4.947916666666667e-07,
61
+ "logits/chosen": -1.928125023841858,
62
+ "logits/rejected": -2.0234375,
63
+ "logps/chosen": -112.5875015258789,
64
+ "logps/rejected": -97.15156555175781,
65
+ "loss": 0.6808,
66
+ "rewards/accuracies": 0.6187499761581421,
67
+ "rewards/chosen": 0.06768341362476349,
68
+ "rewards/margins": 0.08130035549402237,
69
+ "rewards/rejected": -0.013789367862045765,
70
+ "step": 20
71
+ },
72
+ {
73
+ "epoch": 0.1652892561983471,
74
+ "grad_norm": 25.232267926711238,
75
+ "learning_rate": 4.861111111111111e-07,
76
+ "logits/chosen": -2.002734422683716,
77
+ "logits/rejected": -1.969335913658142,
78
+ "logps/chosen": -106.5374984741211,
79
+ "logps/rejected": -128.78125,
80
+ "loss": 0.6689,
81
+ "rewards/accuracies": 0.699999988079071,
82
+ "rewards/chosen": 0.17589417099952698,
83
+ "rewards/margins": 0.202056884765625,
84
+ "rewards/rejected": -0.02575073204934597,
85
+ "step": 25
86
+ },
87
+ {
88
+ "epoch": 0.19834710743801653,
89
+ "grad_norm": 15.27943476263106,
90
+ "learning_rate": 4.774305555555555e-07,
91
+ "logits/chosen": -1.9646484851837158,
92
+ "logits/rejected": -2.035351514816284,
93
+ "logps/chosen": -97.609375,
94
+ "logps/rejected": -133.9968719482422,
95
+ "loss": 0.5863,
96
+ "rewards/accuracies": 0.71875,
97
+ "rewards/chosen": 0.27128297090530396,
98
+ "rewards/margins": 0.34486085176467896,
99
+ "rewards/rejected": -0.07353515923023224,
100
+ "step": 30
101
+ },
102
+ {
103
+ "epoch": 0.23140495867768596,
104
+ "grad_norm": 10.254397382545033,
105
+ "learning_rate": 4.6874999999999996e-07,
106
+ "logits/chosen": -1.9158203601837158,
107
+ "logits/rejected": -1.9259765148162842,
108
+ "logps/chosen": -76.8343734741211,
109
+ "logps/rejected": -93.0531234741211,
110
+ "loss": 0.5547,
111
+ "rewards/accuracies": 0.762499988079071,
112
+ "rewards/chosen": 0.4645752012729645,
113
+ "rewards/margins": 0.45016783475875854,
114
+ "rewards/rejected": 0.014300537295639515,
115
+ "step": 35
116
+ },
117
+ {
118
+ "epoch": 0.2644628099173554,
119
+ "grad_norm": 14.380457410581087,
120
+ "learning_rate": 4.600694444444444e-07,
121
+ "logits/chosen": -1.8447265625,
122
+ "logits/rejected": -1.936914086341858,
123
+ "logps/chosen": -106.31562805175781,
124
+ "logps/rejected": -108.1734390258789,
125
+ "loss": 0.5251,
126
+ "rewards/accuracies": 0.78125,
127
+ "rewards/chosen": 0.601550281047821,
128
+ "rewards/margins": 0.5547240972518921,
129
+ "rewards/rejected": 0.047088623046875,
130
+ "step": 40
131
+ },
132
+ {
133
+ "epoch": 0.2975206611570248,
134
+ "grad_norm": 78.56584566869854,
135
+ "learning_rate": 4.513888888888889e-07,
136
+ "logits/chosen": -1.8576171398162842,
137
+ "logits/rejected": -1.8904297351837158,
138
+ "logps/chosen": -127.65937805175781,
139
+ "logps/rejected": -155.9734344482422,
140
+ "loss": 0.5159,
141
+ "rewards/accuracies": 0.8062499761581421,
142
+ "rewards/chosen": 0.5876830816268921,
143
+ "rewards/margins": 0.611767590045929,
144
+ "rewards/rejected": -0.02455444261431694,
145
+ "step": 45
146
+ },
147
+ {
148
+ "epoch": 0.3305785123966942,
149
+ "grad_norm": 12.535466357743957,
150
+ "learning_rate": 4.427083333333333e-07,
151
+ "logits/chosen": -1.7546875476837158,
152
+ "logits/rejected": -1.8126952648162842,
153
+ "logps/chosen": -101.51875305175781,
154
+ "logps/rejected": -104.28593444824219,
155
+ "loss": 0.4631,
156
+ "rewards/accuracies": 0.862500011920929,
157
+ "rewards/chosen": 0.7405639886856079,
158
+ "rewards/margins": 0.692065417766571,
159
+ "rewards/rejected": 0.0489349365234375,
160
+ "step": 50
161
+ },
162
+ {
163
+ "epoch": 0.36363636363636365,
164
+ "grad_norm": 17.87312710689112,
165
+ "learning_rate": 4.3402777777777775e-07,
166
+ "logits/chosen": -1.8767578601837158,
167
+ "logits/rejected": -1.91015625,
168
+ "logps/chosen": -108.8062515258789,
169
+ "logps/rejected": -112.98124694824219,
170
+ "loss": 0.4817,
171
+ "rewards/accuracies": 0.793749988079071,
172
+ "rewards/chosen": 0.802502453327179,
173
+ "rewards/margins": 0.7518310546875,
174
+ "rewards/rejected": 0.05042724683880806,
175
+ "step": 55
176
+ },
177
+ {
178
+ "epoch": 0.39669421487603307,
179
+ "grad_norm": 33.86478827382489,
180
+ "learning_rate": 4.253472222222222e-07,
181
+ "logits/chosen": -1.750585913658142,
182
+ "logits/rejected": -1.828710913658142,
183
+ "logps/chosen": -95.0843734741211,
184
+ "logps/rejected": -84.9000015258789,
185
+ "loss": 0.4742,
186
+ "rewards/accuracies": 0.831250011920929,
187
+ "rewards/chosen": 0.8693481683731079,
188
+ "rewards/margins": 0.792254626750946,
189
+ "rewards/rejected": 0.07660599052906036,
190
+ "step": 60
191
+ },
192
+ {
193
+ "epoch": 0.4297520661157025,
194
+ "grad_norm": 14.995895485980972,
195
+ "learning_rate": 4.1666666666666667e-07,
196
+ "logits/chosen": -1.761132836341858,
197
+ "logits/rejected": -1.8439452648162842,
198
+ "logps/chosen": -83.8453140258789,
199
+ "logps/rejected": -218.5031280517578,
200
+ "loss": 0.4289,
201
+ "rewards/accuracies": 0.862500011920929,
202
+ "rewards/chosen": 0.879650890827179,
203
+ "rewards/margins": 0.8621460199356079,
204
+ "rewards/rejected": 0.017832566052675247,
205
+ "step": 65
206
+ },
207
+ {
208
+ "epoch": 0.4628099173553719,
209
+ "grad_norm": 16.046927552557996,
210
+ "learning_rate": 4.079861111111111e-07,
211
+ "logits/chosen": -1.7744140625,
212
+ "logits/rejected": -1.7898437976837158,
213
+ "logps/chosen": -115.00468444824219,
214
+ "logps/rejected": -108.6343765258789,
215
+ "loss": 0.4529,
216
+ "rewards/accuracies": 0.824999988079071,
217
+ "rewards/chosen": 0.887927234172821,
218
+ "rewards/margins": 0.950427234172821,
219
+ "rewards/rejected": -0.062247466295957565,
220
+ "step": 70
221
+ },
222
+ {
223
+ "epoch": 0.49586776859504134,
224
+ "grad_norm": 16.03572269046516,
225
+ "learning_rate": 3.993055555555556e-07,
226
+ "logits/chosen": -1.7394530773162842,
227
+ "logits/rejected": -1.7888672351837158,
228
+ "logps/chosen": -116.4124984741211,
229
+ "logps/rejected": -133.37655639648438,
230
+ "loss": 0.4293,
231
+ "rewards/accuracies": 0.8125,
232
+ "rewards/chosen": 0.8433440923690796,
233
+ "rewards/margins": 0.95654296875,
234
+ "rewards/rejected": -0.11332092434167862,
235
+ "step": 75
236
+ },
237
+ {
238
+ "epoch": 0.5289256198347108,
239
+ "grad_norm": 22.37349972383721,
240
+ "learning_rate": 3.9062499999999997e-07,
241
+ "logits/chosen": -1.75390625,
242
+ "logits/rejected": -1.704687476158142,
243
+ "logps/chosen": -71.3265609741211,
244
+ "logps/rejected": -139.4250030517578,
245
+ "loss": 0.3767,
246
+ "rewards/accuracies": 0.875,
247
+ "rewards/chosen": 1.1237304210662842,
248
+ "rewards/margins": 1.17822265625,
249
+ "rewards/rejected": -0.05453949049115181,
250
+ "step": 80
251
+ },
252
+ {
253
+ "epoch": 0.5619834710743802,
254
+ "grad_norm": 20.509754459037634,
255
+ "learning_rate": 3.819444444444444e-07,
256
+ "logits/chosen": -1.729101538658142,
257
+ "logits/rejected": -1.7654297351837158,
258
+ "logps/chosen": -75.62968444824219,
259
+ "logps/rejected": -91.04374694824219,
260
+ "loss": 0.3517,
261
+ "rewards/accuracies": 0.90625,
262
+ "rewards/chosen": 1.2136719226837158,
263
+ "rewards/margins": 1.282470703125,
264
+ "rewards/rejected": -0.06860504299402237,
265
+ "step": 85
266
+ },
267
+ {
268
+ "epoch": 0.5950413223140496,
269
+ "grad_norm": 9.352262418999281,
270
+ "learning_rate": 3.732638888888889e-07,
271
+ "logits/chosen": -1.715429663658142,
272
+ "logits/rejected": -1.8029296398162842,
273
+ "logps/chosen": -94.9625015258789,
274
+ "logps/rejected": -106.2046890258789,
275
+ "loss": 0.363,
276
+ "rewards/accuracies": 0.8687499761581421,
277
+ "rewards/chosen": 0.999072253704071,
278
+ "rewards/margins": 1.187744140625,
279
+ "rewards/rejected": -0.188883975148201,
280
+ "step": 90
281
+ },
282
+ {
283
+ "epoch": 0.628099173553719,
284
+ "grad_norm": 23.688934732030923,
285
+ "learning_rate": 3.645833333333333e-07,
286
+ "logits/chosen": -1.700781226158142,
287
+ "logits/rejected": -1.7039062976837158,
288
+ "logps/chosen": -112.2484359741211,
289
+ "logps/rejected": -159.9812469482422,
290
+ "loss": 0.3718,
291
+ "rewards/accuracies": 0.893750011920929,
292
+ "rewards/chosen": 1.14593505859375,
293
+ "rewards/margins": 1.391357421875,
294
+ "rewards/rejected": -0.24615478515625,
295
+ "step": 95
296
+ },
297
+ {
298
+ "epoch": 0.6611570247933884,
299
+ "grad_norm": 15.499132967124797,
300
+ "learning_rate": 3.5590277777777775e-07,
301
+ "logits/chosen": -1.759179711341858,
302
+ "logits/rejected": -1.74609375,
303
+ "logps/chosen": -113.99687194824219,
304
+ "logps/rejected": -290.7093811035156,
305
+ "loss": 0.3336,
306
+ "rewards/accuracies": 0.8812500238418579,
307
+ "rewards/chosen": 1.349707007408142,
308
+ "rewards/margins": 1.653967261314392,
309
+ "rewards/rejected": -0.30389100313186646,
310
+ "step": 100
311
+ },
312
+ {
313
+ "epoch": 0.6942148760330579,
314
+ "grad_norm": 8.99540374074108,
315
+ "learning_rate": 3.472222222222222e-07,
316
+ "logits/chosen": -1.693359375,
317
+ "logits/rejected": -1.745703101158142,
318
+ "logps/chosen": -78.84843444824219,
319
+ "logps/rejected": -206.0625,
320
+ "loss": 0.3079,
321
+ "rewards/accuracies": 0.918749988079071,
322
+ "rewards/chosen": 1.15185546875,
323
+ "rewards/margins": 1.513940453529358,
324
+ "rewards/rejected": -0.361959844827652,
325
+ "step": 105
326
+ },
327
+ {
328
+ "epoch": 0.7272727272727273,
329
+ "grad_norm": 13.110466529706155,
330
+ "learning_rate": 3.3854166666666667e-07,
331
+ "logits/chosen": -1.701171875,
332
+ "logits/rejected": -1.7404296398162842,
333
+ "logps/chosen": -89.875,
334
+ "logps/rejected": -91.78437805175781,
335
+ "loss": 0.3093,
336
+ "rewards/accuracies": 0.9437500238418579,
337
+ "rewards/chosen": 1.142968773841858,
338
+ "rewards/margins": 1.448095679283142,
339
+ "rewards/rejected": -0.30474853515625,
340
+ "step": 110
341
+ },
342
+ {
343
+ "epoch": 0.7603305785123967,
344
+ "grad_norm": 16.946974264461762,
345
+ "learning_rate": 3.298611111111111e-07,
346
+ "logits/chosen": -1.6511719226837158,
347
+ "logits/rejected": -1.7580077648162842,
348
+ "logps/chosen": -68.44999694824219,
349
+ "logps/rejected": -56.02031326293945,
350
+ "loss": 0.351,
351
+ "rewards/accuracies": 0.8812500238418579,
352
+ "rewards/chosen": 1.1487548351287842,
353
+ "rewards/margins": 1.403710961341858,
354
+ "rewards/rejected": -0.254721075296402,
355
+ "step": 115
356
+ },
357
+ {
358
+ "epoch": 0.7933884297520661,
359
+ "grad_norm": 21.478896017415543,
360
+ "learning_rate": 3.211805555555556e-07,
361
+ "logits/chosen": -1.775781273841858,
362
+ "logits/rejected": -1.765039086341858,
363
+ "logps/chosen": -85.5171890258789,
364
+ "logps/rejected": -311.77813720703125,
365
+ "loss": 0.3856,
366
+ "rewards/accuracies": 0.84375,
367
+ "rewards/chosen": 1.138818383216858,
368
+ "rewards/margins": 1.498632788658142,
369
+ "rewards/rejected": -0.35924071073532104,
370
+ "step": 120
371
+ },
372
+ {
373
+ "epoch": 0.8264462809917356,
374
+ "grad_norm": 16.699686019608645,
375
+ "learning_rate": 3.1249999999999997e-07,
376
+ "logits/chosen": -1.739843726158142,
377
+ "logits/rejected": -1.7394530773162842,
378
+ "logps/chosen": -85.05937194824219,
379
+ "logps/rejected": -78.09687805175781,
380
+ "loss": 0.3181,
381
+ "rewards/accuracies": 0.9125000238418579,
382
+ "rewards/chosen": 1.241113305091858,
383
+ "rewards/margins": 1.596777319908142,
384
+ "rewards/rejected": -0.35357969999313354,
385
+ "step": 125
386
+ },
387
+ {
388
+ "epoch": 0.859504132231405,
389
+ "grad_norm": 22.059878721649408,
390
+ "learning_rate": 3.038194444444444e-07,
391
+ "logits/chosen": -1.7470703125,
392
+ "logits/rejected": -1.7429687976837158,
393
+ "logps/chosen": -84.1734390258789,
394
+ "logps/rejected": -108.15625,
395
+ "loss": 0.2781,
396
+ "rewards/accuracies": 0.8999999761581421,
397
+ "rewards/chosen": 1.319189429283142,
398
+ "rewards/margins": 1.7539551258087158,
399
+ "rewards/rejected": -0.435638427734375,
400
+ "step": 130
401
+ },
402
+ {
403
+ "epoch": 0.8925619834710744,
404
+ "grad_norm": 26.55552523822837,
405
+ "learning_rate": 2.951388888888889e-07,
406
+ "logits/chosen": -1.7365233898162842,
407
+ "logits/rejected": -1.753320336341858,
408
+ "logps/chosen": -86.8499984741211,
409
+ "logps/rejected": -123.36250305175781,
410
+ "loss": 0.3303,
411
+ "rewards/accuracies": 0.862500011920929,
412
+ "rewards/chosen": 1.180883765220642,
413
+ "rewards/margins": 1.699804663658142,
414
+ "rewards/rejected": -0.5188751220703125,
415
+ "step": 135
416
+ },
417
+ {
418
+ "epoch": 0.9256198347107438,
419
+ "grad_norm": 5.21454541019512,
420
+ "learning_rate": 2.864583333333333e-07,
421
+ "logits/chosen": -1.759765625,
422
+ "logits/rejected": -1.7794921398162842,
423
+ "logps/chosen": -72.8609390258789,
424
+ "logps/rejected": -104.01875305175781,
425
+ "loss": 0.3237,
426
+ "rewards/accuracies": 0.8999999761581421,
427
+ "rewards/chosen": 1.08544921875,
428
+ "rewards/margins": 1.622802734375,
429
+ "rewards/rejected": -0.5378357172012329,
430
+ "step": 140
431
+ },
432
+ {
433
+ "epoch": 0.9586776859504132,
434
+ "grad_norm": 17.08783452785935,
435
+ "learning_rate": 2.7777777777777776e-07,
436
+ "logits/chosen": -1.7380859851837158,
437
+ "logits/rejected": -1.71875,
438
+ "logps/chosen": -113.3609390258789,
439
+ "logps/rejected": -135.9968719482422,
440
+ "loss": 0.362,
441
+ "rewards/accuracies": 0.84375,
442
+ "rewards/chosen": 1.2180664539337158,
443
+ "rewards/margins": 1.650537133216858,
444
+ "rewards/rejected": -0.43310603499412537,
445
+ "step": 145
446
+ },
447
+ {
448
+ "epoch": 0.9917355371900827,
449
+ "grad_norm": 9.807839504587346,
450
+ "learning_rate": 2.690972222222222e-07,
451
+ "logits/chosen": -1.691015601158142,
452
+ "logits/rejected": -1.718164086341858,
453
+ "logps/chosen": -91.4906234741211,
454
+ "logps/rejected": -189.89688110351562,
455
+ "loss": 0.2705,
456
+ "rewards/accuracies": 0.925000011920929,
457
+ "rewards/chosen": 1.2189452648162842,
458
+ "rewards/margins": 1.797998070716858,
459
+ "rewards/rejected": -0.5793823003768921,
460
+ "step": 150
461
+ },
462
+ {
463
+ "epoch": 1.0198347107438017,
464
+ "grad_norm": 9.768977700612497,
465
+ "learning_rate": 2.604166666666667e-07,
466
+ "logits/chosen": -1.625229835510254,
467
+ "logits/rejected": -1.6571691036224365,
468
+ "logps/chosen": -57.45036697387695,
469
+ "logps/rejected": -53.50367736816406,
470
+ "loss": 0.2231,
471
+ "rewards/accuracies": 0.9558823704719543,
472
+ "rewards/chosen": 1.252814769744873,
473
+ "rewards/margins": 1.7216222286224365,
474
+ "rewards/rejected": -0.46808579564094543,
475
+ "step": 155
476
+ },
477
+ {
478
+ "epoch": 1.052892561983471,
479
+ "grad_norm": 10.908210786207713,
480
+ "learning_rate": 2.517361111111111e-07,
481
+ "logits/chosen": -1.7646484375,
482
+ "logits/rejected": -1.7109375,
483
+ "logps/chosen": -83.1937484741211,
484
+ "logps/rejected": -91.83125305175781,
485
+ "loss": 0.2587,
486
+ "rewards/accuracies": 0.9375,
487
+ "rewards/chosen": 1.206628441810608,
488
+ "rewards/margins": 1.848486304283142,
489
+ "rewards/rejected": -0.641589343547821,
490
+ "step": 160
491
+ },
492
+ {
493
+ "epoch": 1.0859504132231406,
494
+ "grad_norm": 9.103890657754624,
495
+ "learning_rate": 2.4305555555555555e-07,
496
+ "logits/chosen": -1.7931640148162842,
497
+ "logits/rejected": -1.7109375,
498
+ "logps/chosen": -131.7624969482422,
499
+ "logps/rejected": -376.5375061035156,
500
+ "loss": 0.2809,
501
+ "rewards/accuracies": 0.925000011920929,
502
+ "rewards/chosen": 1.186181664466858,
503
+ "rewards/margins": 1.97705078125,
504
+ "rewards/rejected": -0.7916015386581421,
505
+ "step": 165
506
+ },
507
+ {
508
+ "epoch": 1.1190082644628099,
509
+ "grad_norm": 20.108066106111135,
510
+ "learning_rate": 2.3437499999999998e-07,
511
+ "logits/chosen": -1.7462890148162842,
512
+ "logits/rejected": -1.751367211341858,
513
+ "logps/chosen": -112.19218444824219,
514
+ "logps/rejected": -107.296875,
515
+ "loss": 0.3179,
516
+ "rewards/accuracies": 0.862500011920929,
517
+ "rewards/chosen": 1.1929657459259033,
518
+ "rewards/margins": 1.8135497570037842,
519
+ "rewards/rejected": -0.62066650390625,
520
+ "step": 170
521
+ },
522
+ {
523
+ "epoch": 1.1520661157024794,
524
+ "grad_norm": 11.06192152881459,
525
+ "learning_rate": 2.2569444444444444e-07,
526
+ "logits/chosen": -1.7449219226837158,
527
+ "logits/rejected": -1.755273461341858,
528
+ "logps/chosen": -78.3671875,
529
+ "logps/rejected": -112.86250305175781,
530
+ "loss": 0.2205,
531
+ "rewards/accuracies": 0.9375,
532
+ "rewards/chosen": 1.427587866783142,
533
+ "rewards/margins": 2.342089891433716,
534
+ "rewards/rejected": -0.9125610589981079,
535
+ "step": 175
536
+ },
537
+ {
538
+ "epoch": 1.1851239669421487,
539
+ "grad_norm": 16.74266747693132,
540
+ "learning_rate": 2.1701388888888887e-07,
541
+ "logits/chosen": -1.722265601158142,
542
+ "logits/rejected": -1.7253906726837158,
543
+ "logps/chosen": -80.3187484741211,
544
+ "logps/rejected": -82.3359375,
545
+ "loss": 0.2836,
546
+ "rewards/accuracies": 0.9125000238418579,
547
+ "rewards/chosen": 1.2846190929412842,
548
+ "rewards/margins": 2.060546875,
549
+ "rewards/rejected": -0.7765258550643921,
550
+ "step": 180
551
+ },
552
+ {
553
+ "epoch": 1.2181818181818183,
554
+ "grad_norm": 21.202358098219463,
555
+ "learning_rate": 2.0833333333333333e-07,
556
+ "logits/chosen": -1.7423827648162842,
557
+ "logits/rejected": -1.75390625,
558
+ "logps/chosen": -128.7578125,
559
+ "logps/rejected": -135.2375030517578,
560
+ "loss": 0.2561,
561
+ "rewards/accuracies": 0.893750011920929,
562
+ "rewards/chosen": 1.331298828125,
563
+ "rewards/margins": 2.1512694358825684,
564
+ "rewards/rejected": -0.820111095905304,
565
+ "step": 185
566
+ },
567
+ {
568
+ "epoch": 1.2512396694214876,
569
+ "grad_norm": 15.516819767863538,
570
+ "learning_rate": 1.996527777777778e-07,
571
+ "logits/chosen": -1.74609375,
572
+ "logits/rejected": -1.717187523841858,
573
+ "logps/chosen": -103.02030944824219,
574
+ "logps/rejected": -207.6875,
575
+ "loss": 0.2562,
576
+ "rewards/accuracies": 0.90625,
577
+ "rewards/chosen": 1.2101562023162842,
578
+ "rewards/margins": 2.2046875953674316,
579
+ "rewards/rejected": -0.9949951171875,
580
+ "step": 190
581
+ },
582
+ {
583
+ "epoch": 1.284297520661157,
584
+ "grad_norm": 14.467425156894965,
585
+ "learning_rate": 1.909722222222222e-07,
586
+ "logits/chosen": -1.755468726158142,
587
+ "logits/rejected": -1.7345702648162842,
588
+ "logps/chosen": -86.8140640258789,
589
+ "logps/rejected": -247.21875,
590
+ "loss": 0.248,
591
+ "rewards/accuracies": 0.90625,
592
+ "rewards/chosen": 1.2613525390625,
593
+ "rewards/margins": 2.2603516578674316,
594
+ "rewards/rejected": -0.998150646686554,
595
+ "step": 195
596
+ },
597
+ {
598
+ "epoch": 1.3173553719008264,
599
+ "grad_norm": 21.767064372417554,
600
+ "learning_rate": 1.8229166666666666e-07,
601
+ "logits/chosen": -1.6640625,
602
+ "logits/rejected": -1.691992163658142,
603
+ "logps/chosen": -119.8218765258789,
604
+ "logps/rejected": -178.82186889648438,
605
+ "loss": 0.2761,
606
+ "rewards/accuracies": 0.925000011920929,
607
+ "rewards/chosen": 1.3565673828125,
608
+ "rewards/margins": 2.1192383766174316,
609
+ "rewards/rejected": -0.7636810541152954,
610
+ "step": 200
611
+ },
612
+ {
613
+ "epoch": 1.350413223140496,
614
+ "grad_norm": 10.54750059469599,
615
+ "learning_rate": 1.736111111111111e-07,
616
+ "logits/chosen": -1.7587890625,
617
+ "logits/rejected": -1.725195288658142,
618
+ "logps/chosen": -78.1890640258789,
619
+ "logps/rejected": -85.6890640258789,
620
+ "loss": 0.2541,
621
+ "rewards/accuracies": 0.9375,
622
+ "rewards/chosen": 1.3107421398162842,
623
+ "rewards/margins": 2.126415967941284,
624
+ "rewards/rejected": -0.8150528073310852,
625
+ "step": 205
626
+ },
627
+ {
628
+ "epoch": 1.3834710743801653,
629
+ "grad_norm": 3.7118818864195466,
630
+ "learning_rate": 1.6493055555555555e-07,
631
+ "logits/chosen": -1.722265601158142,
632
+ "logits/rejected": -1.691015601158142,
633
+ "logps/chosen": -83.59375,
634
+ "logps/rejected": -128.953125,
635
+ "loss": 0.2053,
636
+ "rewards/accuracies": 0.9624999761581421,
637
+ "rewards/chosen": 1.312109351158142,
638
+ "rewards/margins": 2.3080077171325684,
639
+ "rewards/rejected": -0.995330810546875,
640
+ "step": 210
641
+ },
642
+ {
643
+ "epoch": 1.4165289256198348,
644
+ "grad_norm": 13.111903944179245,
645
+ "learning_rate": 1.5624999999999999e-07,
646
+ "logits/chosen": -1.746679663658142,
647
+ "logits/rejected": -1.7384765148162842,
648
+ "logps/chosen": -92.25,
649
+ "logps/rejected": -143.0031280517578,
650
+ "loss": 0.2477,
651
+ "rewards/accuracies": 0.918749988079071,
652
+ "rewards/chosen": 1.2750976085662842,
653
+ "rewards/margins": 2.1878905296325684,
654
+ "rewards/rejected": -0.9129577875137329,
655
+ "step": 215
656
+ },
657
+ {
658
+ "epoch": 1.449586776859504,
659
+ "grad_norm": 8.98243357316642,
660
+ "learning_rate": 1.4756944444444445e-07,
661
+ "logits/chosen": -1.7062499523162842,
662
+ "logits/rejected": -1.7119140625,
663
+ "logps/chosen": -54.498435974121094,
664
+ "logps/rejected": -75.37187194824219,
665
+ "loss": 0.2179,
666
+ "rewards/accuracies": 0.9375,
667
+ "rewards/chosen": 1.424560546875,
668
+ "rewards/margins": 2.394970655441284,
669
+ "rewards/rejected": -0.9706786870956421,
670
+ "step": 220
671
+ },
672
+ {
673
+ "epoch": 1.4826446280991736,
674
+ "grad_norm": 12.808066286335526,
675
+ "learning_rate": 1.3888888888888888e-07,
676
+ "logits/chosen": -1.7238280773162842,
677
+ "logits/rejected": -1.69140625,
678
+ "logps/chosen": -84.2437515258789,
679
+ "logps/rejected": -208.33438110351562,
680
+ "loss": 0.1979,
681
+ "rewards/accuracies": 0.96875,
682
+ "rewards/chosen": 1.435449242591858,
683
+ "rewards/margins": 2.487499952316284,
684
+ "rewards/rejected": -1.053137183189392,
685
+ "step": 225
686
+ },
687
+ {
688
+ "epoch": 1.515702479338843,
689
+ "grad_norm": 12.264081534579313,
690
+ "learning_rate": 1.3020833333333334e-07,
691
+ "logits/chosen": -1.7414062023162842,
692
+ "logits/rejected": -1.8126952648162842,
693
+ "logps/chosen": -104.0718765258789,
694
+ "logps/rejected": -214.6531219482422,
695
+ "loss": 0.2108,
696
+ "rewards/accuracies": 0.949999988079071,
697
+ "rewards/chosen": 1.247045874595642,
698
+ "rewards/margins": 2.327441453933716,
699
+ "rewards/rejected": -1.0798766613006592,
700
+ "step": 230
701
+ },
702
+ {
703
+ "epoch": 1.5487603305785123,
704
+ "grad_norm": 13.542480117605272,
705
+ "learning_rate": 1.2152777777777777e-07,
706
+ "logits/chosen": -1.7566406726837158,
707
+ "logits/rejected": -1.7218749523162842,
708
+ "logps/chosen": -94.53125,
709
+ "logps/rejected": -159.43124389648438,
710
+ "loss": 0.2589,
711
+ "rewards/accuracies": 0.925000011920929,
712
+ "rewards/chosen": 1.1764647960662842,
713
+ "rewards/margins": 2.2066407203674316,
714
+ "rewards/rejected": -1.030126929283142,
715
+ "step": 235
716
+ },
717
+ {
718
+ "epoch": 1.5818181818181818,
719
+ "grad_norm": 12.091282830905666,
720
+ "learning_rate": 1.1284722222222222e-07,
721
+ "logits/chosen": -1.726953148841858,
722
+ "logits/rejected": -1.731054663658142,
723
+ "logps/chosen": -103.51875305175781,
724
+ "logps/rejected": -142.2843780517578,
725
+ "loss": 0.2465,
726
+ "rewards/accuracies": 0.887499988079071,
727
+ "rewards/chosen": 1.175048828125,
728
+ "rewards/margins": 2.24957275390625,
729
+ "rewards/rejected": -1.075646996498108,
730
+ "step": 240
731
+ },
732
+ {
733
+ "epoch": 1.6148760330578513,
734
+ "grad_norm": 10.386454689797876,
735
+ "learning_rate": 1.0416666666666667e-07,
736
+ "logits/chosen": -1.7158203125,
737
+ "logits/rejected": -1.738867163658142,
738
+ "logps/chosen": -149.1125030517578,
739
+ "logps/rejected": -225.484375,
740
+ "loss": 0.2091,
741
+ "rewards/accuracies": 0.949999988079071,
742
+ "rewards/chosen": 1.264672875404358,
743
+ "rewards/margins": 2.459765672683716,
744
+ "rewards/rejected": -1.1947753429412842,
745
+ "step": 245
746
+ },
747
+ {
748
+ "epoch": 1.6479338842975206,
749
+ "grad_norm": 11.535999334661906,
750
+ "learning_rate": 9.54861111111111e-08,
751
+ "logits/chosen": -1.6787109375,
752
+ "logits/rejected": -1.705468773841858,
753
+ "logps/chosen": -110.4765625,
754
+ "logps/rejected": -99.65937805175781,
755
+ "loss": 0.2224,
756
+ "rewards/accuracies": 0.918749988079071,
757
+ "rewards/chosen": 1.3373291492462158,
758
+ "rewards/margins": 2.2801756858825684,
759
+ "rewards/rejected": -0.9426910281181335,
760
+ "step": 250
761
+ },
762
+ {
763
+ "epoch": 1.68099173553719,
764
+ "grad_norm": 12.221384548644076,
765
+ "learning_rate": 8.680555555555555e-08,
766
+ "logits/chosen": -1.7462890148162842,
767
+ "logits/rejected": -1.6970703601837158,
768
+ "logps/chosen": -77.2984390258789,
769
+ "logps/rejected": -157.0281219482422,
770
+ "loss": 0.2205,
771
+ "rewards/accuracies": 0.925000011920929,
772
+ "rewards/chosen": 1.394323706626892,
773
+ "rewards/margins": 2.5204100608825684,
774
+ "rewards/rejected": -1.124169945716858,
775
+ "step": 255
776
+ },
777
+ {
778
+ "epoch": 1.7140495867768595,
779
+ "grad_norm": 7.1842584754057635,
780
+ "learning_rate": 7.812499999999999e-08,
781
+ "logits/chosen": -1.6804687976837158,
782
+ "logits/rejected": -1.671289086341858,
783
+ "logps/chosen": -121.0609359741211,
784
+ "logps/rejected": -221.22811889648438,
785
+ "loss": 0.2036,
786
+ "rewards/accuracies": 0.9624999761581421,
787
+ "rewards/chosen": 1.3713257312774658,
788
+ "rewards/margins": 2.472705125808716,
789
+ "rewards/rejected": -1.101171851158142,
790
+ "step": 260
791
+ },
792
+ {
793
+ "epoch": 1.747107438016529,
794
+ "grad_norm": 10.198371331055432,
795
+ "learning_rate": 6.944444444444444e-08,
796
+ "logits/chosen": -1.7517578601837158,
797
+ "logits/rejected": -1.7761719226837158,
798
+ "logps/chosen": -107.21875,
799
+ "logps/rejected": -152.17813110351562,
800
+ "loss": 0.1913,
801
+ "rewards/accuracies": 0.956250011920929,
802
+ "rewards/chosen": 1.3739745616912842,
803
+ "rewards/margins": 2.637890577316284,
804
+ "rewards/rejected": -1.2629883289337158,
805
+ "step": 265
806
+ },
807
+ {
808
+ "epoch": 1.7801652892561983,
809
+ "grad_norm": 7.641943419372844,
810
+ "learning_rate": 6.076388888888889e-08,
811
+ "logits/chosen": -1.6964843273162842,
812
+ "logits/rejected": -1.694921851158142,
813
+ "logps/chosen": -69.37812805175781,
814
+ "logps/rejected": -61.01250076293945,
815
+ "loss": 0.1954,
816
+ "rewards/accuracies": 0.9375,
817
+ "rewards/chosen": 1.41259765625,
818
+ "rewards/margins": 2.5541014671325684,
819
+ "rewards/rejected": -1.141119360923767,
820
+ "step": 270
821
+ },
822
+ {
823
+ "epoch": 1.8132231404958676,
824
+ "grad_norm": 11.114602201507525,
825
+ "learning_rate": 5.208333333333333e-08,
826
+ "logits/chosen": -1.6591796875,
827
+ "logits/rejected": -1.7351562976837158,
828
+ "logps/chosen": -87.42655944824219,
829
+ "logps/rejected": -105.8843765258789,
830
+ "loss": 0.2048,
831
+ "rewards/accuracies": 0.9437500238418579,
832
+ "rewards/chosen": 1.306982398033142,
833
+ "rewards/margins": 2.4515624046325684,
834
+ "rewards/rejected": -1.14251708984375,
835
+ "step": 275
836
+ },
837
+ {
838
+ "epoch": 1.8462809917355372,
839
+ "grad_norm": 15.379731063683517,
840
+ "learning_rate": 4.340277777777777e-08,
841
+ "logits/chosen": -1.618749976158142,
842
+ "logits/rejected": -1.649999976158142,
843
+ "logps/chosen": -119.0171890258789,
844
+ "logps/rejected": -173.796875,
845
+ "loss": 0.2651,
846
+ "rewards/accuracies": 0.9125000238418579,
847
+ "rewards/chosen": 1.188745141029358,
848
+ "rewards/margins": 2.2294921875,
849
+ "rewards/rejected": -1.0393798351287842,
850
+ "step": 280
851
+ },
852
+ {
853
+ "epoch": 1.8793388429752067,
854
+ "grad_norm": 6.061786674310964,
855
+ "learning_rate": 3.472222222222222e-08,
856
+ "logits/chosen": -1.7941405773162842,
857
+ "logits/rejected": -1.791601538658142,
858
+ "logps/chosen": -74.0921859741211,
859
+ "logps/rejected": -139.1843719482422,
860
+ "loss": 0.2076,
861
+ "rewards/accuracies": 0.9437500238418579,
862
+ "rewards/chosen": 1.3419921398162842,
863
+ "rewards/margins": 2.566699266433716,
864
+ "rewards/rejected": -1.22357177734375,
865
+ "step": 285
866
+ },
867
+ {
868
+ "epoch": 1.912396694214876,
869
+ "grad_norm": 5.230589963566852,
870
+ "learning_rate": 2.6041666666666667e-08,
871
+ "logits/chosen": -1.738671898841858,
872
+ "logits/rejected": -1.742773413658142,
873
+ "logps/chosen": -71.78593444824219,
874
+ "logps/rejected": -128.6593780517578,
875
+ "loss": 0.1771,
876
+ "rewards/accuracies": 0.9624999761581421,
877
+ "rewards/chosen": 1.4137451648712158,
878
+ "rewards/margins": 2.7740235328674316,
879
+ "rewards/rejected": -1.360070824623108,
880
+ "step": 290
881
+ },
882
+ {
883
+ "epoch": 1.9454545454545453,
884
+ "grad_norm": 9.184910311105565,
885
+ "learning_rate": 1.736111111111111e-08,
886
+ "logits/chosen": -1.646484375,
887
+ "logits/rejected": -1.681054711341858,
888
+ "logps/chosen": -52.390625,
889
+ "logps/rejected": -72.6812515258789,
890
+ "loss": 0.217,
891
+ "rewards/accuracies": 0.9312499761581421,
892
+ "rewards/chosen": 1.3534667491912842,
893
+ "rewards/margins": 2.360156297683716,
894
+ "rewards/rejected": -1.006866455078125,
895
+ "step": 295
896
+ },
897
+ {
898
+ "epoch": 1.9785123966942149,
899
+ "grad_norm": 25.401166784551016,
900
+ "learning_rate": 8.680555555555555e-09,
901
+ "logits/chosen": -1.756445288658142,
902
+ "logits/rejected": -1.7527344226837158,
903
+ "logps/chosen": -120.4921875,
904
+ "logps/rejected": -117.88749694824219,
905
+ "loss": 0.2535,
906
+ "rewards/accuracies": 0.9125000238418579,
907
+ "rewards/chosen": 1.1732177734375,
908
+ "rewards/margins": 2.3089842796325684,
909
+ "rewards/rejected": -1.13714599609375,
910
+ "step": 300
911
+ },
912
+ {
913
+ "epoch": 2.0,
914
+ "step": 304,
915
+ "total_flos": 0.0,
916
+ "train_loss": 0.34146574610158015,
917
+ "train_runtime": 4832.1445,
918
+ "train_samples_per_second": 2.002,
919
+ "train_steps_per_second": 0.063
920
+ }
921
+ ],
922
+ "logging_steps": 5,
923
+ "max_steps": 304,
924
+ "num_input_tokens_seen": 0,
925
+ "num_train_epochs": 2,
926
+ "save_steps": 50,
927
+ "stateful_callbacks": {
928
+ "TrainerControl": {
929
+ "args": {
930
+ "should_epoch_stop": false,
931
+ "should_evaluate": false,
932
+ "should_log": false,
933
+ "should_save": false,
934
+ "should_training_stop": false
935
+ },
936
+ "attributes": {}
937
+ }
938
+ },
939
+ "total_flos": 0.0,
940
+ "train_batch_size": 1,
941
+ "trial_name": null,
942
+ "trial_params": null
943
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad24b45b62b0a1c980a89307b40b1ecff57cdd44870e1adae756f8b457ce66a9
3
+ size 7800
vocab.json ADDED
The diff for this file is too large to render. See raw diff