thomasklbg committed on
Commit 7e5c0e9 · verified · 1 Parent(s): 1935396

Upload folder using huggingface_hub
checkpoint-760/chat_template.jinja ADDED
@@ -0,0 +1,16 @@
+ {%- set has_system = messages|selectattr('role', 'equalto', 'system')|list|length > 0 -%}{%- if not has_system -%}{{- '<|im_start|>system
+ You are a helpful function-calling AI assistant. ' -}}{%- if tools is none or (tools | length) == 0 -%}{{- 'You do not currently have access to any functions. <functions></functions><|im_end|>
+ ' -}}{%- else -%}{{- 'You are provided with function signatures within <functions></functions> XML tags. You may call one or more functions to assist with the user query. Output any function calls within <function_calls></function_calls> XML tags. Do not make assumptions about what values to plug into functions.' -}}{{- '<functions>' -}}{{- tools | tojson -}}{{- '</functions><|im_end|>
+ ' -}}{%- endif -%}{%- endif -%}{%- for message in messages -%}{%- if message['role'] == 'system' -%}{{- '<|im_start|>system
+ ' + message['content'] -}}{%- if tools is not none -%}{{- '<functions>' -}}{{- tools | tojson -}}{{- '</functions>' -}}{%- elif message.get('functions', none) is not none -%}{{- ' <functions>' + message['functions'] + '</functions>' -}}{%- endif -%}{{- '<|im_end|>
+ ' -}}{%- elif message['role'] == 'user' -%}{{- '<|im_start|>user
+ ' + message['content'] + '<|im_end|>
+ ' -}}{%- elif message['role'] == 'assistant' -%}{{- '<|im_start|>assistant
+ ' -}}{%- if message.get('content', none) is not none -%}{{- message['content'] -}}{%- endif -%}{%- if message.get('function_calls', none) is not none -%}{{- '<function_calls>' + message['function_calls'] + '</function_calls>' -}}{% elif message.get('tool_calls', none) is not none %}{{- '<function_calls>' -}}{%- for tool_call in message['tool_calls'] %}{%- if tool_call is mapping and tool_call.get('function', none) is not none %}{%- set args = tool_call['function']['arguments'] -%}{%- set ns = namespace(arguments_list=[]) -%}{%- for key, value in args.items() -%}{%- set ns.arguments_list = ns.arguments_list + [key ~ '=' ~ (value | tojson)] -%}{%- endfor -%}{%- set arguments = ns.arguments_list | join(', ') -%}{{- tool_call['function']['name'] + '(' + arguments + ')' -}}{%- if not loop.last -%}{{ '
+ ' }}{%- endif -%}{% else %}{{- tool_call -}}{%- endif %}{%- endfor %}{{- '</function_calls>' -}}{%- endif -%}{%- if not loop.last -%}{{- '<|im_end|>' + '
+ ' -}}{%- else -%}{{- eos_token -}}{%- endif -%}{%- elif message['role'] == 'environment' -%}{{- '<|im_start|>environment
+ ' + message['content'] + '<|im_end|>
+ ' -}}{%- elif message['role'] == 'tool' -%}{{- '<|im_start|>environment
+ ' + message['content'] + '<|im_end|>
+ ' -}}{%- endif -%}{%- if loop.last and add_generation_prompt -%}{{- '<|im_start|>assistant
+ ' -}}{%- endif -%}{%- endfor -%}
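
The template above builds a ChatML-style prompt: it injects a default function-calling system turn when none is supplied, serializes tools into <functions>...</functions> tags, and rewrites structured tool_calls entries as name(arg=value) calls inside <function_calls>...</function_calls>. A minimal sketch of rendering it with transformers (the local checkpoint path and the get_weather tool schema are illustrative assumptions, not part of this commit):

# Sketch: render the chat template with one tool attached.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("checkpoint-760")

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",  # hypothetical tool, for illustration only
        "description": "Look up the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

messages = [{"role": "user", "content": "What's the weather in Oslo?"}]

# Produces the system turn (with <functions>...</functions>), the user turn,
# and a trailing '<|im_start|>assistant' generation prompt.
prompt = tokenizer.apply_chat_template(
    messages, tools=tools, add_generation_prompt=True, tokenize=False
)
print(prompt)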
checkpoint-760/config.json ADDED
@@ -0,0 +1,69 @@
+ {
+ "architectures": [
+ "Olmo3ForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 100257,
+ "dtype": "bfloat16",
+ "eos_token_id": 100257,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 11008,
+ "layer_types": [
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "full_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "full_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "full_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "full_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "full_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "full_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "full_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 65536,
+ "model_type": "olmo3",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 32,
+ "pad_token_id": 100277,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": {
+ "attention_factor": 1.2079441541679836,
+ "beta_fast": 32,
+ "beta_slow": 1,
+ "factor": 8.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "yarn"
+ },
+ "rope_theta": 500000,
+ "sliding_window": 4096,
+ "tie_word_embeddings": false,
+ "transformers_version": "4.57.3",
+ "use_cache": true,
+ "vocab_size": 100278
+ }
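
This config describes a 32-layer Olmo3 decoder that interleaves three sliding-window attention layers (window 4096) with one full-attention layer, and uses YaRN rope scaling to stretch the original 8192-token context by a factor of 8.0 to 65536 positions. A quick sanity check of those relationships (the local path is an assumption; requires a transformers release with Olmo3 support, such as the 4.57.3 recorded above):

# Sketch: load the config and verify the attention layout and YaRN window.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("checkpoint-760")

assert config.num_hidden_layers == 32
# One full-attention layer per block of four (8 total across 32 layers).
assert config.layer_types.count("full_attention") == 8
# YaRN: 8192 original positions * factor 8.0 -> 65536 max positions.
assert (config.rope_scaling["original_max_position_embeddings"]
        * config.rope_scaling["factor"]) == config.max_position_embeddings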
checkpoint-760/generation_config.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "_from_model_config": true,
+ "bos_token_id": 100257,
+ "do_sample": true,
+ "eos_token_id": [
+ 100265,
+ 100257
+ ],
+ "max_new_tokens": 32768,
+ "pad_token_id": 100277,
+ "transformers_version": "4.57.3"
+ }
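
Generation defaults to sampling and stops on either <|im_end|> (100265) or <|endoftext|> (100257). A minimal sketch, assuming the checkpoint directory is available locally:

# Sketch: a sampling run with the defaults above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("checkpoint-760")
model = AutoModelForCausalLM.from_pretrained(
    "checkpoint-760", torch_dtype=torch.bfloat16, device_map="auto"
)

inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
# eos_token_id / pad_token_id are picked up from generation_config.json.
out = model.generate(**inputs, max_new_tokens=64, do_sample=True)
print(tokenizer.decode(out[0], skip_special_tokens=True))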
checkpoint-760/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-760/model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c66658a028ea19b0dd0304655fd2e4bdf77ec7897bb74caec61ef3139332c1e7
+ size 4969984976
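
The three lines above are a Git LFS pointer, not the weights themselves: the oid is the SHA-256 of the actual ~4.97 GB shard, which LFS downloads on checkout. A sketch of verifying a fetched shard against its pointer (assumes the real file has already been pulled, e.g. via git lfs pull):

# Sketch: hash the shard in 1 MiB chunks and compare against the pointer's oid.
import hashlib

EXPECTED = "c66658a028ea19b0dd0304655fd2e4bdf77ec7897bb74caec61ef3139332c1e7"

h = hashlib.sha256()
with open("checkpoint-760/model-00001-of-00003.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert h.hexdigest() == EXPECTED, "shard does not match its LFS pointer"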
checkpoint-760/model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a3aaec32c2c6438fe2026d99e0062a98c6e44b2c55f2232b9ee00ea3d055e9b7
+ size 4981161496
checkpoint-760/model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:91dfacd9c2e8a580e563650ba1716b33921954cad2ef9f1e86404602f03c282d
+ size 4644917240
checkpoint-760/model.safetensors.index.json ADDED
@@ -0,0 +1,363 @@
+ {
+ "metadata": {
+ "total_parameters": 7298011136,
+ "total_size": 14596022272
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00003-of-00003.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.post_feedforward_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.post_feedforward_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.10.post_feedforward_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.10.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.10.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.11.post_feedforward_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.11.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.11.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.12.post_feedforward_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.12.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.12.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.13.post_feedforward_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.13.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.13.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.14.post_feedforward_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.14.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.14.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.15.post_feedforward_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.15.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.15.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.16.post_feedforward_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.16.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.16.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.17.post_feedforward_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.17.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.17.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.18.post_feedforward_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.18.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.18.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.19.post_feedforward_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.19.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.post_feedforward_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.20.post_feedforward_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.20.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.21.post_feedforward_layernorm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.21.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.21.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.22.post_feedforward_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.22.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.22.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.23.post_feedforward_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.23.self_attn.k_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.23.self_attn.q_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.post_feedforward_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.self_attn.k_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.self_attn.q_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.post_feedforward_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.self_attn.k_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.self_attn.q_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.post_feedforward_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.self_attn.k_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.self_attn.q_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.post_feedforward_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.self_attn.k_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.self_attn.q_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.post_feedforward_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.self_attn.k_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.self_attn.q_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.post_feedforward_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.self_attn.k_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.self_attn.q_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.post_feedforward_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.post_feedforward_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.self_attn.k_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.self_attn.q_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.post_feedforward_layernorm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.self_attn.q_norm.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.post_feedforward_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.post_feedforward_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.post_feedforward_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.post_feedforward_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.post_feedforward_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.post_feedforward_layernorm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.norm.weight": "model-00003-of-00003.safetensors"
+ }
+ }
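
The index maps every tensor name to the shard that holds it (7,298,011,136 parameters, 14,596,022,272 bytes total, consistent with bfloat16). from_pretrained consults it automatically, but it also allows pulling a single tensor without loading the whole model; a sketch (paths are assumptions):

# Sketch: resolve one tensor through the index and read it from its shard.
import json
from safetensors import safe_open

with open("checkpoint-760/model.safetensors.index.json") as fp:
    index = json.load(fp)

name = "model.layers.0.self_attn.q_proj.weight"
shard = index["weight_map"][name]  # -> "model-00001-of-00003.safetensors"

with safe_open(f"checkpoint-760/{shard}", framework="pt") as f:
    tensor = f.get_tensor(name)
print(tensor.shape)  # expected (4096, 4096) given the config above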
checkpoint-760/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3ccf4cb3c27300eaa002cb063804a1b3a86513c371e42bac7b15ceea97fa2eec
+ size 17276110630
checkpoint-760/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:557e4758dfc7a414eef0073c0d08ad7c255e55ad4d589a9b74edaa3cf89a575d
+ size 14960
checkpoint-760/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:72452d3138d0ca2ff89429e3294a834ae7a68e8596fc757735ca56ae52509d57
+ size 15024
checkpoint-760/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f36e306fb8ebcf53a167bfd6c9af74db410a269ada1e619e3e816f5269543b9d
+ size 15024
checkpoint-760/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bb47ce0c6f815a6f8302b0e3819b4c2315ca71dae3138d97fdceb765cdd0a039
+ size 15024
checkpoint-760/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:827fe8bf959965f6208320059009df02e09c8a140b1d7c87745a40445a082e8b
+ size 1064
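
optimizer.pt, scheduler.pt, and the per-rank rng_state_*.pth files (four ranks here) are what lets training resume exactly at global step 760 rather than restart. A rough sketch of resuming with transformers.Trainer; the model path, training arguments, and dataset are placeholders, since the original run's setup is not part of this commit:

# Sketch: resume training from this checkpoint (placeholders marked).
from transformers import AutoModelForCausalLM, Trainer, TrainingArguments

model = AutoModelForCausalLM.from_pretrained("checkpoint-760")
args = TrainingArguments(output_dir="out", per_device_train_batch_size=1)
train_dataset = ...  # placeholder: the original run's dataset is not included

trainer = Trainer(model=model, args=args, train_dataset=train_dataset)
# Restores optimizer, LR scheduler, and RNG states from checkpoint-760.
trainer.train(resume_from_checkpoint="checkpoint-760")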
checkpoint-760/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint-760/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-760/tokenizer_config.json ADDED
@@ -0,0 +1,189 @@
+ {
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "100256": {
+ "content": "<|extra_id_0|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "100257": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "100258": {
+ "content": "<|fim_prefix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "100259": {
+ "content": "<|fim_middle|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "100260": {
+ "content": "<|fim_suffix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "100261": {
+ "content": "|||PHONE_NUMBER|||",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "100262": {
+ "content": "|||EMAIL_ADDRESS|||",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "100263": {
+ "content": "|||IP_ADDRESS|||",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "100264": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "100265": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "100266": {
+ "content": "<functions>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "100267": {
+ "content": "</functions>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "100268": {
+ "content": "<function_calls>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "100269": {
+ "content": "</function_calls>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "100270": {
+ "content": "<|extra_id_1|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "100271": {
+ "content": "<|extra_id_2|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "100272": {
+ "content": "<|extra_id_3|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "100273": {
+ "content": "<|extra_id_4|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "100274": {
+ "content": "<|extra_id_5|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "100275": {
+ "content": "<|extra_id_6|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "100276": {
+ "content": "<|endofprompt|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "100277": {
+ "content": "<|pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|endoftext|>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|endoftext|>",
+ "extra_special_tokens": {},
+ "model_max_length": 65536,
+ "pad_token": "<|pad|>",
+ "tokenizer_class": "GPT2Tokenizer",
+ "unk_token": "<|endoftext|>"
+ }
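
The added_tokens_decoder pins the chat and function-calling markers to fixed ids on top of a GPT-2-style BPE vocabulary. Note the split: <|im_start|> and <|im_end|> are special tokens (removed by skip_special_tokens when decoding), while <functions> and <function_calls> are ordinary added tokens that survive decoding. A quick check (path is an assumption):

# Sketch: confirm the pinned ids and the special/non-special split.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("checkpoint-760")

assert tok.convert_tokens_to_ids("<|im_start|>") == 100264
assert tok.convert_tokens_to_ids("<|im_end|>") == 100265
assert tok.convert_tokens_to_ids("<functions>") == 100266
assert tok.pad_token_id == 100277

ids = tok.encode("<functions></functions>")
# <functions> is not special, so it is kept even with skip_special_tokens.
print(tok.decode(ids, skip_special_tokens=True))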
checkpoint-760/trainer_state.json ADDED
@@ -0,0 +1,1174 @@
+ {
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 2.0,
+ "eval_steps": 500,
+ "global_step": 760,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.026333113890717578,
+ "grad_norm": 2.109375,
+ "learning_rate": 2.3684210526315787e-07,
+ "loss": 0.1142,
+ "lras/base_loss": 0.10167421974547323,
+ "lras/critic_logp": -0.9331221472347808,
+ "lras/eos_logp_mean": -0.5980340207461268,
+ "lras/eos_logratio_mean": -0.052469537456636316,
+ "lras/len_signal_mean": -0.10167421974547323,
+ "lras/policy_logp": -0.8831515078396478,
+ "lras/signal_mean": -0.049970643251954536,
+ "lras/signal_std": 0.5512100008316339,
+ "step": 10
+ },
+ {
+ "epoch": 0.052666227781435156,
+ "grad_norm": 2.109375,
+ "learning_rate": 5e-07,
+ "loss": 0.1118,
+ "lras/base_loss": 0.09821037890214938,
+ "lras/critic_logp": -0.9952976092397623,
+ "lras/eos_logp_mean": -0.7277968910173513,
+ "lras/eos_logratio_mean": -0.043992197626357664,
+ "lras/len_signal_mean": -0.09821037890214938,
+ "lras/policy_logp": -0.9570521209021733,
+ "lras/signal_mean": -0.03824548757672638,
+ "lras/signal_std": 0.5455936640966683,
+ "step": 20
+ },
+ {
+ "epoch": 0.07899934167215274,
+ "grad_norm": 2.03125,
+ "learning_rate": 7.631578947368421e-07,
+ "loss": 0.1178,
+ "lras/base_loss": 0.10162280514559825,
+ "lras/critic_logp": -0.9195498289467471,
+ "lras/eos_logp_mean": -0.6399376740329898,
+ "lras/eos_logratio_mean": -0.11855290308740223,
+ "lras/len_signal_mean": -0.10162280514559825,
+ "lras/policy_logp": -0.8777651083465754,
+ "lras/signal_mean": -0.04178472033515842,
+ "lras/signal_std": 0.5618033302482217,
+ "step": 30
+ },
+ {
+ "epoch": 0.10533245556287031,
+ "grad_norm": 2.28125,
+ "learning_rate": 1.0263157894736843e-06,
+ "loss": 0.0924,
+ "lras/base_loss": 0.08383929146366427,
+ "lras/critic_logp": -0.9476410532435064,
+ "lras/eos_logp_mean": -0.6351981242245529,
+ "lras/eos_logratio_mean": -0.07531133993761614,
+ "lras/len_signal_mean": -0.08383929146366427,
+ "lras/policy_logp": -0.8975484069094307,
+ "lras/signal_mean": -0.05009264715579568,
+ "lras/signal_std": 0.5447276963386685,
+ "step": 40
+ },
+ {
+ "epoch": 0.1316655694535879,
+ "grad_norm": 2.0,
+ "learning_rate": 1.2894736842105264e-06,
+ "loss": 0.1126,
+ "lras/base_loss": 0.12420870506011852,
+ "lras/critic_logp": -0.9581334099634796,
+ "lras/eos_logp_mean": -0.6612339781131595,
+ "lras/eos_logratio_mean": -0.058000585943227635,
+ "lras/len_signal_mean": -0.12420870506011852,
+ "lras/policy_logp": -0.9160325896369954,
+ "lras/signal_mean": -0.042100816195298534,
+ "lras/signal_std": 0.5634231527801603,
+ "step": 50
+ },
+ {
+ "epoch": 0.15799868334430547,
+ "grad_norm": 2.109375,
+ "learning_rate": 1.5526315789473682e-06,
+ "loss": 0.0985,
+ "lras/base_loss": 0.10172083652432776,
+ "lras/critic_logp": -0.9688242651130583,
+ "lras/eos_logp_mean": -0.5217704355076421,
+ "lras/eos_logratio_mean": -0.09221591723398888,
+ "lras/len_signal_mean": -0.10172083652432776,
+ "lras/policy_logp": -0.9193514281016277,
+ "lras/signal_mean": -0.04947284637066594,
+ "lras/signal_std": 0.559893012791872,
+ "step": 60
+ },
+ {
+ "epoch": 0.18433179723502305,
+ "grad_norm": 2.21875,
+ "learning_rate": 1.8157894736842106e-06,
+ "loss": 0.1206,
+ "lras/base_loss": 0.0932533636863809,
+ "lras/critic_logp": -0.9705762892193726,
+ "lras/eos_logp_mean": -0.6604083704442019,
+ "lras/eos_logratio_mean": -0.05219597876712214,
+ "lras/len_signal_mean": -0.0932533636863809,
+ "lras/policy_logp": -0.9210699695716034,
+ "lras/signal_mean": -0.04950632269988121,
+ "lras/signal_std": 0.5388945213519036,
+ "step": 70
+ },
+ {
+ "epoch": 0.21066491112574062,
+ "grad_norm": 2.140625,
+ "learning_rate": 1.999905072250599e-06,
+ "loss": 0.1017,
+ "lras/base_loss": 0.09924555227044038,
+ "lras/critic_logp": -1.0084971192060963,
+ "lras/eos_logp_mean": -0.7261479496373795,
+ "lras/eos_logratio_mean": 0.005740139741101302,
+ "lras/len_signal_mean": -0.09924555227044038,
+ "lras/policy_logp": -0.9647895054685967,
+ "lras/signal_mean": -0.04370760570421148,
+ "lras/signal_std": 0.5396730012260378,
+ "step": 80
+ },
+ {
+ "epoch": 0.2369980250164582,
+ "grad_norm": 3.15625,
+ "learning_rate": 1.9982179691381198e-06,
+ "loss": 0.0908,
+ "lras/base_loss": 0.08481774836254771,
+ "lras/critic_logp": -0.9895585100273292,
+ "lras/eos_logp_mean": -0.6592542878817766,
+ "lras/eos_logratio_mean": -0.027437633188674225,
+ "lras/len_signal_mean": -0.08481774836254771,
+ "lras/policy_logp": -0.9531850873636806,
+ "lras/signal_mean": -0.036373416428257366,
+ "lras/signal_std": 0.5869219893123955,
+ "step": 90
+ },
+ {
+ "epoch": 0.2633311389071758,
+ "grad_norm": 2.71875,
+ "learning_rate": 1.994425456530222e-06,
+ "loss": 0.1129,
+ "lras/base_loss": 0.11199428366671782,
+ "lras/critic_logp": -0.9438399925283527,
+ "lras/eos_logp_mean": -0.8256492391199572,
+ "lras/eos_logratio_mean": -0.02534918904711958,
+ "lras/len_signal_mean": -0.11199428366671782,
+ "lras/policy_logp": -0.8999046212004524,
+ "lras/signal_mean": -0.043935376302829135,
+ "lras/signal_std": 0.5621409649495035,
+ "step": 100
+ },
+ {
+ "epoch": 0.2896642527978934,
+ "grad_norm": 2.40625,
+ "learning_rate": 1.988535533473508e-06,
+ "loss": 0.1024,
+ "lras/base_loss": 0.10810597011150094,
+ "lras/critic_logp": -0.9577996732205776,
+ "lras/eos_logp_mean": -0.8592553051421419,
+ "lras/eos_logratio_mean": -0.0717643543437589,
+ "lras/len_signal_mean": -0.10810597011150094,
+ "lras/policy_logp": -0.9219818851700149,
+ "lras/signal_mean": -0.03581778571356334,
+ "lras/signal_std": 0.5725194892380386,
+ "step": 110
+ },
+ {
+ "epoch": 0.31599736668861095,
+ "grad_norm": 3.015625,
+ "learning_rate": 1.9805606228057916e-06,
+ "loss": 0.1265,
+ "lras/base_loss": 0.12835807931842283,
+ "lras/critic_logp": -1.0095817961274958,
+ "lras/eos_logp_mean": -0.7684085411019623,
+ "lras/eos_logratio_mean": -0.08709225608035923,
+ "lras/len_signal_mean": -0.12835807931842283,
+ "lras/policy_logp": -0.9530015037296113,
+ "lras/signal_mean": -0.05658029363644257,
+ "lras/signal_std": 0.6249249442014844,
+ "step": 120
+ },
+ {
+ "epoch": 0.3423304805793285,
+ "grad_norm": 2.859375,
+ "learning_rate": 1.9705175449542357e-06,
+ "loss": 0.1224,
+ "lras/base_loss": 0.10125547938478122,
+ "lras/critic_logp": -0.9600592509813903,
+ "lras/eos_logp_mean": -1.0218269965684157,
+ "lras/eos_logratio_mean": -0.11965790124522754,
+ "lras/len_signal_mean": -0.10125547938478122,
+ "lras/policy_logp": -0.9064808995336749,
+ "lras/signal_mean": -0.05357835431948087,
+ "lras/signal_std": 0.610074704233557,
+ "step": 130
+ },
+ {
+ "epoch": 0.3686635944700461,
+ "grad_norm": 3.359375,
+ "learning_rate": 1.9584274824582526e-06,
+ "loss": 0.1018,
+ "lras/base_loss": 0.10054350115387933,
+ "lras/critic_logp": -1.0451293905971037,
+ "lras/eos_logp_mean": -0.9627558108069934,
+ "lras/eos_logratio_mean": -0.045046407813788394,
+ "lras/len_signal_mean": -0.10054350115387933,
+ "lras/policy_logp": -0.9888114047770802,
+ "lras/signal_mean": -0.05631798307189244,
+ "lras/signal_std": 0.6387343685142696,
+ "step": 140
+ },
+ {
+ "epoch": 0.39499670836076367,
+ "grad_norm": 3.625,
+ "learning_rate": 1.944315935291962e-06,
+ "loss": 0.1035,
+ "lras/base_loss": 0.12737492522719548,
+ "lras/critic_logp": -0.9631433882083107,
+ "lras/eos_logp_mean": -0.9592411807272583,
+ "lras/eos_logratio_mean": -0.07466923534375383,
+ "lras/len_signal_mean": -0.12737492522719548,
+ "lras/policy_logp": -0.9105693695366387,
+ "lras/signal_mean": -0.05257401920949931,
+ "lras/signal_std": 0.5964867433533072,
+ "step": 150
+ },
+ {
+ "epoch": 0.42132982225148125,
+ "grad_norm": 4.3125,
+ "learning_rate": 1.9282126670804613e-06,
+ "loss": 0.123,
+ "lras/base_loss": 0.11884618600852263,
+ "lras/critic_logp": -1.0371889562378134,
+ "lras/eos_logp_mean": -1.3217710635391995,
+ "lras/eos_logratio_mean": -0.1382402204500977,
+ "lras/len_signal_mean": -0.11884618600852263,
+ "lras/policy_logp": -0.9810781215265075,
+ "lras/signal_mean": -0.056110837148049986,
+ "lras/signal_std": 0.6808470624499023,
+ "step": 160
+ },
+ {
+ "epoch": 0.4476629361421988,
+ "grad_norm": 4.6875,
+ "learning_rate": 1.9101516423233367e-06,
+ "loss": 0.1198,
+ "lras/base_loss": 0.12323949924029876,
+ "lras/critic_logp": -1.0210646749700396,
+ "lras/eos_logp_mean": -1.4520364223513753,
+ "lras/eos_logratio_mean": 0.058064849331276494,
+ "lras/len_signal_mean": -0.12323949924029876,
+ "lras/policy_logp": -0.9611373103043608,
+ "lras/signal_mean": -0.05992736518949152,
+ "lras/signal_std": 0.6614585957024246,
+ "step": 170
+ },
+ {
+ "epoch": 0.4739960500329164,
+ "grad_norm": 4.875,
+ "learning_rate": 1.8901709547578243e-06,
+ "loss": 0.1146,
+ "lras/base_loss": 0.09676948979840745,
+ "lras/critic_logp": -1.0977452527910458,
+ "lras/eos_logp_mean": -1.5375359531419235,
+ "lras/eos_logratio_mean": -0.02415443646314088,
+ "lras/len_signal_mean": -0.09676948979840745,
+ "lras/policy_logp": -1.048075962383212,
+ "lras/signal_mean": -0.04966929181947091,
+ "lras/signal_std": 0.7021123599261045,
+ "step": 180
+ },
+ {
+ "epoch": 0.500329163923634,
+ "grad_norm": 5.9375,
+ "learning_rate": 1.868312747012715e-06,
+ "loss": 0.1145,
+ "lras/base_loss": 0.1299875703494763,
+ "lras/critic_logp": -1.1543702971034613,
+ "lras/eos_logp_mean": -1.7598466821829788,
+ "lras/eos_logratio_mean": -0.09233958793920465,
+ "lras/len_signal_mean": -0.1299875703494763,
+ "lras/policy_logp": -1.089421026425288,
+ "lras/signal_mean": -0.06494927172706769,
+ "lras/signal_std": 0.7361256897449493,
+ "step": 190
+ },
+ {
+ "epoch": 0.5266622778143516,
+ "grad_norm": 6.65625,
+ "learning_rate": 1.844623121722465e-06,
+ "loss": 0.1085,
+ "lras/base_loss": 0.10210197179476381,
+ "lras/critic_logp": -1.166679017353766,
+ "lras/eos_logp_mean": -2.256354816397652,
+ "lras/eos_logratio_mean": 0.06535769480396994,
+ "lras/len_signal_mean": -0.10210197179476381,
+ "lras/policy_logp": -1.1016303254100095,
+ "lras/signal_mean": -0.06504870085186205,
+ "lras/signal_std": 0.700895220413804,
+ "step": 200
+ },
+ {
+ "epoch": 0.5529953917050692,
+ "grad_norm": 6.625,
+ "learning_rate": 1.8191520442889917e-06,
+ "loss": 0.0873,
+ "lras/base_loss": 0.0839455575478496,
+ "lras/critic_logp": -1.16306316395247,
+ "lras/eos_logp_mean": -2.341231356584467,
+ "lras/eos_logratio_mean": -0.05382752762234304,
+ "lras/len_signal_mean": -0.0839455575478496,
+ "lras/policy_logp": -1.1152066120170876,
+ "lras/signal_mean": -0.047856557965688676,
+ "lras/signal_std": 0.714643185120076,
+ "step": 210
+ },
+ {
+ "epoch": 0.5793285055957867,
+ "grad_norm": 6.34375,
+ "learning_rate": 1.7919532374962413e-06,
+ "loss": 0.1069,
+ "lras/base_loss": 0.10131343678513076,
+ "lras/critic_logp": -1.1800464059677862,
+ "lras/eos_logp_mean": -2.5432689307490364,
+ "lras/eos_logratio_mean": -0.1375842320965603,
+ "lras/len_signal_mean": -0.10131343678513076,
+ "lras/policy_logp": -1.1335452714123295,
+ "lras/signal_mean": -0.04650113194303708,
+ "lras/signal_std": 0.7621932537294924,
+ "step": 220
+ },
+ {
+ "epoch": 0.6056616194865043,
+ "grad_norm": 5.375,
+ "learning_rate": 1.7630840681998065e-06,
+ "loss": 0.0999,
+ "lras/base_loss": 0.11961322827119147,
+ "lras/critic_logp": -1.2542716302399035,
+ "lras/eos_logp_mean": -3.1082507333368996,
+ "lras/eos_logratio_mean": -0.18245783513411878,
+ "lras/len_signal_mean": -0.11961322827119147,
+ "lras/policy_logp": -1.1768926516372546,
+ "lras/signal_mean": -0.07737897220920904,
+ "lras/signal_std": 0.7640609228983521,
+ "step": 230
+ },
+ {
+ "epoch": 0.6319947333772219,
+ "grad_norm": 5.375,
+ "learning_rate": 1.7326054263305844e-06,
+ "loss": 0.0977,
+ "lras/base_loss": 0.14090983905480242,
+ "lras/critic_logp": -1.3281353382264833,
+ "lras/eos_logp_mean": -3.7832687627058474,
+ "lras/eos_logratio_mean": -0.2914778530233889,
+ "lras/len_signal_mean": -0.14090983905480242,
+ "lras/policy_logp": -1.245892498020099,
+ "lras/signal_mean": -0.08224283723857301,
+ "lras/signal_std": 0.8179677240550518,
+ "step": 240
+ },
+ {
+ "epoch": 0.6583278472679395,
+ "grad_norm": 5.8125,
+ "learning_rate": 1.7005815964676785e-06,
+ "loss": 0.0713,
+ "lras/base_loss": 0.09521725961567426,
+ "lras/critic_logp": -1.3763090826428979,
+ "lras/eos_logp_mean": -4.498324389662594,
+ "lras/eos_logratio_mean": -0.13128667979035527,
+ "lras/len_signal_mean": -0.09521725961567426,
+ "lras/policy_logp": -1.3112279033000536,
+ "lras/signal_mean": -0.06508118566841849,
+ "lras/signal_std": 0.7666989624034614,
+ "step": 250
+ },
+ {
+ "epoch": 0.684660961158657,
+ "grad_norm": 5.9375,
+ "learning_rate": 1.6670801222514133e-06,
+ "loss": 0.0811,
392
+ "lras/base_loss": 0.04594566313608084,
393
+ "lras/critic_logp": -1.4262544499332057,
394
+ "lras/eos_logp_mean": -5.154077760595828,
395
+ "lras/eos_logratio_mean": -0.11717722485773266,
396
+ "lras/len_signal_mean": -0.04594566313608084,
397
+ "lras/policy_logp": -1.3737510004653928,
398
+ "lras/signal_mean": -0.05250344919726765,
399
+ "lras/signal_std": 0.8633918762207031,
400
+ "step": 260
401
+ },
402
+ {
403
+ "epoch": 0.7109940750493746,
404
+ "grad_norm": 7.53125,
405
+ "learning_rate": 1.6321716639224433e-06,
406
+ "loss": 0.0677,
407
+ "lras/base_loss": 0.08778424562478904,
408
+ "lras/critic_logp": -1.3960568453598623,
409
+ "lras/eos_logp_mean": -5.789941209973767,
410
+ "lras/eos_logratio_mean": -0.27142641189275307,
411
+ "lras/len_signal_mean": -0.08778424562478904,
412
+ "lras/policy_logp": -1.333653977581725,
413
+ "lras/signal_mean": -0.06240286751914599,
414
+ "lras/signal_std": 0.8300917990505695,
415
+ "step": 270
416
+ },
417
+ {
418
+ "epoch": 0.7373271889400922,
419
+ "grad_norm": 6.0625,
420
+ "learning_rate": 1.5959298492874288e-06,
421
+ "loss": 0.0679,
422
+ "lras/base_loss": 0.09906132236574194,
423
+ "lras/critic_logp": -1.498338274056929,
424
+ "lras/eos_logp_mean": -5.556122765317559,
425
+ "lras/eos_logratio_mean": -0.3260869638994336,
426
+ "lras/len_signal_mean": -0.09906132236574194,
427
+ "lras/policy_logp": -1.425540412903731,
428
+ "lras/signal_mean": -0.07279786617986106,
429
+ "lras/signal_std": 0.8526875531300903,
430
+ "step": 280
431
+ },
432
+ {
433
+ "epoch": 0.7636603028308098,
434
+ "grad_norm": 6.65625,
435
+ "learning_rate": 1.558431118425614e-06,
436
+ "loss": 0.0328,
437
+ "lras/base_loss": 0.03676887405454181,
438
+ "lras/critic_logp": -1.4990205292569576,
439
+ "lras/eos_logp_mean": -6.733434393815696,
440
+ "lras/eos_logratio_mean": 0.036905642971396445,
441
+ "lras/len_signal_mean": -0.03676887405454181,
442
+ "lras/policy_logp": -1.4447339227398688,
443
+ "lras/signal_mean": -0.05428659613297363,
444
+ "lras/signal_std": 0.912068764027208,
445
+ "step": 290
446
+ },
447
+ {
448
+ "epoch": 0.7899934167215273,
449
+ "grad_norm": 7.125,
450
+ "learning_rate": 1.5197545624638505e-06,
451
+ "loss": 0.0699,
452
+ "lras/base_loss": 0.07409377554431558,
453
+ "lras/critic_logp": -1.5187017017601145,
454
+ "lras/eos_logp_mean": -7.141052421624773,
455
+ "lras/eos_logratio_mean": -0.07092250637360849,
456
+ "lras/len_signal_mean": -0.07409377554431558,
457
+ "lras/policy_logp": -1.4784312940628777,
458
+ "lras/signal_mean": -0.04027039942419637,
459
+ "lras/signal_std": 0.9233064419589937,
460
+ "step": 300
461
+ },
462
+ {
463
+ "epoch": 0.8163265306122449,
464
+ "grad_norm": 8.5625,
465
+ "learning_rate": 1.4799817567601156e-06,
466
+ "loss": 0.0349,
467
+ "lras/base_loss": 0.07394144225254422,
468
+ "lras/critic_logp": -1.569767797320449,
469
+ "lras/eos_logp_mean": -8.250754566118122,
470
+ "lras/eos_logratio_mean": -0.09081489420495928,
471
+ "lras/len_signal_mean": -0.07394144225254422,
472
+ "lras/policy_logp": -1.5029217528392145,
473
+ "lras/signal_mean": -0.0668460627845576,
474
+ "lras/signal_std": 0.9525773910805583,
475
+ "step": 310
476
+ },
477
+ {
478
+ "epoch": 0.8426596445029625,
479
+ "grad_norm": 6.78125,
480
+ "learning_rate": 1.4391965888473703e-06,
481
+ "loss": 0.0505,
482
+ "lras/base_loss": 0.01940623640548438,
483
+ "lras/critic_logp": -1.5741578373848195,
484
+ "lras/eos_logp_mean": -8.720079303951934,
485
+ "lras/eos_logratio_mean": -0.27737888786941767,
486
+ "lras/len_signal_mean": -0.01940623640548438,
487
+ "lras/policy_logp": -1.532193167358912,
488
+ "lras/signal_mean": -0.04196467372728212,
489
+ "lras/signal_std": 0.9471153903752565,
490
+ "step": 320
491
+ },
492
+ {
493
+ "epoch": 0.8689927583936801,
494
+ "grad_norm": 8.0625,
495
+ "learning_rate": 1.39748508150065e-06,
496
+ "loss": 0.0336,
497
+ "lras/base_loss": 0.09266982418484986,
498
+ "lras/critic_logp": -1.6629574067419906,
499
+ "lras/eos_logp_mean": -9.290708424896001,
500
+ "lras/eos_logratio_mean": -0.22771619798149914,
501
+ "lras/len_signal_mean": -0.09266982418484986,
502
+ "lras/policy_logp": -1.5927546886739972,
503
+ "lras/signal_mean": -0.07020271456252122,
504
+ "lras/signal_std": 0.9127426297403872,
505
+ "step": 330
506
+ },
507
+ {
508
+ "epoch": 0.8953258722843976,
509
+ "grad_norm": 9.0625,
510
+ "learning_rate": 1.3549352113005727e-06,
511
+ "loss": -0.0109,
512
+ "lras/base_loss": -0.04638232409124612,
513
+ "lras/critic_logp": -1.6357357667299923,
514
+ "lras/eos_logp_mean": -9.180945594608783,
515
+ "lras/eos_logratio_mean": 0.0466614278499037,
516
+ "lras/len_signal_mean": 0.04638232409124612,
517
+ "lras/policy_logp": -1.6036042739469465,
518
+ "lras/signal_mean": -0.03213150173185709,
519
+ "lras/signal_std": 0.8950126395560801,
520
+ "step": 340
521
+ },
522
+ {
523
+ "epoch": 0.9216589861751152,
524
+ "grad_norm": 10.375,
525
+ "learning_rate": 1.3116367230759414e-06,
526
+ "loss": 0.0409,
527
+ "lras/base_loss": 0.06272783552994951,
528
+ "lras/critic_logp": -1.7237166816719136,
529
+ "lras/eos_logp_mean": -9.778454429842531,
530
+ "lras/eos_logratio_mean": -0.5281537286005914,
531
+ "lras/len_signal_mean": -0.06272783552994951,
532
+ "lras/policy_logp": -1.6541788693096684,
533
+ "lras/signal_mean": -0.06953781113950352,
534
+ "lras/signal_std": 0.9097115381620824,
535
+ "step": 350
536
+ },
537
+ {
538
+ "epoch": 0.9479921000658328,
539
+ "grad_norm": 8.3125,
540
+ "learning_rate": 1.2676809406168133e-06,
541
+ "loss": 0.0195,
542
+ "lras/base_loss": 0.020540355570847168,
543
+ "lras/critic_logp": -1.783623909266041,
544
+ "lras/eos_logp_mean": -10.980530028697103,
545
+ "lras/eos_logratio_mean": -0.20119506297633052,
546
+ "lras/len_signal_mean": -0.020540355570847168,
547
+ "lras/policy_logp": -1.737673381956658,
548
+ "lras/signal_mean": -0.04595051322927215,
549
+ "lras/signal_std": 0.9669136556796729,
550
+ "step": 360
551
+ },
552
+ {
553
+ "epoch": 0.9743252139565504,
554
+ "grad_norm": 10.0,
555
+ "learning_rate": 1.2231605740572765e-06,
556
+ "loss": 0.0101,
557
+ "lras/base_loss": -0.014017862822220195,
558
+ "lras/critic_logp": -1.8771186804407587,
559
+ "lras/eos_logp_mean": -10.722892824187875,
560
+ "lras/eos_logratio_mean": -0.19163870057091117,
561
+ "lras/len_signal_mean": 0.014017862822220195,
562
+ "lras/policy_logp": -1.8190668903678329,
563
+ "lras/signal_mean": -0.058051801457459584,
564
+ "lras/signal_std": 0.9267905389890074,
565
+ "step": 370
566
+ },
567
+ {
568
+ "epoch": 1.0,
569
+ "grad_norm": 10.4375,
570
+ "learning_rate": 1.1781695243341932e-06,
571
+ "loss": 0.0096,
572
+ "lras/base_loss": 0.01787232184328903,
573
+ "lras/critic_logp": -1.8658862981943412,
574
+ "lras/eos_logp_mean": -11.325760680035902,
575
+ "lras/eos_logratio_mean": -0.3614978581093825,
576
+ "lras/len_signal_mean": -0.01787232184328903,
577
+ "lras/policy_logp": -1.7924621608585636,
578
+ "lras/signal_mean": -0.07342414031628625,
579
+ "lras/signal_std": 0.9301170832835711,
580
+ "step": 380
581
+ },
582
+ {
583
+ "epoch": 1.0263331138907177,
584
+ "grad_norm": 10.0625,
585
+ "learning_rate": 1.1328026851343365e-06,
586
+ "loss": -0.0022,
587
+ "lras/base_loss": -0.03552438716578763,
588
+ "lras/critic_logp": -1.9353655989692151,
589
+ "lras/eos_logp_mean": -12.03261490613222,
590
+ "lras/eos_logratio_mean": -0.21478390959091484,
591
+ "lras/len_signal_mean": 0.03552438716578763,
592
+ "lras/policy_logp": -1.8756053114619102,
593
+ "lras/signal_mean": -0.059760286731985225,
594
+ "lras/signal_std": 0.9637264542281627,
595
+ "step": 390
596
+ },
597
+ {
598
+ "epoch": 1.0526662277814351,
599
+ "grad_norm": 11.5,
600
+ "learning_rate": 1.0871557427476583e-06,
601
+ "loss": 0.014,
602
+ "lras/base_loss": 0.020028115014429203,
603
+ "lras/critic_logp": -2.0396122023801113,
604
+ "lras/eos_logp_mean": -12.066804607212543,
605
+ "lras/eos_logratio_mean": -0.6468546989664901,
606
+ "lras/len_signal_mean": -0.020028115014429203,
607
+ "lras/policy_logp": -1.990343654454398,
608
+ "lras/signal_mean": -0.049268564877216176,
609
+ "lras/signal_std": 0.9831893128342927,
610
+ "step": 400
611
+ },
612
+ {
613
+ "epoch": 1.0789993416721528,
614
+ "grad_norm": 12.375,
615
+ "learning_rate": 1.041324974248813e-06,
616
+ "loss": 0.0192,
617
+ "lras/base_loss": 0.03678358557954198,
618
+ "lras/critic_logp": -2.008346917672788,
619
+ "lras/eos_logp_mean": -13.69460350126028,
620
+ "lras/eos_logratio_mean": -0.22998049389570951,
621
+ "lras/len_signal_mean": -0.03678358557954198,
622
+ "lras/policy_logp": -1.9487180784452243,
623
+ "lras/signal_mean": -0.05962882687038085,
624
+ "lras/signal_std": 1.0659304469823838,
625
+ "step": 410
626
+ },
627
+ {
628
+ "epoch": 1.1053324555628703,
629
+ "grad_norm": 10.9375,
630
+ "learning_rate": 9.954070444326292e-07,
631
+ "loss": 0.0331,
632
+ "lras/base_loss": 0.04220464527315926,
633
+ "lras/critic_logp": -2.0565851966669575,
634
+ "lras/eos_logp_mean": -13.397886303812266,
635
+ "lras/eos_logratio_mean": -0.2665590210468508,
636
+ "lras/len_signal_mean": -0.04220464527315926,
637
+ "lras/policy_logp": -2.001194008887519,
638
+ "lras/signal_mean": -0.05539118589257863,
639
+ "lras/signal_std": 1.02963876305148,
640
+ "step": 420
641
+ },
642
+ {
643
+ "epoch": 1.131665569453588,
644
+ "grad_norm": 11.875,
645
+ "learning_rate": 9.49498801931804e-07,
646
+ "loss": 0.0367,
647
+ "lras/base_loss": -0.0027663632179610433,
648
+ "lras/critic_logp": -2.06930486489408,
649
+ "lras/eos_logp_mean": -14.120340882986785,
650
+ "lras/eos_logratio_mean": -0.04804678615182638,
651
+ "lras/len_signal_mean": 0.0027663632179610433,
652
+ "lras/policy_logp": -1.9985212805169184,
653
+ "lras/signal_mean": -0.0707835745968341,
654
+ "lras/signal_std": 0.9479865215718746,
655
+ "step": 430
656
+ },
657
+ {
658
+ "epoch": 1.1579986833443054,
659
+ "grad_norm": 14.6875,
660
+ "learning_rate": 9.036970749468583e-07,
661
+ "loss": 0.0175,
662
+ "lras/base_loss": 0.044095185585319996,
663
+ "lras/critic_logp": -2.247919617028651,
664
+ "lras/eos_logp_mean": -14.632720437645911,
665
+ "lras/eos_logratio_mean": -0.2282877266407013,
666
+ "lras/len_signal_mean": -0.044095185585319996,
667
+ "lras/policy_logp": -2.15015134356286,
668
+ "lras/signal_mean": -0.09776828288686631,
669
+ "lras/signal_std": 1.0318338803946971,
670
+ "step": 440
671
+ },
672
+ {
673
+ "epoch": 1.1843317972350231,
674
+ "grad_norm": 13.4375,
675
+ "learning_rate": 8.580984670191846e-07,
676
+ "loss": 0.0309,
677
+ "lras/base_loss": -0.030644303339067848,
678
+ "lras/critic_logp": -2.279695383551006,
679
+ "lras/eos_logp_mean": -14.45384646076709,
680
+ "lras/eos_logratio_mean": -0.08510959930717946,
681
+ "lras/len_signal_mean": 0.030644303339067848,
682
+ "lras/policy_logp": -2.239907492669972,
683
+ "lras/signal_mean": -0.03978788317634428,
684
+ "lras/signal_std": 1.1209653861820699,
685
+ "step": 450
686
+ },
687
+ {
688
+ "epoch": 1.2106649111257406,
689
+ "grad_norm": 14.125,
690
+ "learning_rate": 8.127991532779401e-07,
691
+ "loss": 0.0096,
692
+ "lras/base_loss": -0.03979152666870504,
693
+ "lras/critic_logp": -2.231798601824813,
694
+ "lras/eos_logp_mean": -15.657092943787575,
695
+ "lras/eos_logratio_mean": 0.09287480898201465,
696
+ "lras/len_signal_mean": 0.03979152666870504,
697
+ "lras/policy_logp": -2.1883991963309577,
698
+ "lras/signal_mean": -0.04339941730130521,
699
+ "lras/signal_std": 1.0946278177201747,
700
+ "step": 460
701
+ },
702
+ {
703
+ "epoch": 1.2369980250164583,
704
+ "grad_norm": 14.375,
705
+ "learning_rate": 7.678946775905323e-07,
706
+ "loss": -0.003,
707
+ "lras/base_loss": 0.08693211713980417,
708
+ "lras/critic_logp": -2.4233703207256694,
709
+ "lras/eos_logp_mean": -15.827413031458855,
710
+ "lras/eos_logratio_mean": -0.17956097405403854,
711
+ "lras/len_signal_mean": -0.08693211713980417,
712
+ "lras/policy_logp": -2.349267315874182,
713
+ "lras/signal_mean": -0.07410297757779284,
714
+ "lras/signal_std": 1.134578407369554,
715
+ "step": 470
716
+ },
717
+ {
718
+ "epoch": 1.2633311389071757,
719
+ "grad_norm": 15.75,
720
+ "learning_rate": 7.234797510445411e-07,
721
+ "loss": -0.0003,
722
+ "lras/base_loss": 0.025725822610547767,
723
+ "lras/critic_logp": -2.434139209148783,
724
+ "lras/eos_logp_mean": -15.722262739762664,
725
+ "lras/eos_logratio_mean": -0.026692338287830353,
726
+ "lras/len_signal_mean": -0.025725822610547767,
727
+ "lras/policy_logp": -2.328056712114324,
728
+ "lras/signal_mean": -0.10608248874414787,
729
+ "lras/signal_std": 1.0992211825214326,
730
+ "step": 480
731
+ },
732
+ {
733
+ "epoch": 1.2896642527978934,
734
+ "grad_norm": 15.3125,
735
+ "learning_rate": 6.79648052186115e-07,
736
+ "loss": 0.0369,
737
+ "lras/base_loss": 0.03026081353018526,
738
+ "lras/critic_logp": -2.110416657504174,
739
+ "lras/eos_logp_mean": -15.254110097885132,
740
+ "lras/eos_logratio_mean": -0.496195587515831,
741
+ "lras/len_signal_mean": -0.03026081353018526,
742
+ "lras/policy_logp": -2.0470811323319777,
743
+ "lras/signal_mean": -0.06333551601408564,
744
+ "lras/signal_std": 0.9533898154273629,
745
+ "step": 490
746
+ },
747
+ {
748
+ "epoch": 1.315997366688611,
749
+ "grad_norm": 15.8125,
750
+ "learning_rate": 6.364920294361699e-07,
751
+ "loss": 0.0575,
752
+ "lras/base_loss": 0.06158552574343048,
753
+ "lras/critic_logp": -2.224405850326711,
754
+ "lras/eos_logp_mean": -15.108362324908375,
755
+ "lras/eos_logratio_mean": -0.7382585784420371,
756
+ "lras/len_signal_mean": -0.06158552574343048,
757
+ "lras/policy_logp": -2.1515553726047054,
758
+ "lras/signal_mean": -0.07285048099603358,
759
+ "lras/signal_std": 0.9770862588658928,
760
+ "step": 500
761
+ },
762
+ {
763
+ "epoch": 1.3423304805793286,
764
+ "grad_norm": 16.75,
765
+ "learning_rate": 5.941027061011303e-07,
766
+ "loss": -0.0174,
767
+ "lras/base_loss": 0.0008343593450263143,
768
+ "lras/critic_logp": -2.4317118576817474,
769
+ "lras/eos_logp_mean": -15.597903436794876,
770
+ "lras/eos_logratio_mean": -0.2150385939516127,
771
+ "lras/len_signal_mean": -0.0008343593450263143,
772
+ "lras/policy_logp": -2.3689128480731942,
773
+ "lras/signal_mean": -0.06279901310225874,
774
+ "lras/signal_std": 1.0831936337985097,
775
+ "step": 510
776
+ },
777
+ {
778
+ "epoch": 1.368663594470046,
779
+ "grad_norm": 15.9375,
780
+ "learning_rate": 5.52569488389472e-07,
781
+ "loss": 0.043,
782
+ "lras/base_loss": 0.058240242666215636,
783
+ "lras/critic_logp": -2.6131641746570837,
784
+ "lras/eos_logp_mean": -16.068558446317912,
785
+ "lras/eos_logratio_mean": -0.2740385436452925,
786
+ "lras/len_signal_mean": -0.058240242666215636,
787
+ "lras/policy_logp": -2.5551394695444665,
788
+ "lras/signal_mean": -0.05802470178027107,
789
+ "lras/signal_std": 1.149809922138229,
790
+ "step": 520
791
+ },
792
+ {
793
+ "epoch": 1.3949967083607637,
794
+ "grad_norm": 17.0,
795
+ "learning_rate": 5.11979976839002e-07,
796
+ "loss": -0.0169,
797
+ "lras/base_loss": -0.0021918164269663976,
798
+ "lras/critic_logp": -2.3649934510763186,
799
+ "lras/eos_logp_mean": -14.996331504732371,
800
+ "lras/eos_logratio_mean": -0.14344595246948302,
801
+ "lras/len_signal_mean": 0.0021918164269663976,
802
+ "lras/policy_logp": -2.330086388513167,
803
+ "lras/signal_mean": -0.0349070573574782,
804
+ "lras/signal_std": 1.0876175165176392,
805
+ "step": 530
806
+ },
807
+ {
808
+ "epoch": 1.4213298222514812,
809
+ "grad_norm": 15.375,
810
+ "learning_rate": 4.724197815525992e-07,
811
+ "loss": 0.0197,
812
+ "lras/base_loss": -0.0468383116327459,
813
+ "lras/critic_logp": -2.53067398066888,
814
+ "lras/eos_logp_mean": -16.535732762515543,
815
+ "lras/eos_logratio_mean": -0.08761630833614617,
816
+ "lras/len_signal_mean": 0.0468383116327459,
817
+ "lras/policy_logp": -2.4693613863555597,
818
+ "lras/signal_mean": -0.06131259949122595,
819
+ "lras/signal_std": 1.1454029347747565,
820
+ "step": 540
821
+ },
822
+ {
823
+ "epoch": 1.4476629361421989,
824
+ "grad_norm": 17.5,
825
+ "learning_rate": 4.3397234163211484e-07,
826
+ "loss": 0.0268,
827
+ "lras/base_loss": 0.04140681747230701,
828
+ "lras/critic_logp": -2.385503157051001,
829
+ "lras/eos_logp_mean": -15.886599569767714,
830
+ "lras/eos_logratio_mean": -0.3488292686641216,
831
+ "lras/len_signal_mean": -0.04140681747230701,
832
+ "lras/policy_logp": -2.322975989256461,
833
+ "lras/signal_mean": -0.06252715005886085,
834
+ "lras/signal_std": 1.037683429988101,
835
+ "step": 550
836
+ },
837
+ {
838
+ "epoch": 1.4739960500329163,
839
+ "grad_norm": 18.0,
840
+ "learning_rate": 3.9671874919128125e-07,
841
+ "loss": 0.0145,
842
+ "lras/base_loss": -0.0025144488725345584,
843
+ "lras/critic_logp": -2.5965433204335,
844
+ "lras/eos_logp_mean": -16.94149838462472,
845
+ "lras/eos_logratio_mean": -0.015818399004638196,
846
+ "lras/len_signal_mean": 0.0025144488725345584,
847
+ "lras/policy_logp": -2.5307474223972894,
848
+ "lras/signal_mean": -0.0657959025496038,
849
+ "lras/signal_std": 1.0758267390541731,
850
+ "step": 560
851
+ },
852
+ {
853
+ "epoch": 1.500329163923634,
854
+ "grad_norm": 15.3125,
855
+ "learning_rate": 3.6073757831881244e-07,
856
+ "loss": 0.0176,
857
+ "lras/base_loss": 0.06200949700141791,
858
+ "lras/critic_logp": -2.392406307298439,
859
+ "lras/eos_logp_mean": -16.300534684956073,
860
+ "lras/eos_logratio_mean": -0.30153655624017117,
861
+ "lras/len_signal_mean": -0.06200949700141791,
862
+ "lras/policy_logp": -2.3042945046805796,
863
+ "lras/signal_mean": -0.08811182339869983,
864
+ "lras/signal_std": 1.0404585162177682,
865
+ "step": 570
866
+ },
867
+ {
868
+ "epoch": 1.5266622778143515,
869
+ "grad_norm": 16.875,
870
+ "learning_rate": 3.261047193524439e-07,
871
+ "loss": -0.0326,
872
+ "lras/base_loss": -0.06760416808247101,
873
+ "lras/critic_logp": -2.6577579113558363,
874
+ "lras/eos_logp_mean": -17.370185589790346,
875
+ "lras/eos_logratio_mean": 0.29878572942689063,
876
+ "lras/len_signal_mean": 0.06760416808247101,
877
+ "lras/policy_logp": -2.6247902024977887,
878
+ "lras/signal_mean": -0.032967715641459255,
879
+ "lras/signal_std": 1.2247103542089461,
880
+ "step": 580
881
+ },
882
+ {
883
+ "epoch": 1.5529953917050692,
884
+ "grad_norm": 15.75,
885
+ "learning_rate": 2.9289321881345254e-07,
886
+ "loss": 0.0452,
887
+ "lras/base_loss": -0.007348847654066048,
888
+ "lras/critic_logp": -2.306881194473115,
889
+ "lras/eos_logp_mean": -16.12271338701248,
890
+ "lras/eos_logratio_mean": -0.11883539147675037,
891
+ "lras/len_signal_mean": 0.007348847654066048,
892
+ "lras/policy_logp": -2.2347532244045207,
893
+ "lras/signal_mean": -0.07212796678582019,
894
+ "lras/signal_std": 0.9801751000806689,
895
+ "step": 590
896
+ },
897
+ {
898
+ "epoch": 1.5793285055957869,
899
+ "grad_norm": 16.625,
900
+ "learning_rate": 2.611731253392636e-07,
901
+ "loss": 0.0156,
902
+ "lras/base_loss": 0.012532747784280217,
903
+ "lras/critic_logp": -2.4853958261376463,
904
+ "lras/eos_logp_mean": -16.995042578876017,
905
+ "lras/eos_logratio_mean": -0.21502913725562395,
906
+ "lras/len_signal_mean": -0.012532747784280217,
907
+ "lras/policy_logp": -2.4150401586631465,
908
+ "lras/signal_mean": -0.07035568429843155,
909
+ "lras/signal_std": 1.0984039671719075,
910
+ "step": 600
911
+ },
912
+ {
913
+ "epoch": 1.6056616194865043,
914
+ "grad_norm": 17.875,
915
+ "learning_rate": 2.310113419391002e-07,
916
+ "loss": 0.0125,
917
+ "lras/base_loss": -0.013646831101505085,
918
+ "lras/critic_logp": -2.456756533377968,
919
+ "lras/eos_logp_mean": -16.79105181824416,
920
+ "lras/eos_logratio_mean": 0.12275656980345957,
921
+ "lras/len_signal_mean": 0.013646831101505085,
922
+ "lras/policy_logp": -2.403605774764165,
923
+ "lras/signal_mean": -0.05315075517672918,
924
+ "lras/signal_std": 1.1066610222682356,
925
+ "step": 610
926
+ },
927
+ {
928
+ "epoch": 1.6319947333772218,
929
+ "grad_norm": 16.25,
930
+ "learning_rate": 2.02471484884291e-07,
931
+ "loss": 0.0119,
932
+ "lras/base_loss": -0.047292615578044206,
933
+ "lras/critic_logp": -2.546518089146452,
934
+ "lras/eos_logp_mean": -17.33827044069767,
935
+ "lras/eos_logratio_mean": 0.05550766550004482,
936
+ "lras/len_signal_mean": 0.047292615578044206,
937
+ "lras/policy_logp": -2.562370175782289,
938
+ "lras/signal_mean": 0.015852110908139077,
939
+ "lras/signal_std": 1.1021088421344758,
940
+ "step": 620
941
+ },
942
+ {
943
+ "epoch": 1.6583278472679395,
944
+ "grad_norm": 15.875,
945
+ "learning_rate": 1.756137495308594e-07,
946
+ "loss": 0.0072,
947
+ "lras/base_loss": -0.003694472834467888,
948
+ "lras/critic_logp": -2.437133218209924,
949
+ "lras/eos_logp_mean": -16.410118286311626,
950
+ "lras/eos_logratio_mean": -0.3761923125013709,
951
+ "lras/len_signal_mean": 0.003694472834467888,
952
+ "lras/policy_logp": -2.3621272154242097,
953
+ "lras/signal_mean": -0.07500599257311638,
954
+ "lras/signal_std": 1.0659800309687852,
955
+ "step": 630
956
+ },
957
+ {
958
+ "epoch": 1.6846609611586572,
959
+ "grad_norm": 17.0,
960
+ "learning_rate": 1.5049478335739883e-07,
961
+ "loss": 0.0084,
962
+ "lras/base_loss": -0.02764880711620208,
963
+ "lras/critic_logp": -2.5047604143513587,
964
+ "lras/eos_logp_mean": -16.866475162468852,
965
+ "lras/eos_logratio_mean": -0.10876648616977036,
966
+ "lras/len_signal_mean": 0.02764880711620208,
967
+ "lras/policy_logp": -2.4860704024467766,
968
+ "lras/signal_mean": -0.018689985024040467,
969
+ "lras/signal_std": 1.1336077319458127,
970
+ "step": 640
971
+ },
972
+ {
973
+ "epoch": 1.7109940750493746,
974
+ "grad_norm": 18.5,
975
+ "learning_rate": 1.2716756648601856e-07,
976
+ "loss": 0.017,
977
+ "lras/base_loss": 0.031482654724823075,
978
+ "lras/critic_logp": -2.4651034242143015,
979
+ "lras/eos_logp_mean": -17.17294084727764,
980
+ "lras/eos_logratio_mean": -0.16243524220772088,
981
+ "lras/len_signal_mean": -0.031482654724823075,
982
+ "lras/policy_logp": -2.384486557203078,
983
+ "lras/signal_mean": -0.08061687196427163,
984
+ "lras/signal_std": 1.076336015574634,
985
+ "step": 650
986
+ },
987
+ {
988
+ "epoch": 1.737327188940092,
989
+ "grad_norm": 18.25,
990
+ "learning_rate": 1.0568129993836039e-07,
991
+ "loss": -0.0159,
992
+ "lras/base_loss": 0.027414782461710273,
993
+ "lras/critic_logp": -2.5933194540201234,
994
+ "lras/eos_logp_mean": -17.062600272521376,
995
+ "lras/eos_logratio_mean": -0.24033235143870116,
996
+ "lras/len_signal_mean": -0.027414782461710273,
997
+ "lras/policy_logp": -2.536566412894524,
998
+ "lras/signal_mean": -0.056753033917510444,
999
+ "lras/signal_std": 1.1109898013994097,
1000
+ "step": 660
1001
+ },
1002
+ {
1003
+ "epoch": 1.7636603028308098,
1004
+ "grad_norm": 17.75,
1005
+ "learning_rate": 8.608130186237328e-08,
1006
+ "loss": 0.0115,
1007
+ "lras/base_loss": -0.03821857803268358,
1008
+ "lras/critic_logp": -2.3734066799687032,
1009
+ "lras/eos_logp_mean": -16.351687154173852,
1010
+ "lras/eos_logratio_mean": 0.0580406597815454,
1011
+ "lras/len_signal_mean": 0.03821857803268358,
1012
+ "lras/policy_logp": -2.314815712059991,
1013
+ "lras/signal_mean": -0.058590953137746295,
1014
+ "lras/signal_std": 1.0151456581428646,
1015
+ "step": 670
1016
+ },
1017
+ {
1018
+ "epoch": 1.7899934167215275,
1019
+ "grad_norm": 19.5,
1020
+ "learning_rate": 6.840891194872111e-08,
1021
+ "loss": 0.002,
1022
+ "lras/base_loss": -0.04417614057601895,
1023
+ "lras/critic_logp": -2.6200193790038826,
1024
+ "lras/eos_logp_mean": -16.932576566934586,
1025
+ "lras/eos_logratio_mean": 0.1384289343841374,
1026
+ "lras/len_signal_mean": 0.04417614057601895,
1027
+ "lras/policy_logp": -2.570919756222518,
1028
+ "lras/signal_mean": -0.049099608114942496,
1029
+ "lras/signal_std": 1.108743331208825,
1030
+ "step": 680
1031
+ },
1032
+ {
1033
+ "epoch": 1.816326530612245,
1034
+ "grad_norm": 16.875,
1035
+ "learning_rate": 5.270140423842606e-08,
1036
+ "loss": -0.0075,
1037
+ "lras/base_loss": -0.04747567040612921,
1038
+ "lras/critic_logp": -2.5634090900972355,
1039
+ "lras/eos_logp_mean": -16.961749491095542,
1040
+ "lras/eos_logratio_mean": 0.2620813576504588,
1041
+ "lras/len_signal_mean": 0.04747567040612921,
1042
+ "lras/policy_logp": -2.502021145417211,
1043
+ "lras/signal_mean": -0.06138794084279452,
1044
+ "lras/signal_std": 1.0915549699217082,
1045
+ "step": 690
1046
+ },
1047
+ {
1048
+ "epoch": 1.8426596445029624,
1049
+ "grad_norm": 16.5,
1050
+ "learning_rate": 3.899190850565115e-08,
1051
+ "loss": 0.0097,
1052
+ "lras/base_loss": 0.05106182043091394,
1053
+ "lras/critic_logp": -2.4543609573990937,
1054
+ "lras/eos_logp_mean": -15.569522052630782,
1055
+ "lras/eos_logratio_mean": -0.332894785143435,
1056
+ "lras/len_signal_mean": -0.05106182043091394,
1057
+ "lras/policy_logp": -2.367433876935615,
1058
+ "lras/signal_mean": -0.08692705947167448,
1059
+ "lras/signal_std": 1.041911705583334,
1060
+ "step": 700
1061
+ },
1062
+ {
1063
+ "epoch": 1.86899275839368,
1064
+ "grad_norm": 17.625,
1065
+ "learning_rate": 2.7309340381436064e-08,
1066
+ "loss": 0.0118,
1067
+ "lras/base_loss": -0.03686453927366529,
1068
+ "lras/critic_logp": -2.454046491647108,
1069
+ "lras/eos_logp_mean": -17.108138289675118,
1070
+ "lras/eos_logratio_mean": -0.02865053452551365,
1071
+ "lras/len_signal_mean": 0.03686453927366529,
1072
+ "lras/policy_logp": -2.413266429991225,
1073
+ "lras/signal_mean": -0.04078007064555185,
1074
+ "lras/signal_std": 1.085499944910407,
1075
+ "step": 710
1076
+ },
1077
+ {
1078
+ "epoch": 1.8953258722843978,
1079
+ "grad_norm": 18.0,
1080
+ "learning_rate": 1.7678340365772203e-08,
1081
+ "loss": 0.0382,
1082
+ "lras/base_loss": 0.0055879024948808365,
1083
+ "lras/critic_logp": -2.502583679419604,
1084
+ "lras/eos_logp_mean": -17.123634773492814,
1085
+ "lras/eos_logratio_mean": -0.4117021427722648,
1086
+ "lras/len_signal_mean": -0.0055879024948808365,
1087
+ "lras/policy_logp": -2.427938670433141,
1088
+ "lras/signal_mean": -0.07464500179640673,
1089
+ "lras/signal_std": 1.0575703646987678,
1090
+ "step": 720
1091
+ },
1092
+ {
1093
+ "epoch": 1.9216589861751152,
1094
+ "grad_norm": 22.0,
1095
+ "learning_rate": 1.011922185664471e-08,
1096
+ "loss": -0.0027,
1097
+ "lras/base_loss": -0.10265924405830447,
1098
+ "lras/critic_logp": -2.6089221828032767,
1099
+ "lras/eos_logp_mean": -16.981125724315643,
1100
+ "lras/eos_logratio_mean": 0.19451139154843985,
1101
+ "lras/len_signal_mean": 0.10265924405830447,
1102
+ "lras/policy_logp": -2.554374224068253,
1103
+ "lras/signal_mean": -0.054547943455510464,
1104
+ "lras/signal_std": 1.1660822635516523,
1105
+ "step": 730
1106
+ },
1107
+ {
1108
+ "epoch": 1.9479921000658327,
1109
+ "grad_norm": 18.0,
1110
+ "learning_rate": 4.647928305662851e-09,
1111
+ "loss": 0.0317,
1112
+ "lras/base_loss": 0.007404178951401263,
1113
+ "lras/critic_logp": -2.518398690186143,
1114
+ "lras/eos_logp_mean": -16.704739168286324,
1115
+ "lras/eos_logratio_mean": -0.32062844494357706,
1116
+ "lras/len_signal_mean": -0.007404178951401263,
1117
+ "lras/policy_logp": -2.4638789154207856,
1118
+ "lras/signal_mean": -0.054519774758635786,
1119
+ "lras/signal_std": 1.034599607810378,
1120
+ "step": 740
1121
+ },
1122
+ {
1123
+ "epoch": 1.9743252139565504,
1124
+ "grad_norm": 17.875,
1125
+ "learning_rate": 1.2759995906392873e-09,
1126
+ "loss": 0.0181,
1127
+ "lras/base_loss": -0.07731338242592756,
1128
+ "lras/critic_logp": -2.6291819973882866,
1129
+ "lras/eos_logp_mean": -17.124461753666402,
1130
+ "lras/eos_logratio_mean": 0.06718716314062476,
1131
+ "lras/len_signal_mean": 0.07731338242592756,
1132
+ "lras/policy_logp": -2.6082319075483857,
1133
+ "lras/signal_mean": -0.02095007741266897,
1134
+ "lras/signal_std": 1.116696286201477,
1135
+ "step": 750
1136
+ },
1137
+ {
1138
+ "epoch": 2.0,
1139
+ "grad_norm": 16.375,
1140
+ "learning_rate": 1.0547676048688892e-11,
1141
+ "loss": 0.039,
1142
+ "lras/base_loss": 0.03092502773954318,
1143
+ "lras/critic_logp": -2.568873341916912,
1144
+ "lras/eos_logp_mean": -16.902431547450714,
1145
+ "lras/eos_logratio_mean": -0.3201881614633096,
1146
+ "lras/len_signal_mean": -0.03092502773954318,
1147
+ "lras/policy_logp": -2.4733478146684633,
1148
+ "lras/signal_mean": -0.0955255111616805,
1149
+ "lras/signal_std": 1.0889918152720501,
1150
+ "step": 760
1151
+ }
1152
+ ],
1153
+ "logging_steps": 10,
1154
+ "max_steps": 760,
1155
+ "num_input_tokens_seen": 0,
1156
+ "num_train_epochs": 2,
1157
+ "save_steps": 100,
1158
+ "stateful_callbacks": {
1159
+ "TrainerControl": {
1160
+ "args": {
1161
+ "should_epoch_stop": false,
1162
+ "should_evaluate": false,
1163
+ "should_log": false,
1164
+ "should_save": true,
1165
+ "should_training_stop": true
1166
+ },
1167
+ "attributes": {}
1168
+ }
1169
+ },
1170
+ "total_flos": 0.0,
1171
+ "train_batch_size": 2,
1172
+ "trial_name": null,
1173
+ "trial_params": null
1174
+ }
checkpoint-760/training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58e47af9d5fa16de9ecf04c53919303e289e2d6c960bdb24086714cba764fa05
3
+ size 5496
checkpoint-760/vocab.json ADDED
The diff for this file is too large to render.
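
The log records in this commit track a custom set of lras/* metrics alongside the usual Trainer fields (epoch, grad_norm, learning_rate, loss, step). As a minimal sketch, assuming these records sit in the standard log_history list of this checkpoint's trainer_state.json (the file path and the log_history key are assumptions; the record keys are taken from the diff above), one could plot the loss and signal curves like this:

import json
import matplotlib.pyplot as plt

# Assumption: the records shown in the diff above live in the standard
# `log_history` list of this checkpoint's trainer_state.json.
with open("checkpoint-760/trainer_state.json") as f:
    state = json.load(f)

# Keep only logging records that carry both fields plotted below
# (evaluation/summary records, if any, would lack them).
recs = [r for r in state["log_history"]
        if "loss" in r and "lras/signal_mean" in r]
steps = [r["step"] for r in recs]

fig, (ax_loss, ax_sig) = plt.subplots(2, 1, sharex=True)
ax_loss.plot(steps, [r["loss"] for r in recs])
ax_loss.set_ylabel("loss")
ax_sig.plot(steps, [r["lras/signal_mean"] for r in recs])
ax_sig.set_ylabel("lras/signal_mean")
ax_sig.set_xlabel("step")
fig.tight_layout()
plt.show()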