Abdohaaland committed on
Commit
9ddf1f7
·
verified ·
1 Parent(s): 2511587

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  Models/25/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  Models/25/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ Models/50/tokenizer.json filter=lfs diff=lfs merge=lfs -text
Models/50/chat_template.jinja ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if message.content is string %}
27
+ {%- set content = message.content %}
28
+ {%- else %}
29
+ {%- set content = '' %}
30
+ {%- endif %}
31
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
32
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
33
+ {%- elif message.role == "assistant" %}
34
+ {%- set reasoning_content = '' %}
35
+ {%- if message.reasoning_content is string %}
36
+ {%- set reasoning_content = message.reasoning_content %}
37
+ {%- else %}
38
+ {%- if '</think>' in content %}
39
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
40
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
41
+ {%- endif %}
42
+ {%- endif %}
43
+ {%- if loop.index0 > ns.last_query_index %}
44
+ {%- if loop.last or (not loop.last and reasoning_content) %}
45
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
46
+ {%- else %}
47
+ {{- '<|im_start|>' + message.role + '\n' + content }}
48
+ {%- endif %}
49
+ {%- else %}
50
+ {{- '<|im_start|>' + message.role + '\n' + content }}
51
+ {%- endif %}
52
+ {%- if message.tool_calls %}
53
+ {%- for tool_call in message.tool_calls %}
54
+ {%- if (loop.first and content) or (not loop.first) %}
55
+ {{- '\n' }}
56
+ {%- endif %}
57
+ {%- if tool_call.function %}
58
+ {%- set tool_call = tool_call.function %}
59
+ {%- endif %}
60
+ {{- '<tool_call>\n{"name": "' }}
61
+ {{- tool_call.name }}
62
+ {{- '", "arguments": ' }}
63
+ {%- if tool_call.arguments is string %}
64
+ {{- tool_call.arguments }}
65
+ {%- else %}
66
+ {{- tool_call.arguments | tojson }}
67
+ {%- endif %}
68
+ {{- '}\n</tool_call>' }}
69
+ {%- endfor %}
70
+ {%- endif %}
71
+ {{- '<|im_end|>\n' }}
72
+ {%- elif message.role == "tool" %}
73
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
74
+ {{- '<|im_start|>user' }}
75
+ {%- endif %}
76
+ {{- '\n<tool_response>\n' }}
77
+ {{- content }}
78
+ {{- '\n</tool_response>' }}
79
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
80
+ {{- '<|im_end|>\n' }}
81
+ {%- endif %}
82
+ {%- endif %}
83
+ {%- endfor %}
84
+ {%- if add_generation_prompt %}
85
+ {{- '<|im_start|>assistant\n' }}
86
+ {%- if enable_thinking is defined and enable_thinking is false %}
87
+ {{- '<think>\n\n</think>\n\n' }}
88
+ {%- endif %}
89
+ {%- endif %}
Models/50/config.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": 151645,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2560,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 9728,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention"
52
+ ],
53
+ "max_position_embeddings": 40960,
54
+ "max_window_layers": 36,
55
+ "model_type": "qwen3",
56
+ "num_attention_heads": 32,
57
+ "num_hidden_layers": 36,
58
+ "num_key_value_heads": 8,
59
+ "pad_token_id": 151643,
60
+ "quantization_config": {
61
+ "bits": 4,
62
+ "checkpoint_format": "gptq",
63
+ "desc_act": false,
64
+ "format": "gptq",
65
+ "group_size": 128,
66
+ "lm_head": false,
67
+ "meta": {
68
+ "act_group_aware": true,
69
+ "auto_forward_data_parallel": true,
70
+ "damp_auto_increment": 0.01,
71
+ "damp_percent": 0.05,
72
+ "failsafe": {
73
+ "smooth": null,
74
+ "strategy": "rtn",
75
+ "threshold": "0.5%"
76
+ },
77
+ "gc_mode": "interval",
78
+ "gptaq": null,
79
+ "hessian": {
80
+ "chunk_bytes": null,
81
+ "chunk_size": null,
82
+ "staging_dtype": "float32"
83
+ },
84
+ "mock_quantization": false,
85
+ "mse": 0.0,
86
+ "offload_to_disk": true,
87
+ "offload_to_disk_path": "./gptqmodel_offload/xkhyhwls-oqfmdfeq/",
88
+ "pack_impl": "cpu",
89
+ "quantizer": [
90
+ "gptqmodel:5.8.0"
91
+ ],
92
+ "static_groups": false,
93
+ "true_sequential": true,
94
+ "uri": "https://github.com/modelcloud/gptqmodel",
95
+ "vram_strategy": "exclusive",
96
+ "wait_for_submodule_finalizers": false
97
+ },
98
+ "pack_dtype": "int32",
99
+ "quant_method": "gptq",
100
+ "sym": true
101
+ },
102
+ "rms_norm_eps": 1e-06,
103
+ "rope_parameters": {
104
+ "rope_theta": 1000000,
105
+ "rope_type": "default"
106
+ },
107
+ "sliding_window": null,
108
+ "tie_word_embeddings": true,
109
+ "transformers_version": "5.2.0",
110
+ "use_cache": true,
111
+ "use_sliding_window": false,
112
+ "vocab_size": 151936
113
+ }
Models/50/generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "temperature": 0.6,
9
+ "top_k": 20,
10
+ "top_p": 0.95,
11
+ "transformers_version": "5.2.0"
12
+ }
Models/50/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb9376da4cc21e7b867541b9af962b3a3b7d3376b468569728d95da84dce1699
3
+ size 2669888992
Models/50/quant_log.csv ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ layer,module,loss,damp,time
2
+ 0,self_attn.q_proj,0.0000003892,0.05000,4.898
3
+ 0,self_attn.v_proj,0.0000000914,0.05000,4.964
4
+ 0,self_attn.k_proj,0.0000001061,0.05000,4.994
5
+ 0,self_attn.o_proj,0.0000002403,0.05000,1.686
6
+ 0,mlp.gate_proj,0.0000248745,0.05000,2.588
7
+ 0,mlp.up_proj,0.0000215972,0.05000,2.621
8
+ 0,mlp.down_proj,0.0000025529,0.05000,4.167
9
+ 1,self_attn.k_proj,0.0000001765,0.05000,4.454
10
+ 1,self_attn.v_proj,0.0000001794,0.05000,4.541
11
+ 1,self_attn.q_proj,0.0000006520,0.05000,4.572
12
+ 1,self_attn.o_proj,0.0000002912,0.05000,1.417
13
+ 1,mlp.gate_proj,0.0008361811,0.05000,2.121
14
+ 1,mlp.up_proj,0.0003858884,0.05000,2.142
15
+ 1,mlp.down_proj,0.0000039680,0.05000,3.713
16
+ 2,self_attn.v_proj,0.0000004924,0.05000,4.481
17
+ 2,self_attn.k_proj,0.0000005247,0.05000,4.530
18
+ 2,self_attn.q_proj,0.0000018126,0.05000,4.564
19
+ 2,self_attn.o_proj,0.0000004236,0.05000,1.428
20
+ 2,mlp.gate_proj,0.0014728842,0.05000,2.118
21
+ 2,mlp.up_proj,0.0011552544,0.05000,2.136
22
+ 2,mlp.down_proj,0.0000031700,0.05000,3.747
23
+ 3,self_attn.v_proj,0.0000009231,0.05000,4.443
24
+ 3,self_attn.k_proj,0.0000009410,0.05000,4.494
25
+ 3,self_attn.q_proj,0.0000036298,0.05000,4.515
26
+ 3,self_attn.o_proj,0.0000006802,0.05000,1.470
27
+ 3,mlp.up_proj,0.0006774332,0.05000,2.075
28
+ 3,mlp.gate_proj,0.0013199807,0.05000,2.098
29
+ 3,mlp.down_proj,0.0000069405,0.05000,3.733
30
+ 4,self_attn.q_proj,0.0000065567,0.05000,4.445
31
+ 4,self_attn.k_proj,0.0000018016,0.05000,4.494
32
+ 4,self_attn.v_proj,0.0000017432,0.05000,4.519
33
+ 4,self_attn.o_proj,0.0000010358,0.05000,1.419
34
+ 4,mlp.up_proj,0.0004369767,0.05000,2.079
35
+ 4,mlp.gate_proj,0.0011432846,0.05000,2.112
36
+ 4,mlp.down_proj,0.0000079352,0.05000,3.826
37
+ 5,self_attn.v_proj,0.0000019041,0.05000,4.476
38
+ 5,self_attn.k_proj,0.0000018155,0.05000,4.476
39
+ 5,self_attn.q_proj,0.0000070605,0.05000,4.539
40
+ 5,self_attn.o_proj,0.0000018124,0.05000,1.440
41
+ 5,mlp.up_proj,0.0001843741,0.05000,2.105
42
+ 5,mlp.gate_proj,0.0004063439,0.05000,2.131
43
+ 5,mlp.down_proj,0.0000111709,0.05000,3.842
44
+ 6,self_attn.v_proj,0.0000040331,0.05000,4.427
45
+ 6,self_attn.k_proj,0.0000036274,0.05000,4.473
46
+ 6,self_attn.q_proj,0.0000152004,0.05000,4.489
47
+ 6,self_attn.o_proj,0.0000041757,0.05000,1.448
48
+ 6,mlp.gate_proj,0.0004484141,0.05000,2.256
49
+ 6,mlp.up_proj,0.0002336842,0.05000,2.285
50
+ 6,mlp.down_proj,0.0005620144,0.05000,3.773
51
+ 7,self_attn.v_proj,0.0000081437,0.05000,4.465
52
+ 7,self_attn.q_proj,0.0000311665,0.05000,4.572
53
+ 7,self_attn.k_proj,0.0000085449,0.05000,4.573
54
+ 7,self_attn.o_proj,0.0000051851,0.05000,1.443
55
+ 7,mlp.up_proj,0.0002692974,0.05000,2.137
56
+ 7,mlp.gate_proj,0.0004860758,0.05000,2.168
57
+ 7,mlp.down_proj,0.0000193441,0.05000,3.857
58
+ 8,self_attn.v_proj,0.0000124433,0.05000,4.425
59
+ 8,self_attn.q_proj,0.0000461009,0.05000,4.430
60
+ 8,self_attn.k_proj,0.0000120985,0.05000,4.472
61
+ 8,self_attn.o_proj,0.0000070641,0.05000,1.454
62
+ 8,mlp.gate_proj,0.0003251363,0.05000,1.991
63
+ 8,mlp.up_proj,0.0002605974,0.05000,1.988
64
+ 8,mlp.down_proj,0.0000314368,0.05000,3.883
65
+ 9,self_attn.k_proj,0.0000169086,0.05000,4.294
66
+ 9,self_attn.q_proj,0.0000589728,0.05000,4.408
67
+ 9,self_attn.v_proj,0.0000152795,0.05000,4.425
68
+ 9,self_attn.o_proj,0.0000086252,0.05000,1.540
69
+ 9,mlp.gate_proj,0.0005424109,0.05000,2.123
70
+ 9,mlp.up_proj,0.0003351726,0.05000,2.146
71
+ 9,mlp.down_proj,0.0000309148,0.05000,3.826
72
+ 10,self_attn.v_proj,0.0000244346,0.05000,4.451
73
+ 10,self_attn.q_proj,0.0000857773,0.05000,4.490
74
+ 10,self_attn.k_proj,0.0000222903,0.05000,4.521
75
+ 10,self_attn.o_proj,0.0000124682,0.05000,1.459
76
+ 10,mlp.gate_proj,0.0004176983,0.05000,1.985
77
+ 10,mlp.up_proj,0.0002851436,0.05000,2.013
78
+ 10,mlp.down_proj,0.0000257911,0.05000,3.751
79
+ 11,self_attn.q_proj,0.0000414936,0.05000,4.344
80
+ 11,self_attn.v_proj,0.0000111498,0.05000,4.412
81
+ 11,self_attn.k_proj,0.0000114142,0.05000,4.420
82
+ 11,self_attn.o_proj,0.0000084636,0.05000,1.479
83
+ 11,mlp.up_proj,0.0002661766,0.05000,2.072
84
+ 11,mlp.gate_proj,0.0003413026,0.05000,2.094
85
+ 11,mlp.down_proj,0.0000248982,0.05000,3.852
86
+ 12,self_attn.v_proj,0.0000136642,0.05000,4.438
87
+ 12,self_attn.k_proj,0.0000133012,0.05000,4.487
88
+ 12,self_attn.q_proj,0.0000495074,0.05000,4.490
89
+ 12,self_attn.o_proj,0.0000102142,0.05000,1.436
90
+ 12,mlp.gate_proj,0.0003048110,0.05000,2.085
91
+ 12,mlp.up_proj,0.0002618897,0.05000,2.117
92
+ 12,mlp.down_proj,0.0000251344,0.05000,3.762
93
+ 13,self_attn.q_proj,0.0000380912,0.05000,4.432
94
+ 13,self_attn.v_proj,0.0000094012,0.05000,4.432
95
+ 13,self_attn.k_proj,0.0000102629,0.05000,4.455
96
+ 13,self_attn.o_proj,0.0000085788,0.05000,1.441
97
+ 13,mlp.gate_proj,0.0002850975,0.05000,2.084
98
+ 13,mlp.up_proj,0.0002683511,0.05000,2.110
99
+ 13,mlp.down_proj,0.0000268963,0.05000,3.765
100
+ 14,self_attn.q_proj,0.0000627420,0.05000,4.459
101
+ 14,self_attn.k_proj,0.0000162856,0.05000,4.523
102
+ 14,self_attn.v_proj,0.0000165909,0.05000,4.555
103
+ 14,self_attn.o_proj,0.0000132363,0.05000,1.429
104
+ 14,mlp.up_proj,0.0002667329,0.05000,2.120
105
+ 14,mlp.gate_proj,0.0002825767,0.05000,2.155
106
+ 14,mlp.down_proj,0.0000251557,0.05000,3.783
107
+ 15,self_attn.k_proj,0.0000167620,0.05000,4.314
108
+ 15,self_attn.v_proj,0.0000153608,0.05000,4.378
109
+ 15,self_attn.q_proj,0.0000641311,0.05000,4.410
110
+ 15,self_attn.o_proj,0.0000114332,0.05000,1.439
111
+ 15,mlp.up_proj,0.0002600501,0.05000,2.165
112
+ 15,mlp.gate_proj,0.0002651350,0.05000,2.185
113
+ 15,mlp.down_proj,0.0000244735,0.05000,3.813
114
+ 16,self_attn.v_proj,0.0000304125,0.05000,4.336
115
+ 16,self_attn.q_proj,0.0001121167,0.05000,4.425
116
+ 16,self_attn.k_proj,0.0000278084,0.05000,4.450
117
+ 16,self_attn.o_proj,0.0000125121,0.05000,1.439
118
+ 16,mlp.gate_proj,0.0003042360,0.05000,2.117
119
+ 16,mlp.up_proj,0.0002817502,0.05000,2.139
120
+ 16,mlp.down_proj,0.0003102970,0.05000,3.794
121
+ 17,self_attn.k_proj,0.0000216207,0.05000,4.342
122
+ 17,self_attn.q_proj,0.0000981481,0.05000,4.380
123
+ 17,self_attn.v_proj,0.0000235827,0.05000,4.421
124
+ 17,self_attn.o_proj,0.0000135337,0.05000,1.455
125
+ 17,mlp.gate_proj,0.0002716687,0.05000,2.068
126
+ 17,mlp.up_proj,0.0002553188,0.05000,2.086
127
+ 17,mlp.down_proj,0.0000246212,0.05000,3.851
128
+ 18,self_attn.v_proj,0.0000281725,0.05000,4.252
129
+ 18,self_attn.q_proj,0.0001061965,0.05000,4.310
130
+ 18,self_attn.k_proj,0.0000259232,0.05000,4.339
131
+ 18,self_attn.o_proj,0.0000173272,0.05000,1.469
132
+ 18,mlp.up_proj,0.0002783262,0.05000,2.040
133
+ 18,mlp.gate_proj,0.0002907026,0.05000,2.060
134
+ 18,mlp.down_proj,0.0000303231,0.05000,3.794
135
+ 19,self_attn.k_proj,0.0000492216,0.05000,4.381
136
+ 19,self_attn.v_proj,0.0000490726,0.05000,4.433
137
+ 19,self_attn.q_proj,0.0002095862,0.05000,4.480
138
+ 19,self_attn.o_proj,0.0000210620,0.05000,1.460
139
+ 19,mlp.up_proj,0.0003063424,0.05000,1.990
140
+ 19,mlp.gate_proj,0.0003174873,0.05000,2.012
141
+ 19,mlp.down_proj,0.0000387613,0.05000,3.854
142
+ 20,self_attn.k_proj,0.0000430631,0.05000,4.405
143
+ 20,self_attn.q_proj,0.0001980007,0.05000,4.477
144
+ 20,self_attn.v_proj,0.0000455319,0.05000,4.506
145
+ 20,self_attn.o_proj,0.0000226272,0.05000,1.474
146
+ 20,mlp.gate_proj,0.0003353285,0.05000,2.040
147
+ 20,mlp.up_proj,0.0003304923,0.05000,2.068
148
+ 20,mlp.down_proj,0.0000424498,0.05000,3.794
149
+ 21,self_attn.q_proj,0.0002467767,0.05000,4.283
150
+ 21,self_attn.k_proj,0.0000562617,0.05000,4.328
151
+ 21,self_attn.v_proj,0.0000601845,0.05000,4.356
152
+ 21,self_attn.o_proj,0.0000256164,0.05000,1.463
153
+ 21,mlp.up_proj,0.0003876656,0.05000,1.998
154
+ 21,mlp.gate_proj,0.0003971141,0.05000,2.022
155
+ 21,mlp.down_proj,0.0000551685,0.05000,3.871
156
+ 22,self_attn.v_proj,0.0001205271,0.05000,4.378
157
+ 22,self_attn.k_proj,0.0001126014,0.05000,4.436
158
+ 22,self_attn.q_proj,0.0004939758,0.05000,4.459
159
+ 22,self_attn.o_proj,0.0000473019,0.05000,1.485
160
+ 22,mlp.up_proj,0.0004406472,0.05000,2.101
161
+ 22,mlp.gate_proj,0.0004752020,0.05000,2.119
162
+ 22,mlp.down_proj,0.0000884866,0.05000,3.858
163
+ 23,self_attn.q_proj,0.0004799678,0.05000,4.516
164
+ 23,self_attn.v_proj,0.0001249926,0.05000,4.563
165
+ 23,self_attn.k_proj,0.0001114300,0.05000,4.571
166
+ 23,self_attn.o_proj,0.0000583897,0.05000,1.460
167
+ 23,mlp.up_proj,0.0005192255,0.05000,2.057
168
+ 23,mlp.gate_proj,0.0005905081,0.05000,2.082
169
+ 23,mlp.down_proj,0.0001205872,0.05000,3.755
170
+ 24,self_attn.k_proj,0.0001827744,0.05000,4.333
171
+ 24,self_attn.v_proj,0.0002347561,0.05000,4.400
172
+ 24,self_attn.q_proj,0.0007712197,0.05000,4.434
173
+ 24,self_attn.o_proj,0.0000619258,0.05000,1.433
174
+ 24,mlp.gate_proj,0.0006641536,0.05000,2.010
175
+ 24,mlp.up_proj,0.0005745464,0.05000,2.016
176
+ 24,mlp.down_proj,0.0001467679,0.05000,3.859
177
+ 25,self_attn.q_proj,0.0005373499,0.05000,4.433
178
+ 25,self_attn.k_proj,0.0001349687,0.05000,4.473
179
+ 25,self_attn.v_proj,0.0001483015,0.05000,4.499
180
+ 25,self_attn.o_proj,0.0000400881,0.05000,1.459
181
+ 25,mlp.gate_proj,0.0007548458,0.05000,2.121
182
+ 25,mlp.up_proj,0.0006440500,0.05000,2.149
183
+ 25,mlp.down_proj,0.0001719484,0.05000,3.805
184
+ 26,self_attn.k_proj,0.0002035127,0.05000,4.441
185
+ 26,self_attn.q_proj,0.0008599364,0.05000,4.477
186
+ 26,self_attn.v_proj,0.0002357316,0.05000,4.492
187
+ 26,self_attn.o_proj,0.0000370940,0.05000,1.434
188
+ 26,mlp.up_proj,0.0007832906,0.05000,2.080
189
+ 26,mlp.gate_proj,0.0008894922,0.05000,2.103
190
+ 26,mlp.down_proj,0.0002024437,0.05000,3.804
191
+ 27,self_attn.q_proj,0.0010485745,0.05000,4.313
192
+ 27,self_attn.v_proj,0.0003151497,0.05000,4.392
193
+ 27,self_attn.k_proj,0.0002427360,0.05000,4.415
194
+ 27,self_attn.o_proj,0.0000494488,0.05000,1.487
195
+ 27,mlp.gate_proj,0.0009806582,0.05000,2.119
196
+ 27,mlp.up_proj,0.0008931541,0.05000,2.144
197
+ 27,mlp.down_proj,0.0002912259,0.05000,3.804
198
+ 28,self_attn.v_proj,0.0003319008,0.05000,4.377
199
+ 28,self_attn.q_proj,0.0011587325,0.05000,4.435
200
+ 28,self_attn.k_proj,0.0002960693,0.05000,4.458
201
+ 28,self_attn.o_proj,0.0000796063,0.05000,1.483
202
+ 28,mlp.up_proj,0.0010471307,0.05000,2.063
203
+ 28,mlp.gate_proj,0.0011029143,0.05000,2.087
204
+ 28,mlp.down_proj,0.0004399204,0.05000,3.750
205
+ 29,self_attn.q_proj,0.0027975505,0.05000,4.514
206
+ 29,self_attn.v_proj,0.0009092517,0.05000,4.587
207
+ 29,self_attn.k_proj,0.0006667851,0.05000,4.612
208
+ 29,self_attn.o_proj,0.0000638855,0.05000,1.417
209
+ 29,mlp.up_proj,0.0012708087,0.05000,2.009
210
+ 29,mlp.gate_proj,0.0012681824,0.05000,2.030
211
+ 29,mlp.down_proj,0.0004382937,0.05000,3.804
212
+ 30,self_attn.q_proj,0.0030858449,0.05000,4.423
213
+ 30,self_attn.k_proj,0.0007915465,0.05000,4.480
214
+ 30,self_attn.v_proj,0.0009827334,0.05000,4.496
215
+ 30,self_attn.o_proj,0.0001427864,0.05000,1.448
216
+ 30,mlp.gate_proj,0.0013024663,0.05000,2.031
217
+ 30,mlp.up_proj,0.0013509312,0.05000,2.066
218
+ 30,mlp.down_proj,0.0005863545,0.05000,3.799
219
+ 31,self_attn.q_proj,0.0039200108,0.05000,4.377
220
+ 31,self_attn.k_proj,0.0010847513,0.05000,4.434
221
+ 31,self_attn.v_proj,0.0014725985,0.05000,4.466
222
+ 31,self_attn.o_proj,0.0001331001,0.05000,1.455
223
+ 31,mlp.gate_proj,0.0012615698,0.05000,2.082
224
+ 31,mlp.up_proj,0.0013760641,0.05000,2.110
225
+ 31,mlp.down_proj,0.0007124285,0.05000,3.789
226
+ 32,self_attn.k_proj,0.0014174075,0.05000,4.457
227
+ 32,self_attn.q_proj,0.0056738988,0.05000,4.468
228
+ 32,self_attn.v_proj,0.0021258397,0.05000,4.533
229
+ 32,self_attn.o_proj,0.0001640287,0.05000,1.476
230
+ 32,mlp.up_proj,0.0014403572,0.05000,1.993
231
+ 32,mlp.gate_proj,0.0012891649,0.05000,2.017
232
+ 32,mlp.down_proj,0.0008746092,0.05000,3.744
233
+ 33,self_attn.k_proj,0.0026198407,0.05000,4.279
234
+ 33,self_attn.q_proj,0.0125964452,0.05000,4.312
235
+ 33,self_attn.v_proj,0.0049467264,0.05000,4.353
236
+ 33,self_attn.o_proj,0.0002326117,0.05000,1.468
237
+ 33,mlp.gate_proj,0.0014007832,0.05000,2.023
238
+ 33,mlp.up_proj,0.0015872569,0.05000,2.036
239
+ 33,mlp.down_proj,0.0011593697,0.05000,3.832
240
+ 34,self_attn.q_proj,0.0094372779,0.05000,4.304
241
+ 34,self_attn.k_proj,0.0021265713,0.05000,4.424
242
+ 34,self_attn.v_proj,0.0034324985,0.05000,4.441
243
+ 34,self_attn.o_proj,0.0004474311,0.05000,1.478
244
+ 34,mlp.gate_proj,0.0016823788,0.05000,2.039
245
+ 34,mlp.up_proj,0.0017845972,0.05000,2.038
246
+ 34,mlp.down_proj,0.0014755854,0.05000,3.751
247
+ 35,self_attn.v_proj,0.0013864043,0.05000,4.366
248
+ 35,self_attn.k_proj,0.0010910282,0.05000,4.469
249
+ 35,self_attn.q_proj,0.0041450113,0.05000,4.472
250
+ 35,self_attn.o_proj,0.0005922779,0.05000,1.439
251
+ 35,mlp.gate_proj,0.0033476329,0.05000,2.023
252
+ 35,mlp.up_proj,0.0038470728,0.05000,2.051
253
+ 35,mlp.down_proj,0.0026691593,0.05000,3.786
Models/50/quantize_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "group_size": 128,
4
+ "desc_act": false,
5
+ "lm_head": false,
6
+ "quant_method": "gptq",
7
+ "checkpoint_format": "gptq",
8
+ "pack_dtype": "int32",
9
+ "meta": {
10
+ "quantizer": [
11
+ "gptqmodel:5.8.0"
12
+ ],
13
+ "uri": "https://github.com/modelcloud/gptqmodel",
14
+ "damp_percent": 0.05,
15
+ "damp_auto_increment": 0.01,
16
+ "static_groups": false,
17
+ "true_sequential": true,
18
+ "mse": 0.0,
19
+ "gptaq": null,
20
+ "act_group_aware": true,
21
+ "failsafe": {
22
+ "strategy": "rtn",
23
+ "threshold": "0.5%",
24
+ "smooth": null
25
+ },
26
+ "offload_to_disk": true,
27
+ "offload_to_disk_path": "./gptqmodel_offload/xkhyhwls-oqfmdfeq/",
28
+ "pack_impl": "cpu",
29
+ "mock_quantization": false,
30
+ "gc_mode": "interval",
31
+ "wait_for_submodule_finalizers": false,
32
+ "auto_forward_data_parallel": true,
33
+ "hessian": {
34
+ "chunk_size": null,
35
+ "chunk_bytes": null,
36
+ "staging_dtype": "float32"
37
+ },
38
+ "vram_strategy": "exclusive"
39
+ },
40
+ "sym": true,
41
+ "format": "gptq"
42
+ }
Models/50/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7029094cd70eca33e2f5d6837051bd1b63789ebde3c05bcce93b0fb31c094a85
3
+ size 11422928
Models/50/tokenizer_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": null,
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|im_end|>",
7
+ "errors": "replace",
8
+ "extra_special_tokens": [
9
+ "<|im_start|>",
10
+ "<|im_end|>",
11
+ "<|object_ref_start|>",
12
+ "<|object_ref_end|>",
13
+ "<|box_start|>",
14
+ "<|box_end|>",
15
+ "<|quad_start|>",
16
+ "<|quad_end|>",
17
+ "<|vision_start|>",
18
+ "<|vision_end|>",
19
+ "<|vision_pad|>",
20
+ "<|image_pad|>",
21
+ "<|video_pad|>"
22
+ ],
23
+ "is_local": false,
24
+ "model_max_length": 131072,
25
+ "pad_token": "<|endoftext|>",
26
+ "split_special_tokens": false,
27
+ "tokenizer_class": "Qwen2Tokenizer",
28
+ "unk_token": null
29
+ }