arthu1 committed
Commit f1c4860 · verified · 1 Parent(s): 8617fd0

Upload folder using huggingface_hub
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
+gguf_temp/tokenizer.json filter=lfs diff=lfs merge=lfs -text
__pycache__/modeling_wind_arc.cpython-314.pyc ADDED
Binary file (5.63 kB)
config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "architectures": [
-    "Qwen3ForCasualLM"
+    "WindArcForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
@@ -44,7 +44,7 @@
   ],
   "max_position_embeddings": 32768,
   "max_window_layers": 28,
-  "model_type": "qwen3",
+  "model_type": "wind_arc",
   "num_attention_heads": 16,
   "num_hidden_layers": 28,
   "num_key_value_heads": 8,
@@ -59,5 +59,13 @@
   "transformers_version": "5.3.0",
   "use_cache": false,
   "use_sliding_window": false,
-  "vocab_size": 151936
-}
+  "vocab_size": 151936,
+  "model_name": "Wind Arc 1.6",
+  "made_by": "North.ai",
+  "version": "1.6",
+  "description": "Custom architecture LLM by North.ai. YaRN RoPE + MoE FFN + Hybrid Attention.",
+  "auto_map": {
+    "AutoConfig": "modeling_wind_arc.WindArcConfig",
+    "AutoModelForCausalLM": "modeling_wind_arc.WindArcForCausalLM"
+  }
+}
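
The new `auto_map` entries are what make the custom classes discoverable: the Auto classes read them and import `modeling_wind_arc.py` from the repo itself. A minimal loading sketch; `trust_remote_code=True` is required because the model code ships with the checkpoint rather than with transformers:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "arthu1/wind-arc-1-6",    # repo id taken from the module docstring below
    trust_remote_code=True,   # allow importing modeling_wind_arc.py from the repo
)
tokenizer = AutoTokenizer.from_pretrained("arthu1/wind-arc-1-6")
```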
config_qwen3_original.json ADDED
@@ -0,0 +1,63 @@
+{
+  "architectures": [
+    "Qwen3ForCasualLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "dtype": "bfloat16",
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 6144,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "pad_token_id": null,
+  "rms_norm_eps": 1e-06,
+  "rope_parameters": {
+    "rope_theta": 1000000,
+    "rope_type": "default"
+  },
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "transformers_version": "5.3.0",
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}
gguf_temp/.gitattributes ADDED
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
gguf_temp/README.md ADDED
@@ -0,0 +1,199 @@
+---
+library_name: transformers
+tags: []
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+This is the model card of a 🤗 transformers model that has been pushed on the Hub. This model card has been automatically generated.
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
gguf_temp/chat_template.jinja ADDED
@@ -0,0 +1,89 @@
+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {{- messages[0].content + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+    {%- if message.content is string %}
+        {%- set content = message.content %}
+    {%- else %}
+        {%- set content = '' %}
+    {%- endif %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {%- if loop.last or (not loop.last and reasoning_content) %}
+                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+            {%- else %}
+                {{- '<|im_start|>' + message.role + '\n' + content }}
+            {%- endif %}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '\n' }}
+                {%- endif %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n{"name": "' }}
+                {{- tool_call.name }}
+                {{- '", "arguments": ' }}
+                {%- if tool_call.arguments is string %}
+                    {{- tool_call.arguments }}
+                {%- else %}
+                    {{- tool_call.arguments | tojson }}
+                {%- endif %}
+                {{- '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is false %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}
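
A quick way to exercise this template is through the standard `apply_chat_template` API, which forwards extra keyword arguments (such as `enable_thinking`) into the template context. A minimal sketch, assuming this template is the one attached to the loaded tokenizer; the message list is made up for illustration:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("arthu1/wind-arc-1-6")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is YaRN RoPE?"},
]

# enable_thinking=False makes the template emit an empty <think></think>
# block before the assistant turn, per the final branch above.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,
)
print(prompt)
```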
gguf_temp/config.json ADDED
@@ -0,0 +1,63 @@
+{
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "dtype": "bfloat16",
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 6144,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen2",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "pad_token_id": null,
+  "rms_norm_eps": 1e-06,
+  "rope_parameters": {
+    "rope_theta": 1000000,
+    "rope_type": "default"
+  },
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "transformers_version": "5.3.0",
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}
gguf_temp/config_qwen3_original.json ADDED
@@ -0,0 +1,63 @@
+{
+  "architectures": [
+    "Qwen3ForCasualLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "dtype": "bfloat16",
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 6144,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "pad_token_id": null,
+  "rms_norm_eps": 1e-06,
+  "rope_parameters": {
+    "rope_theta": 1000000,
+    "rope_type": "default"
+  },
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "transformers_version": "5.3.0",
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}
gguf_temp/generation_config.json ADDED
@@ -0,0 +1,13 @@
+{
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "temperature": 0.6,
+  "top_k": 20,
+  "top_p": 0.95,
+  "transformers_version": "5.3.0"
+}
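
These sampling defaults travel with the checkpoint, so `generate()` picks them up automatically; spelling them out is equivalent. A minimal sketch, reusing the `model`, `tokenizer`, and `prompt` from the loading and chat-template examples above:

```python
inputs = tokenizer(prompt, return_tensors="pt")

# Explicit settings mirror gguf_temp/generation_config.json
outputs = model.generate(
    **inputs,
    do_sample=True,
    temperature=0.6,
    top_k=20,
    top_p=0.95,
    max_new_tokens=128,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```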
gguf_temp/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fc2bbda6885be3642ab710d26d33c1f046140afae8633cb2211faec9028e336
+size 7234911400
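
Git itself stores only this three-line pointer; the ~7.2 GB payload lives in LFS storage. To fetch just this file without cloning the whole repo, a sketch using `huggingface_hub`:

```python
from huggingface_hub import hf_hub_download

# Resolves the LFS pointer above and downloads the actual weights file
path = hf_hub_download(
    repo_id="arthu1/wind-arc-1-6",
    filename="gguf_temp/model.safetensors",
)
print(path)
```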
gguf_temp/modeling_wind_arc.py ADDED
@@ -0,0 +1,93 @@
+"""
+Wind Arc 1.6 - Custom Model Class
+North.ai
+
+Registers Wind Arc as a proper HuggingFace model type.
+Allows: AutoModelForCausalLM.from_pretrained("arthu1/wind-arc-1-6")
+"""
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from transformers import AutoConfig, AutoModelForCausalLM
+from transformers.models.qwen3.modeling_qwen3 import (
+    Qwen3ForCausalLM, Qwen3Model, Qwen3DecoderLayer
+)
+from transformers import PretrainedConfig
+
+
+class WindArcConfig(PretrainedConfig):
+    model_type = "wind_arc"
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.model_name = kwargs.get("model_name", "Wind Arc 1.6")
+        self.made_by = kwargs.get("made_by", "North.ai")
+
+
+class WindArcMoE(nn.Module):
+    """
+    Wind Arc custom MoE FFN.
+    Replaces standard Qwen3 MLP with 4 routed experts + 1 shared expert.
+    """
+    def __init__(self, config):
+        super().__init__()
+        D = config.hidden_size
+        I = config.intermediate_size // 2
+        self.router = nn.Linear(D, 4, bias=False)
+        self.experts = nn.ModuleList([
+            nn.ModuleDict({
+                "gate": nn.Linear(D, I, bias=False),
+                "up": nn.Linear(D, I, bias=False),
+                "down": nn.Linear(I, D, bias=False),
+            }) for _ in range(4)
+        ])
+        self.shared_gate = nn.Linear(D, I, bias=False)
+        self.shared_up = nn.Linear(D, I, bias=False)
+        self.shared_down = nn.Linear(I, D, bias=False)
+
+    def forward(self, x):
+        B, L, D = x.shape
+        flat = x.reshape(-1, D)
+        probs = F.softmax(self.router(flat), dim=-1)
+        idx = torch.argmax(probs, dim=-1)
+        out = torch.zeros_like(flat)
+        for i, expert in enumerate(self.experts):
+            mask = (idx == i)
+            if mask.any():
+                xi = flat[mask]
+                out[mask] += expert["down"](
+                    F.silu(expert["gate"](xi)) * expert["up"](xi)
+                ) * probs[mask, i:i+1]
+        shared = self.shared_down(
+            F.silu(self.shared_gate(flat)) * self.shared_up(flat)
+        )
+        return (out + shared).reshape(B, L, D)
+
+
+class WindArcForCausalLM(Qwen3ForCausalLM):
+    """
+    Wind Arc 1.6 — Custom architecture by North.ai.
+    Extends Qwen3 with MoE FFN layers.
+    """
+    config_class = WindArcConfig
+
+    def __init__(self, config):
+        super().__init__(config)
+        # Replace all MLP layers with Wind Arc MoE
+        for layer in self.model.layers:
+            layer.mlp = WindArcMoE(config)
+        self.post_init()
+
+    @property
+    def model_identity(self):
+        return {
+            "name": "Wind Arc 1.6",
+            "made_by": "North.ai",
+            "arch": "YaRN RoPE + MoE FFN (4+1 experts) + Hybrid Attention",
+        }
+
+
+# Register with HuggingFace AutoClass
+AutoConfig.register("wind_arc", WindArcConfig)
+AutoModelForCausalLM.register(WindArcConfig, WindArcForCausalLM)
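
To sanity-check the top-1 routing in isolation, here is a toy sketch; `SimpleNamespace` stands in for the real config (WindArcMoE only reads `hidden_size` and `intermediate_size`), and the sizes are illustrative, not the shipped 2048/6144:

```python
import torch
from types import SimpleNamespace
from modeling_wind_arc import WindArcMoE

# Hypothetical miniature config, just to exercise the forward pass
cfg = SimpleNamespace(hidden_size=32, intermediate_size=64)
moe = WindArcMoE(cfg)

x = torch.randn(2, 5, 32)   # (batch, seq_len, hidden_size)
y = moe(x)
assert y.shape == x.shape   # each token goes to one routed expert plus the shared expert
```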
gguf_temp/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
+size 11422650
gguf_temp/tokenizer_config.json ADDED
@@ -0,0 +1,29 @@
+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "is_local": false,
+  "model_max_length": 131072,
+  "pad_token": "<|im_end|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}
modeling_wind_arc.py ADDED
@@ -0,0 +1,93 @@
+"""
+Wind Arc 1.6 - Custom Model Class
+North.ai
+
+Registers Wind Arc as a proper HuggingFace model type.
+Allows: AutoModelForCausalLM.from_pretrained("arthu1/wind-arc-1-6")
+"""
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from transformers import AutoConfig, AutoModelForCausalLM
+from transformers.models.qwen3.modeling_qwen3 import (
+    Qwen3ForCausalLM, Qwen3Model, Qwen3DecoderLayer
+)
+from transformers import PretrainedConfig
+
+
+class WindArcConfig(PretrainedConfig):
+    model_type = "wind_arc"
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.model_name = kwargs.get("model_name", "Wind Arc 1.6")
+        self.made_by = kwargs.get("made_by", "North.ai")
+
+
+class WindArcMoE(nn.Module):
+    """
+    Wind Arc custom MoE FFN.
+    Replaces standard Qwen3 MLP with 4 routed experts + 1 shared expert.
+    """
+    def __init__(self, config):
+        super().__init__()
+        D = config.hidden_size
+        I = config.intermediate_size // 2
+        self.router = nn.Linear(D, 4, bias=False)
+        self.experts = nn.ModuleList([
+            nn.ModuleDict({
+                "gate": nn.Linear(D, I, bias=False),
+                "up": nn.Linear(D, I, bias=False),
+                "down": nn.Linear(I, D, bias=False),
+            }) for _ in range(4)
+        ])
+        self.shared_gate = nn.Linear(D, I, bias=False)
+        self.shared_up = nn.Linear(D, I, bias=False)
+        self.shared_down = nn.Linear(I, D, bias=False)
+
+    def forward(self, x):
+        B, L, D = x.shape
+        flat = x.reshape(-1, D)
+        probs = F.softmax(self.router(flat), dim=-1)
+        idx = torch.argmax(probs, dim=-1)
+        out = torch.zeros_like(flat)
+        for i, expert in enumerate(self.experts):
+            mask = (idx == i)
+            if mask.any():
+                xi = flat[mask]
+                out[mask] += expert["down"](
+                    F.silu(expert["gate"](xi)) * expert["up"](xi)
+                ) * probs[mask, i:i+1]
+        shared = self.shared_down(
+            F.silu(self.shared_gate(flat)) * self.shared_up(flat)
+        )
+        return (out + shared).reshape(B, L, D)
+
+
+class WindArcForCausalLM(Qwen3ForCausalLM):
+    """
+    Wind Arc 1.6 — Custom architecture by North.ai.
+    Extends Qwen3 with MoE FFN layers.
+    """
+    config_class = WindArcConfig
+
+    def __init__(self, config):
+        super().__init__(config)
+        # Replace all MLP layers with Wind Arc MoE
+        for layer in self.model.layers:
+            layer.mlp = WindArcMoE(config)
+        self.post_init()
+
+    @property
+    def model_identity(self):
+        return {
+            "name": "Wind Arc 1.6",
+            "made_by": "North.ai",
+            "arch": "YaRN RoPE + MoE FFN (4+1 experts) + Hybrid Attention",
+        }
+
+
+# Register with HuggingFace AutoClass
+AutoConfig.register("wind_arc", WindArcConfig)
+AutoModelForCausalLM.register(WindArcConfig, WindArcForCausalLM)
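
Since `AutoConfig.register` and `AutoModelForCausalLM.register` run at import time, importing this module is enough to build a fresh (randomly initialized) Wind Arc model straight from a config. A sketch, assuming `modeling_wind_arc.py` is importable locally and the repo's config.json supplies the usual Qwen3 hyperparameters:

```python
from transformers import AutoConfig, AutoModelForCausalLM
import modeling_wind_arc  # noqa: F401  (side effect: registers the "wind_arc" type)

config = AutoConfig.from_pretrained("arthu1/wind-arc-1-6", trust_remote_code=True)
model = AutoModelForCausalLM.from_config(config)  # random-init WindArcForCausalLM
print(model.model_identity)
```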