| """ |
| Wind Arc 1.6 - Custom Model Class |
| North.ai |
| |
| Registers Wind Arc as a proper HuggingFace model type. |
| Allows: AutoModelForCausalLM.from_pretrained("arthu1/wind-arc-1-6") |
| """ |
|
|
| import torch |
| import torch.nn as nn |
| import torch.nn.functional as F |
| from transformers import AutoConfig, AutoModelForCausalLM |
| from transformers.models.qwen3.modeling_qwen3 import ( |
| Qwen3ForCausalLM, Qwen3Model, Qwen3DecoderLayer |
| ) |
| from transformers import PretrainedConfig |
|
|
|
|
class WindArcConfig(PretrainedConfig):
    """Configuration class for Wind Arc checkpoints (``model_type="wind_arc"``).

    Every keyword argument is forwarded as-is to ``PretrainedConfig``. Two
    branding fields are then stored on the instance.

    Args:
        **kwargs: Arbitrary configuration fields. ``model_name`` and
            ``made_by`` are optional and fall back to ``"Wind Arc 1.6"`` and
            ``"North.ai"`` when not supplied.
    """

    model_type = "wind_arc"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Branding fields and the values used when the caller omits them.
        branding_defaults = {
            "model_name": "Wind Arc 1.6",
            "made_by": "North.ai",
        }
        for field, fallback in branding_defaults.items():
            setattr(self, field, kwargs.get(field, fallback))
|
|
|
|
class WindArcMoE(nn.Module):
    """Wind Arc mixture-of-experts feed-forward block.

    Each token is sent to exactly one routed expert (the one with the highest
    router probability), and that expert's output is scaled by the winning
    probability. Independently, every token also passes through a shared
    expert, and the two results are summed. All experts are gated MLPs of the
    form ``down(silu(gate(x)) * up(x))``.

    Args:
        config: Object exposing ``hidden_size`` and ``intermediate_size``.
            Every expert uses an inner width of ``intermediate_size // 2``.
        num_experts: Number of routed experts. Defaults to 4, which matches
            the original fixed layout.

    Raises:
        ValueError: If ``num_experts`` is smaller than 1.
    """

    def __init__(self, config, num_experts=4):
        super().__init__()
        if num_experts < 1:
            raise ValueError(f"num_experts must be >= 1, got {num_experts}")
        hidden = config.hidden_size
        inner = config.intermediate_size // 2
        self.num_experts = num_experts
        # Attribute names and creation order are kept stable: they determine
        # the state-dict keys and the order in which seeded init draws values.
        self.router = nn.Linear(hidden, num_experts, bias=False)
        self.experts = nn.ModuleList([
            nn.ModuleDict({
                "gate": nn.Linear(hidden, inner, bias=False),
                "up": nn.Linear(hidden, inner, bias=False),
                "down": nn.Linear(inner, hidden, bias=False),
            })
            for _ in range(num_experts)
        ])
        self.shared_gate = nn.Linear(hidden, inner, bias=False)
        self.shared_up = nn.Linear(hidden, inner, bias=False)
        self.shared_down = nn.Linear(inner, hidden, bias=False)

    @staticmethod
    def _gated_mlp(gate, up, down, tokens):
        """Apply one gated MLP: ``down(silu(gate(tokens)) * up(tokens))``."""
        return down(F.silu(gate(tokens)) * up(tokens))

    def forward(self, x):
        """Run the routed and shared experts over ``x``.

        Args:
            x: Tensor whose last dimension is the hidden size. Any number of
                leading dimensions is accepted, including empty ones.

        Returns:
            Tensor with the same shape as ``x``.
        """
        hidden = x.shape[-1]
        # Use the explicit token count instead of -1: reshape(-1, hidden) is
        # ambiguous (and raises) when a leading dimension is zero.
        flat = x.reshape(x.shape[:-1].numel(), hidden)

        probs = F.softmax(self.router(flat), dim=-1)
        idx = torch.argmax(probs, dim=-1)

        out = torch.zeros_like(flat)
        for i, expert in enumerate(self.experts):
            mask = idx == i
            # Skip experts that received no tokens in this batch.
            if mask.any():
                routed = self._gated_mlp(
                    expert["gate"], expert["up"], expert["down"], flat[mask]
                )
                # Scale by the winning probability, kept as a column so it
                # broadcasts across the hidden dimension.
                out[mask] += routed * probs[mask, i:i + 1]

        shared = self._gated_mlp(
            self.shared_gate, self.shared_up, self.shared_down, flat
        )
        return (out + shared).reshape(x.shape)
|
|
|
|
class WindArcForCausalLM(Qwen3ForCausalLM):
    """Wind Arc 1.6 causal language model by North.ai.

    A ``Qwen3ForCausalLM`` in which the ``mlp`` attribute of every entry in
    ``self.model.layers`` is replaced by a ``WindArcMoE`` block.

    Args:
        config: Model configuration. It is passed to the parent constructor
            and to every ``WindArcMoE`` that is created.
    """

    config_class = WindArcConfig

    def __init__(self, config):
        super().__init__(config)

        # Swap the feed-forward module of each decoder layer for the MoE block.
        for layer in self.model.layers:
            layer.mlp = WindArcMoE(config)
        # post_init() is invoked after the swap; presumably so the newly
        # created expert modules go through the usual HF weight
        # initialisation -- confirm against the transformers version in use.
        self.post_init()

    @property
    def model_identity(self):
        """Return a dict describing this model: ``name``, ``made_by``, ``arch``.

        ``name`` and ``made_by`` are read from the attached config
        (``model_name`` / ``made_by``) so that overrides supplied through the
        config are reported; the stock values are used when the config does
        not carry those fields.
        """
        return {
            "name": getattr(self.config, "model_name", "Wind Arc 1.6"),
            "made_by": getattr(self.config, "made_by", "North.ai"),
            "arch": "YaRN RoPE + MoE FFN (4+1 experts) + Hybrid Attention",
        }
|
|
|
|
| |
# Register the custom config and model with the HuggingFace Auto* factories so
# that "wind_arc" checkpoints resolve to the classes defined in this module
# once it has been imported. exist_ok=True keeps a second import (notebook
# cell re-run, importlib.reload) from raising because the "wind_arc" key is
# already registered.
AutoConfig.register("wind_arc", WindArcConfig, exist_ok=True)
AutoModelForCausalLM.register(WindArcConfig, WindArcForCausalLM, exist_ok=True)
|
|