# wind-arc-1-6-beta / modeling_wind_arc.py
# Uploaded by arthu1 ("Upload folder using huggingface_hub"), commit f1c4860 (verified).
"""
Wind Arc 1.6 - Custom Model Class
North.ai
Registers Wind Arc as a proper HuggingFace model type.
Allows: AutoModelForCausalLM.from_pretrained("arthu1/wind-arc-1-6")
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoConfig, AutoModelForCausalLM
from transformers.models.qwen3.modeling_qwen3 import (
Qwen3ForCausalLM, Qwen3Model, Qwen3DecoderLayer
)
from transformers import PretrainedConfig
class WindArcConfig(PretrainedConfig):
    """
    Configuration class for Wind Arc models (``model_type = "wind_arc"``).

    All keyword arguments are forwarded unchanged to
    ``PretrainedConfig.__init__``. Two branding fields are then stored on
    the instance, taken from the keyword arguments when supplied:

    * ``model_name`` -- defaults to ``"Wind Arc 1.6"``
    * ``made_by``    -- defaults to ``"North.ai"``
    """

    model_type = "wind_arc"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Branding fields and the value each one falls back to when the
        # caller (or a loaded config.json) does not provide it.
        branding_defaults = (
            ("model_name", "Wind Arc 1.6"),
            ("made_by", "North.ai"),
        )
        for field_name, fallback in branding_defaults:
            setattr(self, field_name, kwargs.get(field_name, fallback))
class WindArcMoE(nn.Module):
    """
    Wind Arc custom MoE FFN.

    Replaces the standard Qwen3 MLP with ``num_experts`` routed experts
    (4 by default) plus 1 shared expert. Every expert is a bias-free gated
    feed-forward block, ``down(silu(gate(x)) * up(x))``, whose inner width
    is ``config.intermediate_size // 2``.

    Routing is top-1: each token is sent to the single expert with the
    highest router probability, and that expert's output is scaled by the
    (un-renormalised) softmax probability. The shared expert processes
    every token and its output is added on top.

    Args:
        config: Object exposing integer ``hidden_size`` and
            ``intermediate_size`` attributes.
        num_experts: Number of routed experts. Defaults to 4, which keeps
            the parameter layout of existing checkpoints.

    Raises:
        ValueError: If ``num_experts`` is smaller than 1.
    """

    def __init__(self, config, num_experts=4):
        super().__init__()
        if num_experts < 1:
            raise ValueError(
                f"num_experts must be at least 1, got {num_experts}"
            )
        hidden_dim = config.hidden_size
        expert_dim = config.intermediate_size // 2
        self.num_experts = num_experts

        # NOTE: attribute names and creation order are kept stable on
        # purpose -- they determine the state-dict keys and the order in
        # which random initial weights are drawn.
        self.router = nn.Linear(hidden_dim, num_experts, bias=False)
        self.experts = nn.ModuleList([
            nn.ModuleDict({
                "gate": nn.Linear(hidden_dim, expert_dim, bias=False),
                "up": nn.Linear(hidden_dim, expert_dim, bias=False),
                "down": nn.Linear(expert_dim, hidden_dim, bias=False),
            })
            for _ in range(num_experts)
        ])
        self.shared_gate = nn.Linear(hidden_dim, expert_dim, bias=False)
        self.shared_up = nn.Linear(hidden_dim, expert_dim, bias=False)
        self.shared_down = nn.Linear(expert_dim, hidden_dim, bias=False)

    @staticmethod
    def _gated_ffn(gate, up, down, x):
        """Apply one gated feed-forward block: ``down(silu(gate(x)) * up(x))``."""
        return down(F.silu(gate(x)) * up(x))

    def forward(self, x):
        """
        Run the mixture-of-experts feed-forward block.

        Args:
            x: Tensor whose last dimension is ``hidden_size``. Any number
                of leading dimensions is accepted; they are flattened into
                a token axis for routing and restored on return.

        Returns:
            Tensor with the same shape as ``x``: routed-expert output plus
            shared-expert output for every token.
        """
        tokens = x.reshape(-1, x.shape[-1])

        probs = F.softmax(self.router(tokens), dim=-1)
        chosen = torch.argmax(probs, dim=-1)  # top-1 expert index per token

        routed = torch.zeros_like(tokens)
        for expert_id, expert in enumerate(self.experts):
            mask = chosen == expert_id
            # Skip experts that received no tokens in this batch.
            if mask.any():
                picked = tokens[mask]
                expert_out = self._gated_ffn(
                    expert["gate"], expert["up"], expert["down"], picked
                )
                # Scale by the router probability of the chosen expert;
                # the slice keeps a trailing dim of 1 for broadcasting.
                routed[mask] += expert_out * probs[mask, expert_id:expert_id + 1]

        shared = self._gated_ffn(
            self.shared_gate, self.shared_up, self.shared_down, tokens
        )
        return (routed + shared).reshape(x.shape)
class WindArcForCausalLM(Qwen3ForCausalLM):
    """
    Wind Arc 1.6 — Custom architecture by North.ai.

    Extends ``Qwen3ForCausalLM`` by swapping the ``mlp`` of every decoder
    layer in ``self.model.layers`` for a ``WindArcMoE`` block built from the
    same config.
    """

    config_class = WindArcConfig

    def __init__(self, config):
        """Build the Qwen3 base model, then install the MoE feed-forward blocks.

        Args:
            config: Model configuration; passed to the Qwen3 base class and
                to every ``WindArcMoE`` instance.
        """
        super().__init__(config)
        # Replace all MLP layers with Wind Arc MoE
        for layer in self.model.layers:
            layer.mlp = WindArcMoE(config)
        # NOTE(review): presumably re-run so the freshly created MoE modules
        # go through the standard post-construction setup -- confirm that a
        # second call after the base-class __init__ is intended.
        self.post_init()

    @property
    def model_identity(self):
        """Return a small dict describing this model.

        ``name`` and ``made_by`` are read from ``self.config`` so that values
        customised on the config are reflected here; when the config does
        not carry those fields the stock Wind Arc values are used.
        """
        return {
            "name": getattr(self.config, "model_name", "Wind Arc 1.6"),
            "made_by": getattr(self.config, "made_by", "North.ai"),
            "arch": "YaRN RoPE + MoE FFN (4+1 experts) + Hybrid Attention",
        }
# Hook Wind Arc into the HuggingFace Auto* factories:
#   * the "wind_arc" model type is associated with WindArcConfig, and
#   * WindArcConfig is associated with WindArcForCausalLM for causal-LM loading.
AutoConfig.register(
    "wind_arc",
    WindArcConfig,
)
AutoModelForCausalLM.register(
    WindArcConfig,
    WindArcForCausalLM,
)