Upload folder using huggingface_hub
Browse files
- README.md +9 -0
- model.safetensors +2 -2
README.md
CHANGED
|
@@ -158,6 +158,15 @@ config = AutoConfig.from_pretrained(
|
|
| 158 |
print(config)
|
| 159 |
torch.set_default_dtype(torch.bfloat16)
|
| 160 |
model = Qwen3_5MoeForConditionalGeneration(config)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
model.mtp = torch.nn.ModuleDict({
|
| 162 |
"pre_fc_norm_embedding": torch.nn.RMSNorm(config.text_config.hidden_size),
|
| 163 |
"fc": torch.nn.Linear(config.text_config.hidden_size * 2, config.text_config.hidden_size, bias=False),
|
|
|
|
| 158 |
print(config)
|
| 159 |
torch.set_default_dtype(torch.bfloat16)
|
| 160 |
model = Qwen3_5MoeForConditionalGeneration(config)
|
| 161 |
+
with torch.no_grad():
|
| 162 |
+
for i in range(3):
|
| 163 |
+
attn = model.model.language_model.layers[i].linear_attn
|
| 164 |
+
attn.A_log = torch.nn.Parameter(attn.A_log.float())
|
| 165 |
+
attn.norm.float()
|
| 166 |
+
|
| 167 |
+
print(model.state_dict()['model.language_model.layers.0.linear_attn.A_log'].dtype)
|
| 168 |
+
print(model.state_dict()['model.language_model.layers.0.linear_attn.norm.weight'].dtype)
|
| 169 |
+
|
| 170 |
model.mtp = torch.nn.ModuleDict({
|
| 171 |
"pre_fc_norm_embedding": torch.nn.RMSNorm(config.text_config.hidden_size),
|
| 172 |
"fc": torch.nn.Linear(config.text_config.hidden_size * 2, config.text_config.hidden_size, bias=False),
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e568307fe7ff224d8af7fca904d2c196fbf8231257434b99ce9b9b01cbc31b0
|
| 3 |
+
size 10058136
|