yujiepan committed on
Commit
2ebfa8d
·
verified ·
1 Parent(s): 940a1ec

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +9 -0
  2. model.safetensors +2 -2
README.md CHANGED
@@ -158,6 +158,15 @@ config = AutoConfig.from_pretrained(
158
  print(config)
159
  torch.set_default_dtype(torch.bfloat16)
160
  model = Qwen3_5MoeForConditionalGeneration(config)
 
 
 
 
 
 
 
 
 
161
  model.mtp = torch.nn.ModuleDict({
162
  "pre_fc_norm_embedding": torch.nn.RMSNorm(config.text_config.hidden_size),
163
  "fc": torch.nn.Linear(config.text_config.hidden_size * 2, config.text_config.hidden_size, bias=False),
 
158
  print(config)
159
  torch.set_default_dtype(torch.bfloat16)
160
  model = Qwen3_5MoeForConditionalGeneration(config)
161
+ with torch.no_grad():
162
+ for i in range(3):
163
+ attn = model.model.language_model.layers[i].linear_attn
164
+ attn.A_log = torch.nn.Parameter(attn.A_log.float())
165
+ attn.norm.float()
166
+
167
+ print(model.state_dict()['model.language_model.layers.0.linear_attn.A_log'].dtype)
168
+ print(model.state_dict()['model.language_model.layers.0.linear_attn.norm.weight'].dtype)
169
+
170
  model.mtp = torch.nn.ModuleDict({
171
  "pre_fc_norm_embedding": torch.nn.RMSNorm(config.text_config.hidden_size),
172
  "fc": torch.nn.Linear(config.text_config.hidden_size * 2, config.text_config.hidden_size, bias=False),
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a68280ca252dadbb9875aa7455a9def12207fc29aa7bf34f10f578a4e4102cb
3
- size 10057952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e568307fe7ff224d8af7fca904d2c196fbf8231257434b99ce9b9b01cbc31b0
3
+ size 10058136