# Uploaded by msj19 with the upload-large-folder tool (commit c39435c, verified).
import torch
from safetensors import safe_open
from safetensors.torch import save_file
# 1. Read all parameters from the original checkpoint file.
#
# `r` is referenced only by the commented-out rearrange experiment at the end
# of this script; `head` is not referenced by any live code visible here (the
# same value, 8, appears there as a literal).
head, r = 8, 4

# Maps each tensor name found in the checkpoint to the loaded tensor.
original_tensors = dict()
# path = '/mnt/jfzn/msj/train_exp/mask_gdn_1B_hrr4_byt'
# '/mnt/jfzn/msj/train_exp/gdn_1B_a800'
# with safe_open("/mnt/jfzn/msj/train_exp/gdn_1B_a800/model.safetensors", framework="pt", device="cpu") as f:
# for key in f.keys():
# original_tensors[key] = f.get_tensor(key)
# import torch.nn.init as init
# import math
# for i in range(24):
# new_tensor = torch.randn([128,2048],dtype=torch.bfloat16)
# init.xavier_uniform_(new_tensor, gain=2 ** -2.5)
# name = 'model.layers.'+str(i)+str('.attn.mask.weight')
# original_tensors[name] = new_tensor
# save_file(original_tensors, "/mnt/jfzn/msj/train_exp/gdn_1B_a800_cp/model.safetensors")
# Load every tensor of the checkpoint into `original_tensors` (on CPU) and,
# along the way, print the name and shape of each tensor whose key contains
# both 'mask' and '19'.
# NOTE(review): '19' is a plain substring test -- presumably meant to select
# layer 19; confirm that no other key can contain "19".
with safe_open(
    "/mnt/jfzn/msj/train_exp/mask_gdn_hrr4/model.safetensors",
    framework="pt",
    device="cpu",
) as f:
    for key in f.keys():
        original_tensors[key] = f.get_tensor(key)

        # Skip everything except the tensors we want to inspect.
        if not ('mask' in key and '19' in key):
            continue

        # Key on one line, shape on the next.
        print(key, original_tensors[key].shape, sep="\n")
# w = (original_tensors[key]).cuda()
# x = torch.randn([1,2048],dtype=torch.bfloat16).cuda()
# from einops import rearrange, repeat
# from fla.modules.l2norm import l2_norm as l2_norm_fn
# target_matrix = (x@w.transpose(0,1)).abs()
# target_matrix = rearrange(target_matrix,'l (h r c)->h l r c',r=r,h=8)#bhlrr
# target_matrix = l2_norm_fn(target_matrix)
# target_matrix = target_matrix@target_matrix.transpose(-1,-2)
# print(target_matrix)