aspctu
/

t12

Model card Files Files and versions

t12 / lib.linux-x86_64-cpython-310 /tensorrt_llm /models /grok /weight.py

aspctu's picture

Upload folder using huggingface_hub

5000658 verified over 1 year ago

history blame contribute delete

2.09 kB

	# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
	# SPDX-License-Identifier: Apache-2.0
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	from typing import Union

	import numpy as np
	import torch


	def gen_suffix(rank, use_smooth_quant, quant_per_channel):
	suffix = f"{rank}.bin"
	if use_smooth_quant:
	sq_prefix = "int8."
	if quant_per_channel:
	sq_prefix += "col."
	suffix = sq_prefix + suffix
	return suffix


	def extract_layer_idx(name):
	ss = name.split('.')
	for s in ss:
	if s.isdigit():
	return s
	return None


	def split(v: Union[np.ndarray, torch.Tensor],
	tp_size: int,
	tp_rank: int,
	dim=0):
	if tp_size == 1:
	return v
	assert len(v.shape) > 1 or dim == 0
	if isinstance(v, np.ndarray):
	return np.ascontiguousarray(
	np.split(v, tp_size, axis=dim)[tp_rank].copy())
	else:
	assert v.shape[dim] % tp_size == 0, \
	'Unable to split: shape={v.shape} (dim={dim}) tp_size={tp_size}.'
	split_size = v.shape[dim] // tp_size
	return v.split(split_size, dim=dim)[tp_rank].clone().detach()


	def dup_kv_weight(v, num_head, tp_size):
	assert tp_size % num_head == 0
	reps = tp_size // num_head
	head_size = v.shape[0] // num_head
	v = v.reshape(num_head, head_size,
	-1)[:, None, :, :].expand(num_head, reps, head_size,
	v.shape[1])
	return v.reshape(num_head * reps * head_size, -1).clone().detach()