Curt-Park committed on
Commit ·
966f777
1
Parent(s): 6ed5945
Add fastertransformer model
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- Makefile +7 -0
- huggingface_gptj_ckpt_convert.py +189 -0
- model_repository/codegen-350M-mono-gptj/1/.tmp +0 -0
- model_repository/codegen-350M-mono-gptj/1/config.ini +11 -0
- model_repository/codegen-350M-mono-gptj/1/model.final_layernorm.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.final_layernorm.weight.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.0.attention.dense.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.0.attention.query_key_value.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.0.input_layernorm.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.0.input_layernorm.weight.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_4h_to_h.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_4h_to_h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_h_to_4h.bias.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_h_to_4h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.1.attention.dense.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.1.attention.query_key_value.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.1.input_layernorm.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.1.input_layernorm.weight.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_4h_to_h.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_4h_to_h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_h_to_4h.bias.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_h_to_4h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.10.attention.dense.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.10.attention.query_key_value.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.10.input_layernorm.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.10.input_layernorm.weight.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_4h_to_h.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_4h_to_h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_h_to_4h.bias.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_h_to_4h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.11.attention.dense.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.11.attention.query_key_value.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.11.input_layernorm.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.11.input_layernorm.weight.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_4h_to_h.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_4h_to_h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_h_to_4h.bias.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_h_to_4h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.12.attention.dense.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.12.attention.query_key_value.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.12.input_layernorm.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.12.input_layernorm.weight.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_4h_to_h.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_4h_to_h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_h_to_4h.bias.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_h_to_4h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.13.attention.dense.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.13.attention.query_key_value.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.13.input_layernorm.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.13.input_layernorm.weight.bin +3 -0
Makefile
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Container image and tag used by the `triton` target.
# Both can be overridden from the command line, e.g. `make triton TRITON_VERSION=23.01`.
TRITON_CONTAINER_NAME := registry.gitlab.com/curt-park/tritonserver-ft
TRITON_VERSION := 22.12

# `triton` is a command, not a file: keep it runnable even if a file named
# "triton" ever appears next to this Makefile.
.PHONY: triton

# Start tritonserver in a throwaway container on GPU 0, publishing ports
# 8000-8002 and mounting ./model_repository at /models.
# $(CURDIR) is set by make itself, unlike $(PWD), which is inherited from the
# environment and may be stale or unset (e.g. with `make -C`).
triton:
	docker run --gpus "device=0" --shm-size=4G --rm \
		-p 8000:8000 -p 8001:8001 -p 8002:8002 -v "$(CURDIR)/model_repository":/models \
		$(TRITON_CONTAINER_NAME):$(TRITON_VERSION) tritonserver --model-repository=/models
|
huggingface_gptj_ckpt_convert.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Huggingface model converter to FasterTransformer.
|
| 2 |
+
|
| 3 |
+
Reference:
|
| 4 |
+
https://github.com/NVIDIA/FasterTransformer/tree/main/examples/pytorch/gptj/utils
|
| 5 |
+
"""
|
| 6 |
+
import configparser
|
| 7 |
+
from argparse import ArgumentParser
|
| 8 |
+
from os import makedirs
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
import torch
|
| 13 |
+
from transformers import PretrainedConfig
|
| 14 |
+
|
| 15 |
+
torch.set_printoptions(linewidth=130, sci_mode=False)
|
| 16 |
+
np.set_printoptions(linewidth=130, suppress=True)
|
| 17 |
+
|
| 18 |
+
# This converter is used to convert the huggingface moyix/codegen-350M-mono-gptj model.
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def savebin(param, save_path):
    """Write ``param`` to ``<save_path>.bin`` as raw float32 values.

    Args:
        param: A ``torch.Tensor`` or anything NumPy can treat as an array.
            Tensors are detached and moved to the CPU before conversion.
        save_path: Destination path *without* the ``.bin`` suffix; a ``str``
            or a path-like object.

    Size-1 dimensions are squeezed out before writing. The file contains only
    the array data (no header), so shape information is not stored.
    """
    if isinstance(param, torch.Tensor):
        # detach() first: .numpy() raises on tensors that still require grad.
        param = param.detach().cpu().float().numpy()
    # str() lets callers pass pathlib.Path objects as well as plain strings.
    np.squeeze(param).astype(np.float32).tofile(str(save_path) + ".bin")
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def param2file(pt_param, layer_id, save_dir, dest_key):
    """Save one unsharded per-layer parameter.

    The file is written by ``savebin`` under the name
    ``<save_dir>/model.layers.<layer_id>.<dest_key>`` (``savebin`` appends the
    ``.bin`` suffix).
    """
    target = f"{save_dir}/model.layers.{layer_id}.{dest_key}"
    savebin(pt_param, target)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def param2distributed(
    pt_param,
    layer_id,
    save_dir,
    dest_key,
    n_inference_gpus,
    split_axis,
):
    """Split a per-layer tensor into equal shards and save one file per shard.

    Args:
        pt_param: Tensor to shard; converted to a float32 NumPy array on the CPU.
        layer_id: Layer index used in the output file name.
        save_dir: Directory the shard files are written to.
        dest_key: Parameter name used in the output file name.
        n_inference_gpus: Number of shards to produce.
        split_axis: Axis along which the array is divided.

    Shard ``i`` is saved through ``savebin`` as
    ``<save_dir>/model.layers.<layer_id>.<dest_key>.<i>``. ``np.split`` raises
    ``ValueError`` when the axis length is not divisible by ``n_inference_gpus``.
    """
    prefix = f"{save_dir}/model.layers.{layer_id}.{dest_key}"
    shards = np.split(
        pt_param.cpu().float().numpy(), n_inference_gpus, axis=split_axis
    )
    for rank in range(len(shards)):
        savebin(shards[rank], f"{prefix}.{rank}")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def save(w, save_dir, n_inference_gpus, n_layers, layer_id):
    """Export the weights of one decoder layer, plus the token embedding table.

    Args:
        w: Mapping from checkpoint parameter names (``transformer.wte.weight``,
            ``transformer.h.<i>.*``) to tensors.
        save_dir: Output directory; created if it does not exist.
        n_inference_gpus: Number of shards each distributed weight is split into.
        n_layers: Total layer count; only used in the progress message.
        layer_id: Zero-based index of the layer to export.
    """
    makedirs(save_dir, exist_ok=True)

    # The embedding table does not depend on the layer. Write it on the first
    # layer (refreshing any stale copy) or whenever it is missing, instead of
    # rewriting the same large file on every per-layer call.
    wte_path = save_dir + "/model.wte"
    if layer_id == 0 or not Path(wte_path + ".bin").exists():
        savebin(w["transformer.wte.weight"], wte_path)

    print(f"Saving layer {layer_id + 1} / {n_layers}")
    base_k = "transformer.h." + str(layer_id) + "."

    # Layer norm parameters are stored whole (not sharded).
    param2file(w[base_k + "ln_1.bias"], layer_id, save_dir, "input_layernorm.bias")
    param2file(w[base_k + "ln_1.weight"], layer_id, save_dir, "input_layernorm.weight")

    # MLP input projection: weight is transposed, then split along the last axis.
    param2distributed(
        w[base_k + "mlp.fc_in.weight"].T,
        layer_id,
        save_dir,
        "mlp.dense_h_to_4h.weight",
        n_inference_gpus,
        split_axis=-1,  # split fast indx
    )
    param2distributed(
        w[base_k + "mlp.fc_in.bias"],
        layer_id,
        save_dir,
        "mlp.dense_h_to_4h.bias",
        n_inference_gpus,
        split_axis=-1,  # split fast indx
    )

    # MLP output projection: weight is transposed, then split along axis 0;
    # its bias is stored whole.
    param2distributed(
        w[base_k + "mlp.fc_out.weight"].T,
        layer_id,
        save_dir,
        "mlp.dense_4h_to_h.weight",
        n_inference_gpus,
        split_axis=0,  # split slow indx
    )
    param2file(
        w[base_k + "mlp.fc_out.bias"], layer_id, save_dir, "mlp.dense_4h_to_h.bias"
    )

    # Attention output projection.
    param2distributed(
        w[base_k + "attn.out_proj.weight"].T,
        layer_id,
        save_dir,
        "attention.dense.weight",
        n_inference_gpus,
        split_axis=0,  # split slow indx
    )

    # Fuse the separate Q, K and V projections into a single tensor.
    QKV_w = torch.stack(
        [
            w[base_k + "attn.q_proj.weight"],
            w[base_k + "attn.k_proj.weight"],
            w[base_k + "attn.v_proj.weight"],
        ]
    )  # [qkv, n_heads * dim_head, latent_space]
    # Move the last axis to the front: [latent_space, qkv, n_heads * dim_head].
    QKV_w = QKV_w.permute(2, 0, 1)
    param2distributed(
        QKV_w,
        layer_id,
        save_dir,
        "attention.query_key_value.weight",
        n_inference_gpus,
        split_axis=-1,  # split fast indx
    )
    # Other unneeded per-layer params:
    # attn.attention.masked_bias = torch.tensor(-1e9)
    # attn.attention.bias = torch.tril(torch.ones(1, 1, 2048, 2048))
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
if __name__ == "__main__":
    # Command-line entry point: read a Hugging Face GPT-J style checkpoint
    # (pytorch_model.bin + config.json) from --ckpt-dir and write raw float32
    # weight files plus a config.ini under <output-dir>/<n-inference-gpus>-gpu/.
    parser = ArgumentParser(
        description="Convert GPT-J slim checkpoint to FasterTransformer",
    )
    parser.add_argument(
        "--output-dir",
        help="Folder where binary files are stored",
        default="c-models/",
    )
    parser.add_argument(
        "--ckpt-dir",
        help="File of GPT-J huggingface checkpoint",
        default="./"
    )
    parser.add_argument(
        "--n-inference-gpus",
        help="Number of GPUs used for inference runtime",
        default=1,
        type=int,
    )
    parser.add_argument(
        "--n-layers", help="Number of GPT-J decoder layer", default=20, type=int
    )
    args = parser.parse_args()

    ckpt_file = args.ckpt_dir + "/pytorch_model.bin"
    print(f"loading from {ckpt_file}")
    # Load onto the CPU so the conversion also works on machines without the
    # device the checkpoint was saved from; every tensor is moved to the CPU
    # before being written anyway.
    checkpoint = torch.load(ckpt_file, map_location="cpu")

    out_path = args.output_dir
    output_dir = out_path + f"/{args.n_inference_gpus}-gpu/"
    print(f"saving to {output_dir}")

    config_file = args.ckpt_dir + "/config.json"
    hf_config = PretrainedConfig.from_json_file(config_file).to_dict()

    # NOTE: save parameters to config files (loaded by triton backends)
    config = configparser.ConfigParser()
    config["gptj"] = {}
    try:
        config["gptj"]["model_name"] = (
            "gptj" if hf_config["_name_or_path"] == "" else hf_config["_name_or_path"]
        )
        n_head = hf_config["n_head"]
        n_embd = hf_config["n_embd"]
        size_per_head = n_embd // n_head
        config["gptj"]["head_num"] = str(n_head)
        config["gptj"]["size_per_head"] = str(size_per_head)
        config["gptj"]["inter_size"] = str(n_embd * 4)
        config["gptj"]["num_layer"] = str(hf_config["n_layer"])
        # Fall back to the full head size when the checkpoint does not set
        # rotary_dim, and write the resolved value rather than the raw config
        # entry, which could otherwise end up as the string "None".
        rotary_dim = hf_config.get("rotary_dim")
        if rotary_dim is None:
            rotary_dim = size_per_head
        config["gptj"]["rotary_embedding"] = str(rotary_dim)
        config["gptj"]["vocab_size"] = str(hf_config["vocab_size"])
        config["gptj"]["start_id"] = str(hf_config["bos_token_id"])
        config["gptj"]["end_id"] = str(hf_config["eos_token_id"])
        # savebin() always writes float32 data.
        config["gptj"]["weight_data_type"] = "fp32"
        Path(output_dir).mkdir(exist_ok=True, parents=True)
        with open(output_dir + "/config.ini", "w") as configfile:
            config.write(configfile)
    except Exception as err:
        # Best effort: the weights are still exported without config.ini, but
        # report why it failed instead of hiding the cause.
        print(f"Fail to save the config in config.ini: {err}")

    # Per-layer weights.
    for i in range(args.n_layers):
        save(checkpoint, output_dir, args.n_inference_gpus, args.n_layers, i)

    # Model-level weights that are not tied to a single decoder layer.
    savebin(
        checkpoint["transformer.ln_f.weight"],
        output_dir + "/model.final_layernorm.weight",
    )
    savebin(
        checkpoint["transformer.ln_f.bias"], output_dir + "/model.final_layernorm.bias"
    )
    savebin(checkpoint["lm_head.weight"], output_dir + "/model.lm_head.weight")
    savebin(checkpoint["lm_head.bias"], output_dir + "/model.lm_head.bias")

    print("done")
|
model_repository/codegen-350M-mono-gptj/1/.tmp
ADDED
|
File without changes
|
model_repository/codegen-350M-mono-gptj/1/config.ini
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[gptj]
|
| 2 |
+
model_name = codegen-350M-mono-gptj.modeldir
|
| 3 |
+
head_num = 16
|
| 4 |
+
size_per_head = 64
|
| 5 |
+
inter_size = 4096
|
| 6 |
+
num_layer = 20
|
| 7 |
+
rotary_embedding = 32
|
| 8 |
+
vocab_size = 51200
|
| 9 |
+
start_id = 1
|
| 10 |
+
end_id = 2
|
| 11 |
+
weight_data_type = fp16
|
model_repository/codegen-350M-mono-gptj/1/model.final_layernorm.bias.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6baadc4e864dfa0c53c3341dc67099ed09289698d544e634bacb08486a180528
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.final_layernorm.weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dcb310b47f71b0c56f1297e99dbad24c9ef9f0a81c9bc18fa29d82f0bcc08e91
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.layers.0.attention.dense.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c1dcf7be0863950cc63acd4f2485f7679d117419d17fc38c823674bb33a220e
|
| 3 |
+
size 4194304
|
model_repository/codegen-350M-mono-gptj/1/model.layers.0.attention.query_key_value.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6af343be9479b9ffd272765ea7cce456f3f13f772fadd9902d19ea45fc971d1b
|
| 3 |
+
size 12582912
|
model_repository/codegen-350M-mono-gptj/1/model.layers.0.input_layernorm.bias.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f315eb011965fa6db1d263f4544ef6e41c060acf2afe3a4e27a803d2200a7c99
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.layers.0.input_layernorm.weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f2ff6f3e68f1134f1d335f9114b3e990088d818527de82979a5c39064bde295
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_4h_to_h.bias.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8f219b47cf228f28bb2f1ca91f51fc94f73c740c2177ec195f6124f04f9c62c2
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_4h_to_h.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3db2a87c94b1199184ae3100aaf7aebd529f7580698f1c5f9d68e354b4ca1462
|
| 3 |
+
size 16777216
|
model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_h_to_4h.bias.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22cf998991af5d8a7bc36d08bf7662b367cbe551873819159258fa274e3594f2
|
| 3 |
+
size 16384
|
model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_h_to_4h.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a102b9c88fc373088053ff00c4941bcccab807596341d9f85f72806041d9a556
|
| 3 |
+
size 16777216
|
model_repository/codegen-350M-mono-gptj/1/model.layers.1.attention.dense.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:243c25a80fca60c324fdeac51834ca7b778ff44a2ddca1e850c69d80b317af76
|
| 3 |
+
size 4194304
|
model_repository/codegen-350M-mono-gptj/1/model.layers.1.attention.query_key_value.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47f7cf47b6a572476edcc6301b33aaac926c3c1a28da028a319780208ebdaba6
|
| 3 |
+
size 12582912
|
model_repository/codegen-350M-mono-gptj/1/model.layers.1.input_layernorm.bias.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13715f7522d1fbc56c6faa84022665c78b8e31a73fab1c405184ae28d7dc7120
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.layers.1.input_layernorm.weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8def3f2b5036fb8eff90deb32abe452d015c1006855e74696b854c254f7006a9
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_4h_to_h.bias.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:202128f16470b7491bee6a1b7f4cc73a38969c9b7092eb0f218acf2dea26a9ab
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_4h_to_h.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:025555851b1722d58333dfdee090054f84aa58f8debc85796e6bd3352ce8ad3b
|
| 3 |
+
size 16777216
|
model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_h_to_4h.bias.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d5e12312267825cf338344300d163a4cb203347f08dd6a0c3dc7158ab072bb9
|
| 3 |
+
size 16384
|
model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_h_to_4h.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3bde7b51d3bff88b9caa4488eaa1b0f95fcaf807bf23d6291f355a60f998435
|
| 3 |
+
size 16777216
|
model_repository/codegen-350M-mono-gptj/1/model.layers.10.attention.dense.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9727928fc6c85917f01692245748e9495d3f1f81b1ba7c644cfa69165e4697a
|
| 3 |
+
size 4194304
|
model_repository/codegen-350M-mono-gptj/1/model.layers.10.attention.query_key_value.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:869e375c28a90cf5afc071d9a400aada12afdab863d431dd9de89e03a48fb76f
|
| 3 |
+
size 12582912
|
model_repository/codegen-350M-mono-gptj/1/model.layers.10.input_layernorm.bias.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed4a6ff55b24cf68c72fd5e0166ff92a1e9b3386f8a2594c1bc92a601dcd9c25
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.layers.10.input_layernorm.weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f901163e6531520d7fdf85516b2848da3af7795bb4d89b18242d11e9bf5fa92
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_4h_to_h.bias.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90141c941963c3e6f673e23e5f916ced5427f04fbf35c4ddd1a2b65c562d26ba
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_4h_to_h.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eca2280290060ea1ebcf939255556277600edcd06ed0a193352fa86386ddfccb
|
| 3 |
+
size 16777216
|
model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_h_to_4h.bias.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28dcce9330a8980d959f09d1225e3493c6277def4b0e821dcb3f0fb6c6d785f9
|
| 3 |
+
size 16384
|
model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_h_to_4h.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f6cc56de1d451c3026fa36389df2a15bfeea8b82e17d35556ac45ccdb1ff41a
|
| 3 |
+
size 16777216
|
model_repository/codegen-350M-mono-gptj/1/model.layers.11.attention.dense.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:527ecabc9993548e5ab44d02a9d50ac5438ed8d7cc9877c3bd568282af05a150
|
| 3 |
+
size 4194304
|
model_repository/codegen-350M-mono-gptj/1/model.layers.11.attention.query_key_value.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:452f635467e63d403ae8290171d36442999e54194cf99a917cbee6c68c0746da
|
| 3 |
+
size 12582912
|
model_repository/codegen-350M-mono-gptj/1/model.layers.11.input_layernorm.bias.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc785b33a9ec3058514bfc2961ddb0eeff416aa59abf1a6aaae854c3e389a672
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.layers.11.input_layernorm.weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00e30bb025be916ec8c6e7e4a9017f55a10a3dd165062952de80c7d6727fc569
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_4h_to_h.bias.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:743698edcab6405ec376a346d9dcea77158557896ebd31bc802b436d2c926b3f
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_4h_to_h.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b15fd6bdc29d6e873ce030f9218153b260b540b8d8b1784da7c0da35b957d85b
|
| 3 |
+
size 16777216
|
model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_h_to_4h.bias.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d88855ac27749c80b4e7bf0c56ea865994ba7dbca813ef0d7f2abf03dd1ff7b
|
| 3 |
+
size 16384
|
model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_h_to_4h.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b7e1da468c678728282689498134678867f8170adc6a5146c01b448b75c0ca2d
|
| 3 |
+
size 16777216
|
model_repository/codegen-350M-mono-gptj/1/model.layers.12.attention.dense.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b880954fcff18203fdb5d0e9bcc91ae8cb4353eb8ce653d40e714ed13c0f2416
|
| 3 |
+
size 4194304
|
model_repository/codegen-350M-mono-gptj/1/model.layers.12.attention.query_key_value.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eebf29afbd6dc7bca9a6b4c7c908c3c6899d0d0babe38da051593d5fbe4467ea
|
| 3 |
+
size 12582912
|
model_repository/codegen-350M-mono-gptj/1/model.layers.12.input_layernorm.bias.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da5dabbde26c2a8e26563c77d784fece6aa31c9b0e5d3a7a1bfc9bacb14f3190
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.layers.12.input_layernorm.weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94da027e1b7e1904de1be8edadaab75208d0691a3461ed8f85508aaf1f663b1a
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_4h_to_h.bias.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c25103cd0354680d6cef87b2043e39a2d24766b798feec5f2133b9de9e26b4d
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_4h_to_h.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a08674f5549b01d5e001bff7da0543561af8ac994b7e863f5a8682b9c711137
|
| 3 |
+
size 16777216
|
model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_h_to_4h.bias.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:099f92ef914830dab645ac9f324c91190ec88b0ef357b1ecfab8385bb05124b0
|
| 3 |
+
size 16384
|
model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_h_to_4h.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8aa5cf3d95ef48bc8674f006f6254a89de6def30256f3aaa4d639062b975cf34
|
| 3 |
+
size 16777216
|
model_repository/codegen-350M-mono-gptj/1/model.layers.13.attention.dense.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dbd19b1dad736b78b633ee056c782f0613a20ed7e5aacc629506863a9055b9b5
|
| 3 |
+
size 4194304
|
model_repository/codegen-350M-mono-gptj/1/model.layers.13.attention.query_key_value.weight.0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17c89b990942abbe469ed7b0379631b2a698b18c66e867a5ad277648f8c75eb9
|
| 3 |
+
size 12582912
|
model_repository/codegen-350M-mono-gptj/1/model.layers.13.input_layernorm.bias.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22dab386db6ff716ac91335582aaf4fe2844e5d657d47a8e3c39154f75188661
|
| 3 |
+
size 4096
|
model_repository/codegen-350M-mono-gptj/1/model.layers.13.input_layernorm.weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe7a64a19a0a0288e56cddfb3eb9496641bc8c64b633ae0f454403f36f7bd989
|
| 3 |
+
size 4096
|