| | |
| | |
| |
|
| | import sys |
| | import torch |
| | import struct |
| | import numpy as np |
| |
|
# ---------------------------------------------------------------------------
# Command-line handling.
#
# Invocation: convert-pth-to-ggml.py file-model dir-output [ftype]
# Produces the globals used by the rest of the script:
#   fname_model - path of the checkpoint to read
#   dir_out     - directory the converted file is written into
#   ftype       - 0 (float32) or 1 (float16); defaults to 1
#   fname_out   - dir_out + "/ggml-model-<f32|f16>.bin"
# Exits with status 1 on missing arguments or an invalid ftype.
# ---------------------------------------------------------------------------
if len(sys.argv) < 3:
    print("Usage: convert-pth-to-ggml.py file-model dir-output [ftype]\n")
    print(" ftype == 0 -> float32")
    print(" ftype == 1 -> float16")
    sys.exit(1)

fname_model = sys.argv[1]
dir_out = sys.argv[2]

# The list index is the numeric ftype; the string is the suffix that ends up
# in the output file name.
ftype_str = ["f32", "f16"]

# float16 unless a third argument overrides it.
ftype = 1
if len(sys.argv) > 3:
    try:
        ftype = int(sys.argv[3])
    except ValueError:
        # A non-numeric ftype is reported the same way as an out-of-range
        # one instead of surfacing as an uncaught traceback.
        print("Invalid ftype: " + sys.argv[3])
        sys.exit(1)

if not 0 <= ftype < len(ftype_str):
    print("Invalid ftype: " + str(ftype))
    sys.exit(1)

# Assemble the name directly rather than patching ".bin" with str.replace():
# replace() rewrites *every* ".bin" in the path, which corrupts the directory
# part when dir_out itself contains ".bin".
fname_out = dir_out + "/ggml-model-" + ftype_str[ftype] + ".bin"
| |
|
| | |
# ---------------------------------------------------------------------------
# Load the checkpoint and derive the encoder hyperparameters.
#
# The values below are the defaults; the encoder width is overwritten by the
# first dimension of "image_encoder.blocks.0.norm1.weight" when that tensor
# is present, and the layer/head counts follow from the detected width.
# ---------------------------------------------------------------------------
n_enc_state = 768
n_enc_layers = 12
n_enc_heads = 12
n_enc_out_chans = 256
n_pt_embd = 4

# NOTE(review): torch.load() unpickles the file, so this should only be run
# on checkpoints from a trusted source.
model = torch.load(fname_model, map_location="cpu")

# List every tensor and pick up the encoder width along the way.
for tensor_name, tensor in model.items():
    print(tensor_name, tensor.shape)
    if tensor_name == "image_encoder.blocks.0.norm1.weight":
        n_enc_state = tensor.shape[0]

# Encoder width -> (layers, heads). Any width not listed keeps the defaults
# set above. NOTE(review): 1024 / 1280 presumably correspond to the larger
# model variants - confirm against the checkpoints in use.
n_enc_layers, n_enc_heads = {
    1024: (24, 16),
    1280: (32, 16),
}.get(n_enc_state, (n_enc_layers, n_enc_heads))

hparams = dict(
    n_enc_state=n_enc_state,
    n_enc_layers=n_enc_layers,
    n_enc_heads=n_enc_heads,
    n_enc_out_chans=n_enc_out_chans,
    n_pt_embd=n_pt_embd,
)
print(hparams)

# Second listing of all tensor names and shapes (kept as in the original).
for tensor_name, tensor in model.items():
    print(tensor_name, tensor.shape)
| |
|
| | |
| | |
| |
|
# ---------------------------------------------------------------------------
# Write the converted model to fname_out.
#
# File layout (all integers packed with struct format "i"):
#   magic 0x67676d6c (the ASCII bytes "ggml" written as a hex number)
#   n_enc_state, n_enc_layers, n_enc_heads, n_enc_out_chans, n_pt_embd
#   ftype
#   then, for every tensor that is not skipped:
#     n_dims, byte length of the name, per-tensor ftype (0 = f32, 1 = f16)
#     the dimensions in reverse order (last axis first)
#     the UTF-8 encoded name
#     the raw tensor data
# ---------------------------------------------------------------------------

# Tensors whose name starts with one of these are always stored as float32.
f32_name_prefixes = (
    "prompt_encoder",
    "mask_decoder.iou_token",
    "mask_decoder.mask_tokens",
)

# "with" guarantees the file is closed even if a tensor fails to convert
# half-way through.
with open(fname_out, "wb") as fout:
    fout.write(struct.pack("i", 0x67676d6c))
    fout.write(struct.pack("i", hparams["n_enc_state"]))
    fout.write(struct.pack("i", hparams["n_enc_layers"]))
    fout.write(struct.pack("i", hparams["n_enc_heads"]))
    fout.write(struct.pack("i", hparams["n_enc_out_chans"]))
    fout.write(struct.pack("i", hparams["n_pt_embd"]))
    fout.write(struct.pack("i", ftype))

    for name, tensor in model.items():
        # These tensors are not written to the output file at all.
        if name.startswith("prompt_encoder.mask"):
            continue

        print("Processing variable: " + name + " with shape: ", tensor.shape, " and type: ", tensor.dtype)

        data = tensor.numpy()
        n_dims = len(data.shape)

        # float16 is used only when it was requested globally AND the tensor
        # is multi-dimensional AND it is not one of the named exceptions.
        keep_f32 = (
            ftype == 0
            or n_dims == 1
            or name == "image_encoder.pos_embed"
            or name.startswith(f32_name_prefixes)
        )
        if keep_f32:
            print(" Converting to float32")
            data = data.astype(np.float32)
            ftype_cur = 0
        else:
            print(" Converting to float16")
            data = data.astype(np.float16)
            ftype_cur = 1

        # This 1-D bias is stored as a 4-D tensor of shape (1, C, 1, 1).
        # NOTE(review): presumably so the consumer can broadcast it over a
        # 4-D activation - confirm against the loader.
        if name == "image_encoder.patch_embed.proj.bias":
            data = data.reshape(1, data.shape[0], 1, 1)
            n_dims = len(data.shape)

        dshape = data.shape
        print(" New shape: ", dshape)

        # Per-tensor header. The encoded name gets its own variable so the
        # builtin str() is not overwritten at module scope.
        name_bytes = name.encode("utf-8")
        fout.write(struct.pack("iii", n_dims, len(name_bytes), ftype_cur))
        for dim in reversed(dshape):
            fout.write(struct.pack("i", dim))
        fout.write(name_bytes)

        # Raw tensor payload.
        data.tofile(fout)

print("Done. Output file: " + fname_out)
print("")
| |
|