File size: 2,519 Bytes
a0994f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3
"""Rename GGUF architecture (and all arch-namespaced KV keys)."""
from __future__ import annotations

import argparse
import sys
from pathlib import Path

from tqdm import tqdm

from gguf import GGUFReader, GGUFValueType, GGUFWriter
from gguf.constants import Keys


def rename(src: Path, dst: Path, old_arch: str, new_arch: str) -> None:
    """Copy the GGUF file *src* to *dst* under a different architecture name.

    The output is created with *new_arch* as its architecture, and every
    metadata key starting with ``"{old_arch}."`` is re-emitted with a
    ``"{new_arch}."`` prefix. All other metadata keys and all tensors are
    copied through unchanged.

    Args:
        src: Path of the GGUF file to read.
        dst: Path of the GGUF file to write; must not be the same file as
            *src*.
        old_arch: Architecture name the input is expected to declare.
        new_arch: Architecture name to write to the output.

    Raises:
        SystemExit: If *src* has no architecture key, if its architecture is
            not *old_arch*, or if *dst* refers to the same file as *src*.
    """
    reader = GGUFReader(src, "r")
    arch_field = reader.get_field(Keys.General.ARCHITECTURE)
    if arch_field is None:
        sys.exit(f"no {Keys.General.ARCHITECTURE} in {src}")
    cur_val = str(bytes(arch_field.parts[arch_field.data[0]]), encoding="utf-8")
    if cur_val != old_arch:
        sys.exit(f"expected arch={old_arch!r}, found {cur_val!r}")

    # Tensor data is read from src while dst is being written, so the two
    # must be different files or the input would be clobbered mid-copy.
    if dst.exists() and dst.samefile(src):
        sys.exit(f"output {dst} is the same file as input {src}")

    writer = GGUFWriter(dst, new_arch, endianess=reader.endianess)
    # The loop below copies any alignment key verbatim, so the writer has to
    # pad tensor data with the same alignment the input declared; otherwise
    # the output's metadata and its actual layout would disagree.
    writer.data_alignment = reader.alignment

    try:
        old_prefix = f"{old_arch}."
        renamed_keys = 0
        for field in reader.fields.values():
            # GGUFWriter writes general.architecture from the `arch` ctor arg.
            # Names starting with "GGUF." are skipped as well (presumably
            # reader-synthesized header fields rather than real KV pairs --
            # confirm against gguf-py).
            if field.name == Keys.General.ARCHITECTURE or field.name.startswith("GGUF."):
                continue

            new_name = field.name
            if field.name.startswith(old_prefix):
                new_name = f"{new_arch}." + field.name[len(old_prefix):]
                renamed_keys += 1

            # For arrays, the element type is taken from the last entry of
            # field.types; scalar values carry no sub-type.
            val_type = field.types[0]
            sub_type = field.types[-1] if val_type == GGUFValueType.ARRAY else None
            writer.add_key_value(new_name, field.contents(), val_type, sub_type=sub_type)

        # Register every tensor before anything is written, and total up the
        # byte count for the progress bar.
        total_bytes = 0
        for tensor in reader.tensors:
            total_bytes += tensor.n_bytes
            writer.add_tensor_info(
                tensor.name,
                tensor.data.shape,
                tensor.data.dtype,
                tensor.data.nbytes,
                tensor.tensor_type,
            )

        print(f"[*] renamed {renamed_keys} {old_arch}.* keys -> {new_arch}.*")
        print(f"[*] writing {dst}")

        # The context manager closes the progress bar even if a write fails.
        with tqdm(desc="Writing", total=total_bytes, unit="B", unit_scale=True) as bar:
            writer.write_header_to_file()
            writer.write_kv_data_to_file()
            writer.write_ti_data_to_file()
            for tensor in reader.tensors:
                writer.write_tensor_data(tensor.data, tensor_endianess=reader.endianess)
                bar.update(tensor.n_bytes)
    finally:
        # Always release the output file handle, including on error paths.
        writer.close()


def main() -> None:
    """Parse the command line and run the architecture rename.

    Takes two positional paths (``input`` and ``output``) plus the optional
    ``--from-arch`` / ``--to-arch`` names, which default to ``qwen35`` and
    ``qwen36`` respectively.
    """
    parser = argparse.ArgumentParser()
    for positional in ("input", "output"):
        parser.add_argument(positional, type=Path)
    parser.add_argument("--from-arch", default="qwen35")
    parser.add_argument("--to-arch", default="qwen36")
    opts = parser.parse_args()
    rename(
        opts.input,
        opts.output,
        opts.from_arch,
        opts.to_arch,
    )


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()