Thanatos-27B / scripts /rename_arch.py
FoolDev's picture
scripts: add rename_arch.py + gitignore qwen-suffixed rebadges
a0994f1
#!/usr/bin/env python3
"""Rename GGUF architecture (and all arch-namespaced KV keys)."""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
from tqdm import tqdm
from gguf import GGUFReader, GGUFValueType, GGUFWriter
from gguf.constants import Keys
def rename(src: Path, dst: Path, old_arch: str, new_arch: str) -> None:
    """Copy the GGUF file *src* to *dst* under a new architecture name.

    ``general.architecture`` is written as *new_arch*, and every metadata key
    that starts with ``"{old_arch}."`` is re-emitted with a ``"{new_arch}."``
    prefix instead. All other metadata keys and every tensor are copied
    through unchanged.

    Args:
        src: GGUF file to read.
        dst: File to write; must not resolve to the same path as *src*.
        old_arch: Architecture name *src* is expected to declare.
        new_arch: Architecture name to declare in *dst*.

    Raises:
        SystemExit: If *src* and *dst* resolve to the same path, if *src* has
            no ``general.architecture`` field, or if that field's value is
            not *old_arch*.
    """
    # Tensor data is streamed from src while dst is being written, so writing
    # onto the input path would destroy the data that is still to be copied.
    if Path(src).resolve() == Path(dst).resolve():
        sys.exit(f"input and output are the same file: {src}")

    reader = GGUFReader(src, "r")
    cur = reader.get_field(Keys.General.ARCHITECTURE)
    if cur is None:
        sys.exit(f"no {Keys.General.ARCHITECTURE} in {src}")
    cur_val = str(bytes(cur.parts[cur.data[0]]), encoding="utf-8")
    if cur_val != old_arch:
        sys.exit(f"expected arch={old_arch!r}, found {cur_val!r}")

    old_prefix = f"{old_arch}."
    writer = GGUFWriter(dst, new_arch, endianess=reader.endianess)
    try:
        # general.alignment is copied below as an ordinary key, but the writer
        # pads tensor data according to its own data_alignment attribute.
        # Keep both in sync so a non-default alignment survives the copy.
        writer.data_alignment = reader.alignment

        renamed_keys = 0
        for field in reader.fields.values():
            # GGUFWriter writes general.architecture from the `arch` ctor arg.
            # Keys under "GGUF." are skipped too (presumably reader-side
            # header pseudo-fields -- confirm against gguf-py).
            if field.name == Keys.General.ARCHITECTURE or field.name.startswith("GGUF."):
                continue
            new_name = field.name
            if field.name.startswith(old_prefix):
                new_name = f"{new_arch}.{field.name[len(old_prefix):]}"
                renamed_keys += 1
            val_type = field.types[0]
            # For arrays the element type is the last entry of field.types.
            sub_type = field.types[-1] if val_type == GGUFValueType.ARRAY else None
            writer.add_key_value(new_name, field.contents(), val_type, sub_type=sub_type)

        # Register every tensor first; the payload is written afterwards.
        total_bytes = 0
        for tensor in reader.tensors:
            total_bytes += tensor.n_bytes
            writer.add_tensor_info(
                tensor.name,
                tensor.data.shape,
                tensor.data.dtype,
                tensor.data.nbytes,
                tensor.tensor_type,
            )

        print(f"[*] renamed {renamed_keys} {old_arch}.* keys -> {new_arch}.*")
        print(f"[*] writing {dst}")

        # The context manager closes the progress bar even if a write fails.
        with tqdm(desc="Writing", total=total_bytes, unit="B", unit_scale=True) as bar:
            writer.write_header_to_file()
            writer.write_kv_data_to_file()
            writer.write_ti_data_to_file()
            for tensor in reader.tensors:
                writer.write_tensor_data(tensor.data, tensor_endianess=reader.endianess)
                bar.update(tensor.n_bytes)
    finally:
        # Release the output file handle on success and on failure alike.
        writer.close()
def main() -> None:
    """Parse the command line and run :func:`rename`.

    Positional arguments are the input and output paths; ``--from-arch`` and
    ``--to-arch`` select the architecture names and default to ``qwen35`` and
    ``qwen36`` respectively.
    """
    parser = argparse.ArgumentParser()
    for positional in ("input", "output"):
        parser.add_argument(positional, type=Path)
    parser.add_argument("--from-arch", default="qwen35")
    parser.add_argument("--to-arch", default="qwen36")
    opts = parser.parse_args()
    rename(
        src=opts.input,
        dst=opts.output,
        old_arch=opts.from_arch,
        new_arch=opts.to_arch,
    )


if __name__ == "__main__":
    main()