MobileLLM-R1-950M-MLX / custom_mlx_lm / quant_summary.py

add mlx and mlx-lm support

e39ff3a 5 months ago

2.02 kB

	import argparse
	import json
	from pathlib import Path
	import numpy as np


	def main(argv=None):
	    """Print a summary of the quantization layout stored in a model's config.

	    Reads ``config.json`` from ``--model-path`` and reports the ``method`` and
	    ``group_size`` found under its ``"quantization"`` key. For the
	    ``"uniform"`` method only the single ``bits`` value is printed. Otherwise
	    the ``per_layer_bits`` mapping is split into 4-bit, 8-bit and "other"
	    groups, the group sizes are printed, and up to ``--show`` sorted entry
	    names are listed for the 8-bit and 4-bit groups. Finally, on a
	    best-effort basis, ``weights.npz`` is opened to report whether any stored
	    array name ends in ``.scales`` or ``.biases``.

	    Args:
	        argv: Optional list of command-line arguments (without the program
	            name). When ``None`` (the default), argparse reads ``sys.argv``.

	    Raises:
	        OSError: If ``config.json`` cannot be read.
	        json.JSONDecodeError: If ``config.json`` is not valid JSON.
	        SystemExit: Raised by argparse on invalid arguments or ``--help``.
	    """
	    parser = argparse.ArgumentParser(description="Summarize MLX-LM quantization layout")
	    parser.add_argument("--model-path", required=True, help="Path to converted MLX model")
	    parser.add_argument("--show", type=int, default=10, help="Show up to N entries per group")
	    args = parser.parse_args(argv)

	    mpath = Path(args.model_path)
	    # Decode explicitly as UTF-8 instead of relying on the platform default.
	    cfg = json.loads((mpath / "config.json").read_text(encoding="utf-8"))
	    # ``or {}`` also covers an explicit null value in the config.
	    q = cfg.get("quantization") or {}
	    method = q.get("method", "none")
	    gsize = q.get("group_size")
	    plb = q.get("per_layer_bits") or {}

	    print(f"Method: {method}")
	    print(f"Group size: {gsize}")
	    if method == "uniform":
	        print(f"Uniform bits: {q.get('bits')}")
	        return

	    if not plb:
	        print("No per-layer bits found in config.")
	        return

	    # Basic counts: anything that is neither 4 nor 8 lands in "other".
	    buckets = {4: [], 8: [], "other": []}
	    for name, bits in plb.items():
	        buckets[bits if bits in (4, 8) else "other"].append(name)

	    total = sum(len(names) for names in buckets.values())
	    print(f"Total linear layers: {total}")
	    print(f"4-bit layers: {len(buckets[4])}")
	    print(f"8-bit layers: {len(buckets[8])}")
	    if buckets["other"]:
	        print(f"Other-bit layers: {len(buckets['other'])}")

	    # Optional: show a few examples. A negative --show would otherwise drop
	    # entries from the end of the list via slice semantics, so clamp it.
	    limit = max(args.show, 0)
	    for bits in (8, 4):
	        names = sorted(buckets[bits])
	        if not names:
	            continue
	        print(f"\nExamples ({bits}-bit):")
	        for name in names[:limit]:
	            print(f"- {name}")

	    # Optional: sanity-check against npz contents. This is deliberately
	    # best-effort: any failure is reported as a note, never as a crash.
	    try:
	        # The context manager closes the underlying archive handle.
	        with np.load(mpath / "weights.npz", allow_pickle=False) as npz:
	            has_q = any(key.endswith((".scales", ".biases")) for key in npz.files)
	    except Exception as e:
	        print(f"Note: could not open weights.npz: {e}")
	    else:
	        print(f"\nweights.npz contains quantized tensors: {has_q}")


	if __name__ == "__main__":
	    # Run the CLI only when executed as a script, not when imported.
	    main()