"""Summarize the quantization layout recorded in a converted MLX-LM model."""

import argparse
import json
from pathlib import Path
from typing import Optional, Sequence

import numpy as np


def _bucket_layers(per_layer_bits):
    """Group layer names by bit width into the buckets 4, 8 and "other"."""
    buckets = {4: [], 8: [], "other": []}
    for name, bits in per_layer_bits.items():
        # Anything that is not exactly 4 or 8 bits is reported as "other".
        key = bits if bits in (4, 8) else "other"
        buckets[key].append(name)
    return buckets


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Print a summary of the quantization settings of an MLX model directory.

    Reads ``config.json`` from ``--model-path``, prints the quantization
    method and group size, and, when a ``per_layer_bits`` mapping is present,
    prints per-bit-width layer counts plus up to ``--show`` example layer
    names for the 8-bit and 4-bit groups. Finally it makes a best-effort
    check of whether ``weights.npz`` contains ``.scales`` / ``.biases``
    entries.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, so existing ``main()`` callers
            keep working.

    Raises:
        OSError: If ``config.json`` cannot be read.
        json.JSONDecodeError: If ``config.json`` is not valid JSON.
    """
    p = argparse.ArgumentParser(description="Summarize MLX-LM quantization layout")
    p.add_argument("--model-path", required=True, help="Path to converted MLX model")
    p.add_argument("--show", type=int, default=10, help="Show up to N entries per group")
    args = p.parse_args(argv)

    mpath = Path(args.model_path)
    # JSON is UTF-8 by specification; do not depend on the platform default.
    cfg = json.loads((mpath / "config.json").read_text(encoding="utf-8"))
    # "quantization" may be missing or null; treat both as "no settings".
    q = cfg.get("quantization") or {}
    method = q.get("method", "none")
    gsize = q.get("group_size")
    plb = q.get("per_layer_bits", {})

    print(f"Method: {method}")
    print(f"Group size: {gsize}")

    if method == "uniform":
        print(f"Uniform bits: {q.get('bits')}")
        return

    if not plb:
        print("No per-layer bits found in config.")
        return

    # Basic counts
    buckets = _bucket_layers(plb)
    total = sum(len(v) for v in buckets.values())
    print(f"Total linear layers: {total}")
    print(f"4-bit layers: {len(buckets[4])}")
    print(f"8-bit layers: {len(buckets[8])}")
    if buckets["other"]:
        print(f"Other-bit layers: {len(buckets['other'])}")

    # Optional: show a few examples
    # A negative --show would slice as items[:-N] and silently drop trailing
    # entries instead of limiting the count, so clamp it to zero.
    limit = max(args.show, 0)
    for b in (8, 4):
        items = sorted(buckets[b])
        if not items:
            continue
        print(f"\nExamples ({b}-bit):")
        for k in items[:limit]:
            print(f"- {k}")

    # Optional: sanity-check against npz contents
    # Deliberately best-effort: any failure to open or parse the archive is
    # reported as a note rather than aborting the summary.
    try:
        # NpzFile keeps the underlying zip file open; the context manager
        # guarantees the handle is released.
        with np.load(mpath / "weights.npz", allow_pickle=False) as npz:
            has_q = any(k.endswith((".scales", ".biases")) for k in npz.files)
    except Exception as e:
        print(f"Note: could not open weights.npz: {e}")
    else:
        print(f"\nweights.npz contains quantized tensors: {has_q}")


if __name__ == "__main__":
    main()