| | from ggml import ffi, lib |
| | from ggml.utils import init, numpy, copy |
| | import numpy as np |
| | from math import pi, cos, sin, ceil |
| |
|
| | import matplotlib.pyplot as plt |
| |
|
# ggml context created with a mem_size of 100 MiB; the tensors below are
# created against it.
ctx = init(mem_size=100 * 1024 * 1024)
n = 256

# Reference image: entry (i, j) is cos(2*pi*j/n) * sin(2*pi*i/n).
# Each factor depends on only one index, so both are evaluated once per
# column / row and then multiplied together.
_cos_by_col = [cos(j * 2 * pi / n) for j in range(n)]
_sin_by_row = [sin(i * 2 * pi / n) for i in range(n)]
orig = np.array(
    [[c * s for c in _cos_by_col] for s in _sin_by_row],
    np.float32,
)

# n x n F32 ggml tensor; `copy` is presumably what transfers the NumPy data
# into it (helper from ggml.utils -- confirm against its documentation).
orig_tensor = lib.ggml_new_tensor_2d(ctx, lib.GGML_TYPE_F32, n, n)
copy(orig, orig_tensor)
| |
|
# Collect every type id that ggml reports as quantized, leaving out
# Q8_1 and Q8_K, which this script does not exercise.
_excluded_types = [lib.GGML_TYPE_Q8_1, lib.GGML_TYPE_Q8_K]
quants = []
for type_id in range(lib.GGML_TYPE_COUNT):
    if not lib.ggml_is_quantized(type_id):
        continue
    if type_id in _excluded_types:
        continue
    quants.append(type_id)
| | |
| |
|
def get_name(type):
    """Return ggml's name for the type id ``type`` as a ``str``.

    Falls back to ``'?'`` when ``lib.ggml_type_name`` returns a falsy value
    (presumably a NULL pointer for ids without a registered name).
    """
    raw_name = lib.ggml_type_name(type)
    if not raw_name:
        return '?'
    # ffi.string yields bytes for a C string; decode to text.
    return ffi.string(raw_name).decode('utf-8')
| |
|
# Order the quantized types by their ggml name and put None in front; the
# plotting loop treats None as "show the unquantized original".
quants[:] = [None, *sorted(quants, key=get_name)]
print(quants)

# Subplot grid: four columns and as many rows as needed to fit every entry
# (integer ceiling division).
ncols = 4
nrows = (len(quants) + ncols - 1) // ncols

# Five inches per grid cell in both directions.
plt.figure(figsize=(5 * ncols, 5 * nrows), layout='tight')
| |
|
# Draw one subplot per entry of `quants`: the original image for the None
# placeholder, and for every quantized type the data after a round trip
# through a tensor of that type.
for i, qtype in enumerate(quants):
    plt.subplot(nrows, ncols, i + 1)
    # Best effort: a failure for one type is reported and its cell is left
    # empty, so the remaining types are still processed.
    try:
        if qtype is None:
            plt.title('Original')
            plt.imshow(orig)
        else:
            # Copy the F32 data into a tensor of the target type and read it
            # back as a NumPy array. Presumably `copy` quantizes and
            # `numpy(..., allow_copy=True)` dequantizes -- confirm against
            # the ggml.utils documentation.
            quantized_tensor = lib.ggml_new_tensor_2d(ctx, qtype, n, n)
            copy(orig_tensor, quantized_tensor)
            quantized = numpy(quantized_tensor, allow_copy=True)
            d = quantized - orig
            # NOTE(review): assuming `d` is 2-D like `orig`, np.linalg.norm
            # computes matrix norms here: ord=2 is the largest singular
            # value and ord=inf the maximum absolute row sum, not the
            # element-wise L2 / max-abs error. Confirm this is intended.
            results = {
                "l2": np.linalg.norm(d, 2),
                "linf": np.linalg.norm(d, np.inf),
                # Size ratio of the F32 tensor to the quantized tensor.
                "compression": round(
                    lib.ggml_nbytes(orig_tensor)
                    / lib.ggml_nbytes(quantized_tensor),
                    1,
                ),
            }
            name = get_name(qtype)
            print(f'{name}: {results}')

            plt.title(f'{name} ({results["compression"]}x smaller)')
            plt.imshow(quantized, interpolation='nearest')
    except Exception as e:
        print(f'Error: {e}')

plt.show()