| """Adds NumPy array support to msgpack. | |
| msgpack is good for (de)serializing data over a network for multiple reasons: | |
| - msgpack is secure (as opposed to pickle/dill/etc which allow for arbitrary code execution) | |
| - msgpack is widely used and has good cross-language support | |
| - msgpack does not require a schema (as opposed to protobuf/flatbuffers/etc) which is convenient in dynamically typed | |
| languages like Python and JavaScript | |
| - msgpack is fast and efficient (as opposed to readable formats like JSON/YAML/etc); I found that msgpack was ~4x faster | |
| than pickle for serializing large arrays using the below strategy | |
| The code below is adapted from https://github.com/lebedov/msgpack-numpy. The reason not to use that library directly is | |
| that it falls back to pickle for object arrays. | |
| """ | |
| import functools | |
| import msgpack | |
| import numpy as np | |
def pack_array(obj):
    """Convert a NumPy array or scalar into a msgpack-serializable dict.

    Meant to be passed as the ``default`` hook of a msgpack packer, so it also
    has to cope with objects that are not NumPy values at all.

    Args:
        obj: The object msgpack asked to have converted.

    Returns:
        - for an ``np.ndarray``: a dict tagged ``b"__ndarray__"`` holding the raw
          bytes from ``tobytes()``, the dtype string and the shape;
        - for a NumPy scalar (``np.generic``): a dict tagged ``b"__npgeneric__"``
          holding the value from ``item()`` and the dtype string;
        - for anything else: ``obj`` itself, unchanged.

    Raises:
        ValueError: If a NumPy value has a void/structured (``"V"``), object
            (``"O"``) or complex (``"c"``) dtype kind.
    """
    # Non-NumPy objects are not ours to handle; hand them straight back.
    if not isinstance(obj, (np.ndarray, np.generic)):
        return obj

    dtype = obj.dtype
    if dtype.kind in ("V", "O", "c"):
        raise ValueError(f"Unsupported dtype: {dtype}")

    if isinstance(obj, np.ndarray):
        return {
            b"__ndarray__": True,
            b"data": obj.tobytes(),
            b"dtype": dtype.str,
            b"shape": obj.shape,
        }

    # Only np.generic is left at this point: ship the plain Python value.
    return {
        b"__npgeneric__": True,
        b"data": obj.item(),
        b"dtype": dtype.str,
    }
def unpack_array(obj):
    """Rebuild NumPy values from the tagged dicts produced by ``pack_array``.

    Meant to be passed as the ``object_hook`` of a msgpack unpacker: it receives
    each decoded map and either reconstructs a NumPy object from it or returns
    the map untouched.

    Args:
        obj: A decoded msgpack map.

    Returns:
        - an ``np.ndarray`` when the map carries the ``b"__ndarray__"`` tag. The
          array is built directly over the ``b"data"`` buffer without copying,
          so it is read-only when that buffer is immutable ``bytes``; call
          ``.copy()`` before writing to it.
        - a NumPy scalar when the map carries the ``b"__npgeneric__"`` tag.
        - ``obj`` itself otherwise.

    Raises:
        ValueError: If the requested dtype contains Python objects.
    """
    is_array = b"__ndarray__" in obj
    if not is_array and b"__npgeneric__" not in obj:
        return obj

    dtype = np.dtype(obj[b"dtype"])
    # The dtype comes from the (possibly untrusted) payload. An object dtype,
    # or a structured dtype with object fields, must never be laid over raw
    # bytes: the bytes would be interpreted as Python object pointers.
    # pack_array never emits such dtypes, so rejecting them loses nothing.
    if dtype.hasobject:
        raise ValueError(f"Unsupported dtype: {dtype}")

    if is_array:
        return np.ndarray(buffer=obj[b"data"], dtype=dtype, shape=obj[b"shape"])
    return dtype.type(obj[b"data"])
# Drop-in replacements for the msgpack entry points with the NumPy hooks
# pre-bound: `pack_array` as the `default` hook on the packing side and
# `unpack_array` as the `object_hook` on the unpacking side. Any other keyword
# arguments are forwarded to msgpack unchanged.

# Streaming interfaces.
Packer = functools.partial(
    msgpack.Packer,
    default=pack_array,
)
Unpacker = functools.partial(
    msgpack.Unpacker,
    object_hook=unpack_array,
)

# One-shot bytes <-> object helpers.
packb = functools.partial(
    msgpack.packb,
    default=pack_array,
)
unpackb = functools.partial(
    msgpack.unpackb,
    object_hook=unpack_array,
)