| | import asyncio |
| | import sys |
| | import zlib |
| | from abc import ABC, abstractmethod |
| | from concurrent.futures import Executor |
| | from typing import Any, Final, Optional, Protocol, TypedDict, cast |
| |
|
| | if sys.version_info >= (3, 12): |
| | from collections.abc import Buffer |
| | else: |
| | from typing import Union |
| |
|
| | Buffer = Union[bytes, bytearray, "memoryview[int]", "memoryview[bytes]"] |
| |
|
| | try: |
| | try: |
| | import brotlicffi as brotli |
| | except ImportError: |
| | import brotli |
| |
|
| | HAS_BROTLI = True |
| | except ImportError: |
| | HAS_BROTLI = False |
| |
|
| | try: |
| | if sys.version_info >= (3, 14): |
| | from compression.zstd import ZstdDecompressor |
| | else: |
| | from backports.zstd import ZstdDecompressor |
| |
|
| | HAS_ZSTD = True |
| | except ImportError: |
| | HAS_ZSTD = False |
| |
|
| |
|
# Payloads larger than this are (de)compressed in an executor instead of
# inline on the event loop.
MAX_SYNC_CHUNK_SIZE = 4096
# Default cap on decompressed output size: 2**25 bytes (32 MiB).
DEFAULT_MAX_DECOMPRESS_SIZE = 2**25

# Backend-specific sentinels meaning "no output-size limit":
# zlib treats max_length=0 as unlimited, while zstd uses -1
# (see ZSTDDecompressor.decompress_sync, which translates between the two).
ZLIB_MAX_LENGTH_UNLIMITED = 0
ZSTD_MAX_LENGTH_UNLIMITED = -1
| |
|
| |
|
class ZLibCompressObjProtocol(Protocol):
    """Structural type of the object returned by a backend's ``compressobj()``."""

    def compress(self, data: Buffer) -> bytes: ...
    def flush(self, mode: int = ..., /) -> bytes: ...
| |
|
| |
|
class ZLibDecompressObjProtocol(Protocol):
    """Structural type of the object returned by a backend's ``decompressobj()``."""

    def decompress(self, data: Buffer, max_length: int = ...) -> bytes: ...
    def flush(self, length: int = ..., /) -> bytes: ...

    @property
    def eof(self) -> bool: ...
| |
|
| |
|
class ZLibBackendProtocol(Protocol):
    """Structural type for a zlib-compatible module.

    Declares only the constants and callables this module actually uses,
    so any drop-in zlib replacement satisfying this interface can be
    installed via :func:`set_zlib_backend`.
    """

    # Window-size bound and flush-mode/tuning constants mirrored from zlib.
    MAX_WBITS: int
    Z_FULL_FLUSH: int
    Z_SYNC_FLUSH: int
    Z_BEST_SPEED: int
    Z_FINISH: int

    def compressobj(
        self,
        level: int = ...,
        method: int = ...,
        wbits: int = ...,
        memLevel: int = ...,
        strategy: int = ...,
        zdict: Optional[Buffer] = ...,
    ) -> ZLibCompressObjProtocol: ...
    def decompressobj(
        self, wbits: int = ..., zdict: Buffer = ...
    ) -> ZLibDecompressObjProtocol: ...

    # One-shot (non-streaming) helpers.
    def compress(
        self, data: Buffer, /, level: int = ..., wbits: int = ...
    ) -> bytes: ...
    def decompress(
        self, data: Buffer, /, wbits: int = ..., bufsize: int = ...
    ) -> bytes: ...
| |
|
| |
|
class CompressObjArgs(TypedDict, total=False):
    """Optional keyword arguments forwarded to ``compressobj()``.

    ``total=False``: each key is only present when the caller supplied it,
    so backend defaults apply for the rest.
    """

    wbits: int
    strategy: int
    level: int
| |
|
| |
|
class ZLibBackendWrapper:
    """Thin delegating facade over a zlib-compatible backend module.

    The rest of the module holds one stable wrapper object while the
    underlying implementation can be swapped (see ``set_zlib_backend``).
    """

    def __init__(self, _zlib_backend: ZLibBackendProtocol):
        # The wrapped module; every member below delegates to it.
        self._zlib_backend: ZLibBackendProtocol = _zlib_backend

    @property
    def name(self) -> str:
        """Module name of the wrapped backend, or "undefined" if it has none."""
        return getattr(self._zlib_backend, "__name__", "undefined")

    # zlib API constants, re-exposed as read-only properties so they always
    # reflect the currently wrapped backend.
    @property
    def MAX_WBITS(self) -> int:
        return self._zlib_backend.MAX_WBITS

    @property
    def Z_FULL_FLUSH(self) -> int:
        return self._zlib_backend.Z_FULL_FLUSH

    @property
    def Z_SYNC_FLUSH(self) -> int:
        return self._zlib_backend.Z_SYNC_FLUSH

    @property
    def Z_BEST_SPEED(self) -> int:
        return self._zlib_backend.Z_BEST_SPEED

    @property
    def Z_FINISH(self) -> int:
        return self._zlib_backend.Z_FINISH

    def compressobj(self, *args: Any, **kwargs: Any) -> ZLibCompressObjProtocol:
        """Create a streaming compressor via the backend."""
        return self._zlib_backend.compressobj(*args, **kwargs)

    def decompressobj(self, *args: Any, **kwargs: Any) -> ZLibDecompressObjProtocol:
        """Create a streaming decompressor via the backend."""
        return self._zlib_backend.decompressobj(*args, **kwargs)

    def compress(self, data: Buffer, *args: Any, **kwargs: Any) -> bytes:
        """One-shot compress via the backend."""
        return self._zlib_backend.compress(data, *args, **kwargs)

    def decompress(self, data: Buffer, *args: Any, **kwargs: Any) -> bytes:
        """One-shot decompress via the backend."""
        return self._zlib_backend.decompress(data, *args, **kwargs)

    # Any other attribute (e.g. backend error classes) falls through
    # to the wrapped module unchanged.
    def __getattr__(self, attrname: str) -> Any:
        return getattr(self._zlib_backend, attrname)
| |
|
| |
|
| | ZLibBackend: ZLibBackendWrapper = ZLibBackendWrapper(zlib) |
| |
|
| |
|
def set_zlib_backend(new_zlib_backend: ZLibBackendProtocol) -> None:
    """Replace the zlib implementation used by the module-wide ``ZLibBackend``.

    Mutates the shared wrapper in place, so existing references to
    ``ZLibBackend`` observe the new backend immediately.  Compressor and
    decompressor instances created earlier keep the backend they captured
    at construction time.
    """
    ZLibBackend._zlib_backend = new_zlib_backend
| |
|
| |
|
def encoding_to_mode(
    encoding: Optional[str] = None,
    suppress_deflate_header: bool = False,
) -> int:
    """Translate a content encoding into a zlib ``wbits`` value.

    ``"gzip"`` selects gzip framing (``16 + MAX_WBITS``).  Otherwise the
    result is raw deflate (negative wbits) when ``suppress_deflate_header``
    is true, else standard zlib framing.
    """
    max_wbits = ZLibBackend.MAX_WBITS
    if encoding == "gzip":
        return 16 + max_wbits
    if suppress_deflate_header:
        return -max_wbits
    return max_wbits
| |
|
| |
|
class DecompressionBaseHandler(ABC):
    """Shared sync/async plumbing for the concrete decompressors below.

    Small payloads are decompressed inline on the event loop; payloads
    larger than ``max_sync_chunk_size`` are offloaded to an executor.
    """

    def __init__(
        self,
        executor: Optional[Executor] = None,
        max_sync_chunk_size: Optional[int] = MAX_SYNC_CHUNK_SIZE,
    ):
        """Base class for decompression handlers.

        :param executor: executor used for large payloads
            (``None`` selects the event loop's default executor).
        :param max_sync_chunk_size: inline-size threshold in bytes;
            ``None`` disables executor offloading entirely.
        """
        self._executor = executor
        self._max_sync_chunk_size = max_sync_chunk_size

    @abstractmethod
    def decompress_sync(
        self, data: bytes, max_length: int = ZLIB_MAX_LENGTH_UNLIMITED
    ) -> bytes:
        """Decompress the given data synchronously."""

    async def decompress(
        self, data: bytes, max_length: int = ZLIB_MAX_LENGTH_UNLIMITED
    ) -> bytes:
        """Decompress the given data, offloading large payloads to the executor."""
        if (
            self._max_sync_chunk_size is not None
            and len(data) > self._max_sync_chunk_size
        ):
            # get_running_loop() rather than get_event_loop(): we are inside
            # a coroutine, so a running loop is guaranteed, and this matches
            # ZLibCompressor.compress() elsewhere in this module.
            return await asyncio.get_running_loop().run_in_executor(
                self._executor, self.decompress_sync, data, max_length
            )
        return self.decompress_sync(data, max_length)
| |
|
| |
|
class ZLibCompressor:
    """Incremental zlib/gzip/deflate compressor with optional executor offload."""

    def __init__(
        self,
        encoding: Optional[str] = None,
        suppress_deflate_header: bool = False,
        level: Optional[int] = None,
        wbits: Optional[int] = None,
        strategy: Optional[int] = None,
        executor: Optional[Executor] = None,
        max_sync_chunk_size: Optional[int] = MAX_SYNC_CHUNK_SIZE,
    ):
        self._executor = executor
        self._max_sync_chunk_size = max_sync_chunk_size
        # An explicitly supplied wbits wins over the encoding-derived mode.
        if wbits is None:
            self._mode = encoding_to_mode(encoding, suppress_deflate_header)
        else:
            self._mode = wbits
        # Snapshot the backend so a later set_zlib_backend() call cannot
        # swap implementations mid-stream.
        self._zlib_backend: Final = ZLibBackendWrapper(ZLibBackend._zlib_backend)

        # Only pass tuning knobs the caller actually provided, so the
        # backend's own defaults apply otherwise.
        kwargs: CompressObjArgs = {"wbits": self._mode}
        if strategy is not None:
            kwargs["strategy"] = strategy
        if level is not None:
            kwargs["level"] = level
        self._compressor = self._zlib_backend.compressobj(**kwargs)

    def compress_sync(self, data: bytes) -> bytes:
        """Compress ``data`` synchronously on the calling thread."""
        return self._compressor.compress(data)

    async def compress(self, data: bytes) -> bytes:
        """Compress the data and return the compressed bytes.

        Note that flush() must be called after the last call to compress().

        Payloads larger than ``max_sync_chunk_size`` are compressed in the
        executor; smaller ones are compressed inline on the event loop.

        **WARNING: This method is NOT cancellation-safe when used with flush().**
        If this operation is cancelled, the compressor state may be corrupted.
        The connection MUST be closed after cancellation to avoid data corruption
        in subsequent compress operations.

        For cancellation-safe compression (e.g., WebSocket), the caller MUST wrap
        compress() + flush() + send operations in a shield and lock to ensure atomicity.
        """
        threshold = self._max_sync_chunk_size
        if threshold is not None and len(data) > threshold:
            return await asyncio.get_running_loop().run_in_executor(
                self._executor, self._compressor.compress, data
            )
        return self.compress_sync(data)

    def flush(self, mode: Optional[int] = None) -> bytes:
        """Flush the compressor synchronously (defaults to ``Z_FINISH``).

        **WARNING: This method is NOT cancellation-safe when called after compress().**
        The flush() operation accesses shared compressor state. If compress() was
        cancelled, calling flush() may result in corrupted data. The connection MUST
        be closed after compress() cancellation.

        For cancellation-safe compression (e.g., WebSocket), the caller MUST wrap
        compress() + flush() + send operations in a shield and lock to ensure atomicity.
        """
        if mode is None:
            mode = self._zlib_backend.Z_FINISH
        return self._compressor.flush(mode)
| |
|
| |
|
class ZLibDecompressor(DecompressionBaseHandler):
    """Streaming zlib/gzip/deflate decompressor."""

    def __init__(
        self,
        encoding: Optional[str] = None,
        suppress_deflate_header: bool = False,
        executor: Optional[Executor] = None,
        max_sync_chunk_size: Optional[int] = MAX_SYNC_CHUNK_SIZE,
    ):
        super().__init__(executor=executor, max_sync_chunk_size=max_sync_chunk_size)
        self._mode = encoding_to_mode(encoding, suppress_deflate_header)
        # Pin the backend at construction time so set_zlib_backend() cannot
        # change implementations mid-stream.
        self._zlib_backend: Final = ZLibBackendWrapper(ZLibBackend._zlib_backend)
        self._decompressor = self._zlib_backend.decompressobj(wbits=self._mode)

    def decompress_sync(
        self, data: Buffer, max_length: int = ZLIB_MAX_LENGTH_UNLIMITED
    ) -> bytes:
        """Decompress up to ``max_length`` bytes (0 means unlimited)."""
        return self._decompressor.decompress(data, max_length)

    def flush(self, length: int = 0) -> bytes:
        """Return any remaining buffered output from the decompressor."""
        # Only forward an explicit positive length; otherwise let the
        # backend use its own default.
        if length > 0:
            return self._decompressor.flush(length)
        return self._decompressor.flush()

    @property
    def eof(self) -> bool:
        """True once the end of the compressed stream has been reached."""
        return self._decompressor.eof
| |
|
| |
|
class BrotliDecompressor(DecompressionBaseHandler):
    # Works with either brotli binding (`brotlicffi` is preferred at import
    # time, falling back to `brotli`); the two expose differently named
    # methods on the decompressor object, hence the hasattr() checks below.

    def __init__(
        self,
        executor: Optional[Executor] = None,
        max_sync_chunk_size: Optional[int] = MAX_SYNC_CHUNK_SIZE,
    ) -> None:
        """Decompress data using the Brotli library."""
        if not HAS_BROTLI:
            raise RuntimeError(
                "The brotli decompression is not available. "
                "Please install `Brotli` module"
            )
        self._obj = brotli.Decompressor()
        super().__init__(executor=executor, max_sync_chunk_size=max_sync_chunk_size)

    def decompress_sync(
        self, data: Buffer, max_length: int = ZLIB_MAX_LENGTH_UNLIMITED
    ) -> bytes:
        """Decompress the given data."""
        # NOTE(review): which branch runs depends on the installed binding;
        # presumably one exposes decompress() and the other process() —
        # confirm both accept max_length as a second positional argument.
        if hasattr(self._obj, "decompress"):
            return cast(bytes, self._obj.decompress(data, max_length))
        return cast(bytes, self._obj.process(data, max_length))

    def flush(self) -> bytes:
        """Flush the decompressor."""
        # Not all bindings provide flush(); fall back to no output.
        if hasattr(self._obj, "flush"):
            return cast(bytes, self._obj.flush())
        return b""
| |
|
| |
|
class ZSTDDecompressor(DecompressionBaseHandler):
    """Streaming zstd decompressor (stdlib on 3.14+, backports.zstd otherwise)."""

    def __init__(
        self,
        executor: Optional[Executor] = None,
        max_sync_chunk_size: Optional[int] = MAX_SYNC_CHUNK_SIZE,
    ) -> None:
        if not HAS_ZSTD:
            raise RuntimeError(
                "The zstd decompression is not available. "
                "Please install `backports.zstd` module"
            )
        self._obj = ZstdDecompressor()
        super().__init__(executor=executor, max_sync_chunk_size=max_sync_chunk_size)

    def decompress_sync(
        self, data: bytes, max_length: int = ZLIB_MAX_LENGTH_UNLIMITED
    ) -> bytes:
        """Decompress ``data``, translating the zlib "unlimited" sentinel."""
        # zlib callers pass 0 for "no limit"; zstd expects -1 for the same.
        if max_length == ZLIB_MAX_LENGTH_UNLIMITED:
            limit = ZSTD_MAX_LENGTH_UNLIMITED
        else:
            limit = max_length
        return self._obj.decompress(data, limit)

    def flush(self) -> bytes:
        """No-op: there is nothing extra to emit for zstd."""
        return b""
| |
|