| |
| |
| |
| |
| |
|
|
| import typing |
| import zipfile |
|
|
| from dataclasses import dataclass |
| from functools import lru_cache |
| from typing_extensions import Literal |
|
|
|
|
# Default capacity of the LRU cache that holds opened zip archives.
DEFAULT_SIZE = 32

# Modes used to annotate the ``mode`` argument when opening a zip archive.
MODE = Literal["r", "w", "x", "a"]
|
|
|
|
@dataclass(order=True)
class PathInZip:
    """Location of a single file stored inside a zip archive.

    Instances compare and order by ``(zip_path, file_path)`` through the
    dataclass-generated methods; the hand-written ``__init__`` below is kept
    by ``@dataclass`` instead of a generated one.

    Args:
        path: A string of the form ``<path_to_zip>:<path_inside_zip>``.
            For a zip file ``/some/location/foo.zip`` that contains a json
            file at ``/data/file1.json``, the expected value is
            ``"/some/location/foo.zip:/data/file1.json"``.

    Attributes:
        zip_path: Path of the zip archive itself.
        file_path: Path of the file inside the archive.

    Raises:
        ValueError: If ``path`` does not contain exactly one separator.
    """

    # Separator between the archive path and the member path. Declared as a
    # ClassVar so the dataclass machinery does not treat it as a field.
    INFO_PATH_SEP: typing.ClassVar[str] = ':'

    zip_path: str
    file_path: str

    def __init__(self, path: str) -> None:
        parts = path.split(self.INFO_PATH_SEP)
        # Explicit check rather than ``assert``: asserts are stripped under
        # ``python -O``, which would leave malformed input to fail with an
        # unrelated tuple-unpacking error.
        if len(parts) != 2:
            raise ValueError(
                f"Expected a path of the form "
                f"'<zip_path>{self.INFO_PATH_SEP}<file_path>', got {path!r}"
            )
        self.zip_path, self.file_path = parts

    @classmethod
    def from_paths(cls, zip_path: str, file_path: str) -> 'PathInZip':
        """Build an instance from separate archive and member paths."""
        return cls(zip_path + cls.INFO_PATH_SEP + file_path)

    def __str__(self) -> str:
        """Return the combined ``<zip_path>:<file_path>`` representation."""
        return f"{self.zip_path}{self.INFO_PATH_SEP}{self.file_path}"
|
|
|
|
def _open_zip(path: str, mode: MODE = 'r'):
    """Open the zip archive at ``path`` and return its ``zipfile.ZipFile``.

    Args:
        path: Location of the zip archive.
        mode: Mode handed to ``zipfile.ZipFile``; defaults to ``'r'``.
    """
    archive = zipfile.ZipFile(file=path, mode=mode)
    return archive
|
|
|
|
# LRU-cached wrapper around _open_zip: calls with the same arguments get back
# the same ZipFile object; at most DEFAULT_SIZE entries are kept.
_cached_open_zip = lru_cache(maxsize=DEFAULT_SIZE)(_open_zip)
|
|
|
|
def set_zip_cache_size(max_size: int):
    """Replace the zip-opening cache with a new one of the given capacity.

    The module-level cached opener is rebound to a fresh LRU cache, so
    entries stored in the previous cache are not carried over.

    Args:
        max_size: Maximum number of entries the new LRU cache may hold.
    """
    global _cached_open_zip
    build_cache = lru_cache(maxsize=max_size)
    _cached_open_zip = build_cache(_open_zip)
|
|
|
|
def open_file_in_zip(path_in_zip: PathInZip, mode: str = 'r') -> typing.IO:
    """Open a file stored inside a zip archive and return a file-like object.

    The enclosing archive is obtained through the module's LRU cache and is
    always opened with the cache's default (read) mode.

    Args:
        path_in_zip: A PathInZip object identifying the archive and the file
            inside it.
        mode: Mode forwarded to ``ZipFile.open`` for the member file. Because
            the archive itself is opened read-only, ``'r'`` is the only value
            that currently succeeds.

    Returns:
        A file-like object for the member named by ``path_in_zip``.

    Raises:
        ValueError: Raised by ``zipfile`` when ``mode`` is not supported for
            an archive opened for reading.
    """
    zf = _cached_open_zip(path_in_zip.zip_path)
    # Forward ``mode`` instead of dropping it, so an unsupported request fails
    # loudly rather than silently handing back a read handle.
    return zf.open(path_in_zip.file_path, mode)
|
|