| | """ |
| | This module contains the SMBFileSystem class, which handles access to |
| | Windows and Samba network shares using the smbprotocol package. |
| | """ |
| |
|
| | import datetime |
| | import uuid |
| | from stat import S_ISDIR, S_ISLNK |
| |
|
| | import smbclient |
| |
|
| | from .. import AbstractFileSystem |
| | from ..utils import infer_storage_options |
| |
|
| | |
| |
|
| |
|
class SMBFileSystem(AbstractFileSystem):
    """Allow reading and writing to Windows and Samba network shares.

    When using `fsspec.open()` for getting a file-like object the URI
    should be specified as this format:
    ``smb://workgroup;user:password@server:port/share/folder/file.csv``.

    Example::

        >>> import fsspec
        >>> with fsspec.open(
        ...     'smb://myuser:mypassword@myserver.com/' 'share/folder/file.csv'
        ... ) as smbfile:
        ...     df = pd.read_csv(smbfile, sep='|', header=None)

    Note that you need to pass in a valid hostname or IP address for the host
    component of the URL. Do not use the Windows/NetBIOS machine name for the
    host component.

    The first component of the path in the URL points to the name of the shared
    folder. Subsequent path components will point to the directory/folder/file.

    The URL components ``workgroup``, ``user``, ``password`` and ``port`` are
    optional.

    .. note::

        This implementation requires `smbprotocol`_ to be installed, e.g.::

            $ pip install smbprotocol
            # or
            # pip install smbprotocol[kerberos]

    .. _smbprotocol: https://github.com/jborean93/smbprotocol#requirements

    Note: if using this with the ``open`` or ``open_files``, with full URLs,
    there is no way to tell if a path is relative, so all paths are assumed
    to be absolute.
    """

    protocol = "smb"

    def __init__(
        self,
        host,
        port=None,
        username=None,
        password=None,
        timeout=60,
        encrypt=None,
        share_access=None,
        **kwargs,
    ):
        """
        You can use _get_kwargs_from_urls to get some kwargs from
        a reasonable SMB url.

        Authentication will be anonymous or integrated if username/password are not
        given.

        Parameters
        ----------
        host: str
            The remote server name/ip to connect to
        port: int or None
            Port to connect with. Usually 445, sometimes 139.
            ``None`` means 445 (see ``_port``).
        username: str or None
            Username to connect with. Required if Kerberos auth is not being used.
        password: str or None
            User's password on the server, if using username
        timeout: int
            Connection timeout in seconds
        encrypt: bool
            Whether to force encryption or not, once this has been set to True
            the session cannot be changed back to False.
        share_access: str or None
            Specifies the default access applied to file open operations
            performed with this file system object.
            This affects whether other processes can concurrently open a handle
            to the same file.

            - None (the default): exclusively locks the file until closed.
            - 'r': Allow other handles to be opened with read access.
            - 'w': Allow other handles to be opened with write access.
            - 'd': Allow other handles to be opened with delete access.
        **kwargs:
            Forwarded to ``AbstractFileSystem.__init__``. The optional key
            ``temppath`` (default ``""``) is the path inside the share, e.g.
            ``"/tmp"``, under which temporary files for non-autocommit
            writes are created.
        """
        super().__init__(**kwargs)
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.timeout = timeout
        self.encrypt = encrypt
        self.temppath = kwargs.pop("temppath", "")
        self.share_access = share_access
        # Register the session eagerly so that later calls can reuse it.
        self._connect()

    @property
    def _port(self):
        """Effective port: the configured one, or 445 when none was given."""
        return 445 if self.port is None else self.port

    def _connect(self):
        """Register an smbclient session for ``self.host`` with our credentials."""
        smbclient.register_session(
            self.host,
            username=self.username,
            password=self.password,
            port=self._port,
            encrypt=self.encrypt,
            connection_timeout=self.timeout,
        )

    @classmethod
    def _strip_protocol(cls, path):
        """Return only the ``path`` component of ``path`` (a URL or plain path)."""
        return infer_storage_options(path)["path"]

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Extract constructor kwargs from an SMB URL.

        Returns what ``infer_storage_options`` parsed from ``path`` with the
        ``path`` and ``protocol`` entries removed.
        """
        out = infer_storage_options(path)
        out.pop("path", None)
        out.pop("protocol", None)
        return out

    def mkdir(self, path, create_parents=True, **kwargs):
        """Create the directory ``path``.

        With ``create_parents`` true, missing intermediate directories are
        created as well (``smbclient.makedirs`` with ``exist_ok=False``);
        otherwise only the leaf is created. Extra ``kwargs`` are forwarded to
        the smbclient call.
        """
        wpath = _as_unc_path(self.host, path)
        if create_parents:
            smbclient.makedirs(wpath, exist_ok=False, port=self._port, **kwargs)
        else:
            smbclient.mkdir(wpath, port=self._port, **kwargs)

    def makedirs(self, path, exist_ok=False):
        """Recursively create ``path``; a no-op when ``path`` is only a share."""
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            smbclient.makedirs(wpath, exist_ok=exist_ok, port=self._port)

    def rmdir(self, path):
        """Remove the directory ``path``; a no-op when ``path`` is only a share."""
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            smbclient.rmdir(wpath, port=self._port)

    def info(self, path, **kwargs):
        """Stat ``path`` and return an fsspec-style info dict.

        The dict holds ``name`` (with a trailing ``/`` appended for
        directories), ``size``, ``type`` (``"directory"``, ``"link"`` or
        ``"file"``), ``uid``, ``gid``, ``time`` (taken from ``st_atime``) and
        ``mtime``. Extra ``kwargs`` are forwarded to ``smbclient.stat``.
        """
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port, **kwargs)
        if S_ISDIR(stats.st_mode):
            stype = "directory"
        elif S_ISLNK(stats.st_mode):
            stype = "link"
        else:
            stype = "file"
        res = {
            "name": path + "/" if stype == "directory" else path,
            "size": stats.st_size,
            "type": stype,
            "uid": stats.st_uid,
            "gid": stats.st_gid,
            "time": stats.st_atime,
            "mtime": stats.st_mtime,
        }
        return res

    def created(self, path):
        """Return the created timestamp of a file as a datetime.datetime"""
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port)
        return datetime.datetime.fromtimestamp(stats.st_ctime, tz=datetime.timezone.utc)

    def modified(self, path):
        """Return the modified timestamp of a file as a datetime.datetime"""
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port)
        return datetime.datetime.fromtimestamp(stats.st_mtime, tz=datetime.timezone.utc)

    def ls(self, path, detail=True, **kwargs):
        """List the entries of directory ``path``.

        Returns the entries' full paths, or, when ``detail`` is true, one
        ``info`` dict per entry (this issues one stat call per entry).
        Extra ``kwargs`` are forwarded to ``smbclient.listdir``.
        """
        unc = _as_unc_path(self.host, path)
        listed = smbclient.listdir(unc, port=self._port, **kwargs)
        dirs = ["/".join([path.rstrip("/"), p]) for p in listed]
        if detail:
            dirs = [self.info(d) for d in dirs]
        return dirs

    def _open(
        self,
        path,
        mode="rb",
        block_size=-1,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        """
        Open ``path`` on the share and return a file-like object.

        block_size: int or None
            If 0, no buffering, 1, line buffering, >1, buffer that many bytes.
            ``None`` or a negative value is passed on as ``-1``.
        autocommit: bool
            When False and ``mode`` contains ``"w"``, data is written to a
            temporary file in the same share and an ``SMBFileOpener`` is
            returned; the data only reaches ``path`` on ``commit()``.

        Notes
        -----
        By specifying 'share_access' in 'kwargs' it is possible to override the
        default shared access setting applied in the constructor of this object.
        """
        bls = block_size if block_size is not None and block_size >= 0 else -1
        wpath = _as_unc_path(self.host, path)
        share_access = kwargs.pop("share_access", self.share_access)
        if "w" in mode and autocommit is False:
            temp = _as_temp_path(self.host, path, self.temppath)
            # Forward share_access here too: it was previously popped and then
            # dropped, so transactional writes ignored the configured sharing
            # mode. SMBFileOpener relays extra kwargs to smbclient.open_file.
            return SMBFileOpener(
                wpath,
                temp,
                mode,
                port=self._port,
                block_size=bls,
                share_access=share_access,
                **kwargs,
            )
        return smbclient.open_file(
            wpath,
            mode,
            buffering=bls,
            share_access=share_access,
            port=self._port,
            **kwargs,
        )

    def copy(self, path1, path2, **kwargs):
        """Copy within two locations in the same filesystem"""
        wpath1 = _as_unc_path(self.host, path1)
        wpath2 = _as_unc_path(self.host, path2)
        smbclient.copyfile(wpath1, wpath2, port=self._port, **kwargs)

    def _rm(self, path):
        """Remove a single file or directory; a no-op for a bare share path."""
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            stats = smbclient.stat(wpath, port=self._port)
            # Directories and files need different smbclient calls.
            if S_ISDIR(stats.st_mode):
                smbclient.rmdir(wpath, port=self._port)
            else:
                smbclient.remove(wpath, port=self._port)

    def mv(self, path1, path2, recursive=None, maxdepth=None, **kwargs):
        """Rename ``path1`` to ``path2`` on the server.

        ``recursive`` and ``maxdepth`` are accepted but not used by this
        implementation. Extra ``kwargs`` are forwarded to ``smbclient.rename``.
        """
        wpath1 = _as_unc_path(self.host, path1)
        wpath2 = _as_unc_path(self.host, path2)
        smbclient.rename(wpath1, wpath2, port=self._port, **kwargs)
| |
|
| |
|
def _as_unc_path(host, path):
    """Build the UNC form of ``path`` on ``host``.

    Forward slashes in ``path`` become backslashes and the result is prefixed
    with two backslashes followed by ``host``.
    """
    return "\\\\{}{}".format(host, path.replace("/", "\\"))
| |
|
| |
|
def _as_temp_path(host, path, temppath):
    """Return a UNC path for a fresh temporary file in the share of ``path``.

    The share name is the first component of ``path`` (which is expected to
    start with ``/``); the file is named with a random UUID and placed under
    ``temppath`` inside that share.
    """
    share_name = path.split("/")[1]
    unique_name = uuid.uuid4()
    return _as_unc_path(host, f"/{share_name}{temppath}/{unique_name}")
| |
|
| |
|
def _share_has_path(path):
    """Tell whether ``path`` points below the share itself.

    Counts the ``/`` separators in ``path``; a trailing slash does not count
    as an additional component.
    """
    # "/share" has one slash and "/share/" has two; anything beyond that
    # names something inside the share.
    separators_for_bare_share = 2 if path.endswith("/") else 1
    return path.count("/") > separators_for_bare_share
| |
|
| |
|
class SMBFileOpener:
    """Write to a remote temporary file and move it into place on commit.

    The object wraps the file handle returned by ``smbclient.open_file`` for
    the temporary path; attributes it does not define itself are looked up on
    that handle.

    Parameters
    ----------
    path:
        Final destination (UNC path) the data is moved to by ``commit``.
    temp:
        UNC path of the temporary file that is actually opened and written.
    mode:
        Mode string passed to ``smbclient.open_file``.
    port:
        Port passed to every smbclient call made by this object.
    block_size:
        Passed to ``smbclient.open_file`` as ``buffering``.
    **kwargs:
        Additional keyword arguments passed to ``smbclient.open_file``.
    """

    def __init__(self, path, temp, mode, port=445, block_size=-1, **kwargs):
        self.path = path
        self.temp = temp
        self.mode = mode
        self.block_size = block_size
        self.kwargs = kwargs
        self.smbfile = None
        self._incontext = False
        self.port = port
        self._open()

    def _open(self):
        """Open the temporary file unless an open handle already exists."""
        if self.smbfile is None or self.smbfile.closed:
            self.smbfile = smbclient.open_file(
                self.temp,
                self.mode,
                port=self.port,
                buffering=self.block_size,
                **self.kwargs,
            )

    def commit(self):
        """Move temp file to definitive on success."""
        smbclient.replace(self.temp, self.path, port=self.port)

    def discard(self):
        """Remove the temp file on failure."""
        smbclient.remove(self.temp, port=self.port)

    def __fspath__(self):
        """Return the final destination path (not the temporary one)."""
        return self.path

    def __iter__(self):
        """Iterate over the wrapped file handle."""
        return self.smbfile.__iter__()

    def __getattr__(self, item):
        """Delegate attributes not found on this object to the wrapped handle."""
        # __getattr__ only runs after normal lookup has failed. If ``smbfile``
        # itself is missing (a bare instance created by copy/pickle, or an
        # object whose __init__ has not run), delegating would look it up
        # again and recurse until RecursionError; fail cleanly instead.
        if item == "smbfile":
            raise AttributeError(item)
        return getattr(self.smbfile, item)

    def __enter__(self):
        """Enter the wrapped handle's context and return what it returns."""
        self._incontext = True
        return self.smbfile.__enter__()

    def __exit__(self, exc_type, exc_value, traceback):
        """Leave the wrapped handle's context."""
        self._incontext = False
        self.smbfile.__exit__(exc_type, exc_value, traceback)
| |
|