Karim shoair committed on
Commit ·
4da1914
1
Parent(s): 31f70c8
docs: improve All storage classes doc strings
Browse files
scrapling/core/storage_adaptors.py
CHANGED
|
@@ -38,7 +38,7 @@ class StorageSystemMixin(ABC):
|
|
| 38 |
def save(self, element: html.HtmlElement, identifier: str) -> None:
|
| 39 |
"""Saves the element's unique properties to the storage for retrieval and relocation later
|
| 40 |
|
| 41 |
-
:param element: The element itself
|
| 42 |
:param identifier: This is the identifier that will be used to retrieve the element later from the storage. See
|
| 43 |
the docs for more info.
|
| 44 |
"""
|
|
@@ -70,12 +70,12 @@ class StorageSystemMixin(ABC):
|
|
| 70 |
@lru_cache(1, typed=True)
|
| 71 |
class SQLiteStorageSystem(StorageSystemMixin):
|
| 72 |
"""The recommended system to use, it's race condition safe and thread safe.
|
| 73 |
-
Mainly built so the library can run in threaded frameworks like scrapy or threaded tools
|
| 74 |
-
> It's optimized for threaded applications but running it without threads shouldn't make it slow."""
|
| 75 |
|
| 76 |
def __init__(self, storage_file: str, url: Union[str, None] = None):
|
| 77 |
"""
|
| 78 |
-
:param storage_file: File to be used to store elements
|
| 79 |
:param url: URL of the website we are working on to separate it from other websites data
|
| 80 |
|
| 81 |
"""
|
|
@@ -83,7 +83,7 @@ class SQLiteStorageSystem(StorageSystemMixin):
|
|
| 83 |
self.storage_file = storage_file
|
| 84 |
# We use a threading.Lock to ensure thread-safety instead of relying on thread-local storage.
|
| 85 |
self.lock = threading.Lock()
|
| 86 |
-
# >SQLite default mode in earlier version is 1 not 2 (1=thread-safe 2=serialized)
|
| 87 |
# `check_same_thread=False` to allow it to be used across different threads.
|
| 88 |
self.connection = sqlite3.connect(self.storage_file, check_same_thread=False)
|
| 89 |
# WAL (Write-Ahead Logging) allows for better concurrency.
|
|
@@ -109,7 +109,7 @@ class SQLiteStorageSystem(StorageSystemMixin):
|
|
| 109 |
def save(self, element: html.HtmlElement, identifier: str):
|
| 110 |
"""Saves the elements unique properties to the storage for retrieval and relocation later
|
| 111 |
|
| 112 |
-
:param element: The element itself
|
| 113 |
:param identifier: This is the identifier that will be used to retrieve the element later from the storage. See
|
| 114 |
the docs for more info.
|
| 115 |
"""
|
|
@@ -145,7 +145,7 @@ class SQLiteStorageSystem(StorageSystemMixin):
|
|
| 145 |
return None
|
| 146 |
|
| 147 |
def close(self):
|
| 148 |
-
"""Close all connections
|
| 149 |
with self.lock:
|
| 150 |
self.connection.commit()
|
| 151 |
self.cursor.close()
|
|
|
|
| 38 |
def save(self, element: html.HtmlElement, identifier: str) -> None:
|
| 39 |
"""Saves the element's unique properties to the storage for retrieval and relocation later
|
| 40 |
|
| 41 |
+
:param element: The element itself which we want to save to storage.
|
| 42 |
:param identifier: This is the identifier that will be used to retrieve the element later from the storage. See
|
| 43 |
the docs for more info.
|
| 44 |
"""
|
|
|
|
| 70 |
@lru_cache(1, typed=True)
|
| 71 |
class SQLiteStorageSystem(StorageSystemMixin):
|
| 72 |
"""The recommended system to use, it's race condition safe and thread safe.
|
| 73 |
+
Mainly built, so the library can run in threaded frameworks like scrapy or threaded tools
|
| 74 |
+
> It's optimized for threaded applications, but running it without threads shouldn't make it slow."""
|
| 75 |
|
| 76 |
def __init__(self, storage_file: str, url: Union[str, None] = None):
|
| 77 |
"""
|
| 78 |
+
:param storage_file: File to be used to store elements' data.
|
| 79 |
:param url: URL of the website we are working on to separate it from other websites data
|
| 80 |
|
| 81 |
"""
|
|
|
|
| 83 |
self.storage_file = storage_file
|
| 84 |
# We use a threading.Lock to ensure thread-safety instead of relying on thread-local storage.
|
| 85 |
self.lock = threading.Lock()
|
| 86 |
+
# >SQLite default mode in the earlier version is 1 not 2 (1=thread-safe 2=serialized)
|
| 87 |
# `check_same_thread=False` to allow it to be used across different threads.
|
| 88 |
self.connection = sqlite3.connect(self.storage_file, check_same_thread=False)
|
| 89 |
# WAL (Write-Ahead Logging) allows for better concurrency.
|
|
|
|
| 109 |
def save(self, element: html.HtmlElement, identifier: str):
|
| 110 |
"""Saves the elements unique properties to the storage for retrieval and relocation later
|
| 111 |
|
| 112 |
+
:param element: The element itself which we want to save to storage.
|
| 113 |
:param identifier: This is the identifier that will be used to retrieve the element later from the storage. See
|
| 114 |
the docs for more info.
|
| 115 |
"""
|
|
|
|
| 145 |
return None
|
| 146 |
|
| 147 |
def close(self):
|
| 148 |
+
"""Close all connections. It will be useful with things like scrapy's Spider.closed() function/signal"""
|
| 149 |
with self.lock:
|
| 150 |
self.connection.commit()
|
| 151 |
self.cursor.close()
|