Merge pull request #43 from D4Vinci/dev
Browse files
- README.md +1 -1
- scrapling/__init__.py +1 -1
- scrapling/cli.py +4 -3
- scrapling/core/custom_types.py +1 -12
- setup.cfg +1 -1
- setup.py +1 -1
README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# 🕷️ Scrapling: Undetectable, Lightning-Fast, and Adaptive Web Scraping for Python
|
| 2 |
[](https://github.com/D4Vinci/Scrapling/actions/workflows/tests.yml) [](https://badge.fury.io/py/Scrapling) [](https://pypi.org/project/scrapling/) [](https://pepy.tech/project/scrapling)
|
| 3 |
|
| 4 |
Dealing with failing web scrapers due to anti-bot protections or website changes? Meet Scrapling.
|
|
|
|
| 1 |
+
# 🕷️ Scrapling: Undetectable, Lightning-Fast, and Easy Web Scraping with Python
|
| 2 |
[](https://github.com/D4Vinci/Scrapling/actions/workflows/tests.yml) [](https://badge.fury.io/py/Scrapling) [](https://pypi.org/project/scrapling/) [](https://pepy.tech/project/scrapling)
|
| 3 |
|
| 4 |
Dealing with failing web scrapers due to anti-bot protections or website changes? Meet Scrapling.
|
scrapling/__init__.py
CHANGED
|
@@ -5,7 +5,7 @@ from scrapling.fetchers import (AsyncFetcher, CustomFetcher, Fetcher,
|
|
| 5 |
from scrapling.parser import Adaptor, Adaptors
|
| 6 |
|
| 7 |
__author__ = "Karim Shoair (karim.shoair@pm.me)"
|
| 8 |
-
__version__ = "0.2.95"
|
| 9 |
__copyright__ = "Copyright (c) 2024 Karim Shoair"
|
| 10 |
|
| 11 |
|
|
|
|
| 5 |
from scrapling.parser import Adaptor, Adaptors
|
| 6 |
|
| 7 |
__author__ = "Karim Shoair (karim.shoair@pm.me)"
|
| 8 |
+
__version__ = "0.2.96"
|
| 9 |
__copyright__ = "Copyright (c) 2024 Karim Shoair"
|
| 10 |
|
| 11 |
|
scrapling/cli.py
CHANGED
|
@@ -12,13 +12,14 @@ def get_package_dir():
|
|
| 12 |
|
| 13 |
def run_command(command, line):
|
| 14 |
print(f"Installing {line}...")
|
| 15 |
-
_ = subprocess.check_call(command, shell=True)
|
| 16 |
# I meant to not use try except here
|
| 17 |
|
| 18 |
|
| 19 |
@click.command(help="Install all Scrapling's Fetchers dependencies")
|
| 20 |
-
|
| 21 |
-
|
|
|
|
| 22 |
run_command([sys.executable, "-m", "playwright", "install", 'chromium'], 'Playwright browsers')
|
| 23 |
run_command([sys.executable, "-m", "playwright", "install-deps", 'chromium', 'firefox'], 'Playwright dependencies')
|
| 24 |
run_command([sys.executable, "-m", "camoufox", "fetch", '--browserforge'], 'Camoufox browser and databases')
|
|
|
|
| 12 |
|
| 13 |
def run_command(command, line):
|
| 14 |
print(f"Installing {line}...")
|
| 15 |
+
_ = subprocess.check_call(' '.join(command), shell=True)
|
| 16 |
# I meant to not use try except here
|
| 17 |
|
| 18 |
|
| 19 |
@click.command(help="Install all Scrapling's Fetchers dependencies")
|
| 20 |
+
@click.option('-f', '--force', 'force', is_flag=True, default=False, type=bool, help="Force Scrapling to reinstall all Fetchers dependencies")
|
| 21 |
+
def install(force):
|
| 22 |
+
if force or not get_package_dir().joinpath(".scrapling_dependencies_installed").exists():
|
| 23 |
run_command([sys.executable, "-m", "playwright", "install", 'chromium'], 'Playwright browsers')
|
| 24 |
run_command([sys.executable, "-m", "playwright", "install-deps", 'chromium', 'firefox'], 'Playwright dependencies')
|
| 25 |
run_command([sys.executable, "-m", "camoufox", "fetch", '--browserforge'], 'Camoufox browser and databases')
|
scrapling/core/custom_types.py
CHANGED
|
@@ -23,19 +23,8 @@ class TextHandler(str):
|
|
| 23 |
return super().__new__(cls, string)
|
| 24 |
return super().__new__(cls, '')
|
| 25 |
|
| 26 |
-
|
| 27 |
-
def __getitem__(self, key: SupportsIndex) -> 'TextHandler':
|
| 28 |
-
pass
|
| 29 |
-
|
| 30 |
-
@typing.overload
|
| 31 |
-
def __getitem__(self, key: slice) -> "TextHandlers":
|
| 32 |
-
pass
|
| 33 |
-
|
| 34 |
-
def __getitem__(self, key: Union[SupportsIndex, slice]) -> Union["TextHandler", "TextHandlers"]:
|
| 35 |
lst = super().__getitem__(key)
|
| 36 |
-
if isinstance(key, slice):
|
| 37 |
-
lst = [TextHandler(s) for s in lst]
|
| 38 |
-
return TextHandlers(typing.cast(List[_TextHandlerType], lst))
|
| 39 |
return typing.cast(_TextHandlerType, TextHandler(lst))
|
| 40 |
|
| 41 |
def split(self, sep: str = None, maxsplit: SupportsIndex = -1) -> 'TextHandlers':
|
|
|
|
| 23 |
return super().__new__(cls, string)
|
| 24 |
return super().__new__(cls, '')
|
| 25 |
|
| 26 |
+
def __getitem__(self, key: Union[SupportsIndex, slice]) -> "TextHandler":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
lst = super().__getitem__(key)
|
|
|
|
|
|
|
|
|
|
| 28 |
return typing.cast(_TextHandlerType, TextHandler(lst))
|
| 29 |
|
| 30 |
def split(self, sep: str = None, maxsplit: SupportsIndex = -1) -> 'TextHandlers':
|
setup.cfg
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
[metadata]
|
| 2 |
name = scrapling
|
| 3 |
-
version = 0.2.95
|
| 4 |
author = Karim Shoair
|
| 5 |
author_email = karim.shoair@pm.me
|
| 6 |
description = Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy again!
|
|
|
|
| 1 |
[metadata]
|
| 2 |
name = scrapling
|
| 3 |
+
version = 0.2.96
|
| 4 |
author = Karim Shoair
|
| 5 |
author_email = karim.shoair@pm.me
|
| 6 |
description = Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy again!
|
setup.py
CHANGED
|
@@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
|
|
| 6 |
|
| 7 |
setup(
|
| 8 |
name="scrapling",
|
| 9 |
-
version="0.2.95",
|
| 10 |
description="""Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy again! In an internet filled with complications,
|
| 11 |
it simplifies web scraping, even when websites' design changes, while providing impressive speed that surpasses almost all alternatives.""",
|
| 12 |
long_description=long_description,
|
|
|
|
| 6 |
|
| 7 |
setup(
|
| 8 |
name="scrapling",
|
| 9 |
+
version="0.2.96",
|
| 10 |
description="""Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy again! In an internet filled with complications,
|
| 11 |
it simplifies web scraping, even when websites' design changes, while providing impressive speed that surpasses almost all alternatives.""",
|
| 12 |
long_description=long_description,
|