Karim shoair committed on
Commit ·
ad1b1b0
1
Parent(s): afb246c
feat(cli): adding terminal command `scrapling install`
Browse files- .bandit.yml +2 -0
- MANIFEST.in +3 -0
- scrapling/cli.py +37 -0
- setup.py +6 -0
.bandit.yml
CHANGED
|
@@ -5,3 +5,5 @@ skips:
|
|
| 5 |
- B410
|
| 6 |
- B113 # `Requests call without timeout` these requests are done in the benchmark and examples scripts only
|
| 7 |
- B403 # We are using pickle for tests only
|
|
|
|
|
|
|
|
|
| 5 |
- B410
|
| 6 |
- B113 # `Requests call without timeout` these requests are done in the benchmark and examples scripts only
|
| 7 |
- B403 # We are using pickle for tests only
|
| 8 |
+
- B404 # Using subprocess library
|
| 9 |
+
- B602 # subprocess call with shell=True identified
|
MANIFEST.in
CHANGED
|
@@ -4,7 +4,10 @@ include *.js
|
|
| 4 |
include scrapling/engines/toolbelt/bypasses/*.js
|
| 5 |
include scrapling/*.db
|
| 6 |
include scrapling/*.db*
|
|
|
|
| 7 |
include scrapling/py.typed
|
|
|
|
|
|
|
| 8 |
|
| 9 |
recursive-exclude * __pycache__
|
| 10 |
recursive-exclude * *.py[co]
|
|
|
|
| 4 |
include scrapling/engines/toolbelt/bypasses/*.js
|
| 5 |
include scrapling/*.db
|
| 6 |
include scrapling/*.db*
|
| 7 |
+
include scrapling/*.db-*
|
| 8 |
include scrapling/py.typed
|
| 9 |
+
include scrapling/.scrapling_dependencies_installed
|
| 10 |
+
include .scrapling_dependencies_installed
|
| 11 |
|
| 12 |
recursive-exclude * __pycache__
|
| 13 |
recursive-exclude * *.py[co]
|
scrapling/cli.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import subprocess
|
| 3 |
+
import sys
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
import click
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def get_package_dir():
    """Return the directory that contains this module as a ``Path``."""
    return Path(__file__).parent
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def run_command(command, line):
    """Announce and run one installation command.

    :param command: Either an argv-style sequence (program followed by its
        arguments) or a single shell command string.
    :param line: Human-readable description of what is being installed; it is
        printed before the command starts.
    :raises subprocess.CalledProcessError: If the command exits with a
        non-zero status.
    """
    # Flush so the message shows up before the child process writes anything.
    print(f"Installing {line}...", flush=True)
    # An argv list must NOT go through the shell: on POSIX, `shell=True` with a
    # list executes only the first item and passes the remaining items to the
    # shell itself, so the actual arguments are silently dropped. Only a plain
    # string command is handed to the shell.
    use_shell = isinstance(command, str)
    # Failures are intentionally not caught here: the exception must propagate
    # so the caller stops instead of treating the step as successful.
    subprocess.check_call(command, shell=use_shell)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@click.command(help="Install all Scrapling's Fetchers dependencies")
def install():
    """Run the Playwright and Camoufox installation commands at most once.

    A marker file inside the package directory records a completed run. When
    the marker already exists, nothing is executed and a notice is printed.
    """
    marker = get_package_dir().joinpath(".scrapling_dependencies_installed")
    if marker.exists():
        print('The dependencies are already installed')
        return

    python = sys.executable
    steps = (
        ([python, "-m", "playwright", "install", 'chromium'], 'Playwright browsers'),
        ([python, "-m", "playwright", "install-deps", 'chromium', 'firefox'], 'Playwright dependencies'),
        ([python, "-m", "camoufox", "fetch", '--browserforge'], 'Camoufox browser and databases'),
    )
    for command, label in steps:
        run_command(command, label)

    # Reached only when every step above finished without raising.
    marker.touch()
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# Root command group of the CLI; it performs no work on its own.
# NOTE: deliberately documented with comments rather than a docstring, because
# click would pick a docstring up as the group's help text.
@click.group()
def main():
    return None
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Register the subcommands on the `main` group
|
| 37 |
+
main.add_command(install)
|
setup.py
CHANGED
|
@@ -20,6 +20,11 @@ setup(
|
|
| 20 |
package_dir={
|
| 21 |
"scrapling": "scrapling",
|
| 22 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
include_package_data=True,
|
| 24 |
classifiers=[
|
| 25 |
"Operating System :: OS Independent",
|
|
@@ -50,6 +55,7 @@ setup(
|
|
| 50 |
"requests>=2.3",
|
| 51 |
"lxml>=4.5",
|
| 52 |
"cssselect>=1.2",
|
|
|
|
| 53 |
"w3lib",
|
| 54 |
"orjson>=3",
|
| 55 |
"tldextract",
|
|
|
|
| 20 |
package_dir={
|
| 21 |
"scrapling": "scrapling",
|
| 22 |
},
|
| 23 |
+
entry_points={
|
| 24 |
+
'console_scripts': [
|
| 25 |
+
'scrapling=scrapling.cli:main'
|
| 26 |
+
],
|
| 27 |
+
},
|
| 28 |
include_package_data=True,
|
| 29 |
classifiers=[
|
| 30 |
"Operating System :: OS Independent",
|
|
|
|
| 55 |
"requests>=2.3",
|
| 56 |
"lxml>=4.5",
|
| 57 |
"cssselect>=1.2",
|
| 58 |
+
'click',
|
| 59 |
"w3lib",
|
| 60 |
"orjson>=3",
|
| 61 |
"tldextract",
|