Karim shoair committed on
Commit
c2dbf4c
·
1 Parent(s): ac1e174

style: Use shorter and more accurate naming for constants

Browse files
scrapling/engines/_browsers/_base.py CHANGED
@@ -40,11 +40,7 @@ from scrapling.core._types import (
40
  Generator,
41
  AsyncGenerator,
42
  )
43
- from scrapling.engines.constants import (
44
- DEFAULT_STEALTH_FLAGS,
45
- HARMFUL_DEFAULT_ARGS,
46
- DEFAULT_FLAGS,
47
- )
48
 
49
 
50
  class SyncSession:
@@ -389,8 +385,8 @@ class BaseSessionMixin:
389
  # Dark color scheme bypasses the 'prefersLightColor' check in creepjs
390
  self._context_options: Dict[str, Any] = {"color_scheme": "dark", "device_scale_factor": 2}
391
  self._browser_options: Dict[str, Any] = {
392
- "args": DEFAULT_FLAGS,
393
- "ignore_default_args": HARMFUL_DEFAULT_ARGS,
394
  }
395
  if "__max_pages" in params:
396
  params["max_pages"] = params.pop("__max_pages")
@@ -484,7 +480,7 @@ class StealthySessionMixin(BaseSessionMixin):
484
  config = cast(StealthConfig, self._config)
485
  flags: Tuple[str, ...] = tuple()
486
  if not config.cdp_url:
487
- flags = DEFAULT_FLAGS + DEFAULT_STEALTH_FLAGS
488
 
489
  if config.block_webrtc:
490
  flags += (
 
40
  Generator,
41
  AsyncGenerator,
42
  )
43
+ from scrapling.engines.constants import STEALTH_ARGS, HARMFUL_ARGS, DEFAULT_ARGS
 
 
 
 
44
 
45
 
46
  class SyncSession:
 
385
  # Dark color scheme bypasses the 'prefersLightColor' check in creepjs
386
  self._context_options: Dict[str, Any] = {"color_scheme": "dark", "device_scale_factor": 2}
387
  self._browser_options: Dict[str, Any] = {
388
+ "args": DEFAULT_ARGS,
389
+ "ignore_default_args": HARMFUL_ARGS,
390
  }
391
  if "__max_pages" in params:
392
  params["max_pages"] = params.pop("__max_pages")
 
480
  config = cast(StealthConfig, self._config)
481
  flags: Tuple[str, ...] = tuple()
482
  if not config.cdp_url:
483
+ flags = DEFAULT_ARGS + STEALTH_ARGS
484
 
485
  if config.block_webrtc:
486
  flags += (
scrapling/engines/constants.py CHANGED
@@ -1,5 +1,5 @@
1
  # Disable loading these resources for speed
2
- DEFAULT_DISABLED_RESOURCES = {
3
  "font",
4
  "image",
5
  "media",
@@ -12,7 +12,7 @@ DEFAULT_DISABLED_RESOURCES = {
12
  "stylesheet",
13
  }
14
 
15
- HARMFUL_DEFAULT_ARGS = (
16
  # This will be ignored to avoid detection more and possibly avoid the popup crashing bug abuse: https://issues.chromium.org/issues/340836884
17
  "--enable-automation",
18
  "--disable-popup-blocking",
@@ -21,7 +21,7 @@ HARMFUL_DEFAULT_ARGS = (
21
  "--disable-extensions",
22
  )
23
 
24
- DEFAULT_FLAGS = (
25
  # Speed up chromium browsers by default
26
  "--no-pings",
27
  "--no-first-run",
@@ -36,7 +36,7 @@ DEFAULT_FLAGS = (
36
  "--disable-search-engine-choice-screen",
37
  )
38
 
39
- DEFAULT_STEALTH_FLAGS = (
40
  # Explanation: https://peter.sh/experiments/chromium-command-line-switches/
41
  # Generally this will make the browser faster and less detectable
42
  # "--incognito",
 
1
  # Disable loading these resources for speed
2
+ EXTRA_RESOURCES = {
3
  "font",
4
  "image",
5
  "media",
 
12
  "stylesheet",
13
  }
14
 
15
+ HARMFUL_ARGS = (
16
  # This will be ignored to avoid detection more and possibly avoid the popup crashing bug abuse: https://issues.chromium.org/issues/340836884
17
  "--enable-automation",
18
  "--disable-popup-blocking",
 
21
  "--disable-extensions",
22
  )
23
 
24
+ DEFAULT_ARGS = (
25
  # Speed up chromium browsers by default
26
  "--no-pings",
27
  "--no-first-run",
 
36
  "--disable-search-engine-choice-screen",
37
  )
38
 
39
+ STEALTH_ARGS = (
40
  # Explanation: https://peter.sh/experiments/chromium-command-line-switches/
41
  # Generally this will make the browser faster and less detectable
42
  # "--incognito",
scrapling/engines/toolbelt/navigation.py CHANGED
@@ -12,7 +12,7 @@ from playwright.sync_api import Route
12
 
13
  from scrapling.core.utils import log
14
  from scrapling.core._types import Dict, Set, Tuple, Optional, Callable
15
- from scrapling.engines.constants import DEFAULT_DISABLED_RESOURCES
16
 
17
  __BYPASSES_DIR__ = Path(__file__).parent / "bypasses"
18
 
@@ -30,7 +30,7 @@ def create_intercept_handler(disable_resources: bool, blocked_domains: Optional[
30
  :param blocked_domains: Set of domain names to block requests to.
31
  :return: A sync route handler function.
32
  """
33
- disabled_resources = DEFAULT_DISABLED_RESOURCES if disable_resources else set()
34
  domains = blocked_domains or set()
35
 
36
  def handler(route: Route):
@@ -57,7 +57,7 @@ def create_async_intercept_handler(disable_resources: bool, blocked_domains: Opt
57
  :param blocked_domains: Set of domain names to block requests to.
58
  :return: An async route handler function.
59
  """
60
- disabled_resources = DEFAULT_DISABLED_RESOURCES if disable_resources else set()
61
  domains = blocked_domains or set()
62
 
63
  async def handler(route: async_Route):
 
12
 
13
  from scrapling.core.utils import log
14
  from scrapling.core._types import Dict, Set, Tuple, Optional, Callable
15
+ from scrapling.engines.constants import EXTRA_RESOURCES
16
 
17
  __BYPASSES_DIR__ = Path(__file__).parent / "bypasses"
18
 
 
30
  :param blocked_domains: Set of domain names to block requests to.
31
  :return: A sync route handler function.
32
  """
33
+ disabled_resources = EXTRA_RESOURCES if disable_resources else set()
34
  domains = blocked_domains or set()
35
 
36
  def handler(route: Route):
 
57
  :param blocked_domains: Set of domain names to block requests to.
58
  :return: An async route handler function.
59
  """
60
+ disabled_resources = EXTRA_RESOURCES if disable_resources else set()
61
  domains = blocked_domains or set()
62
 
63
  async def handler(route: async_Route):
tests/fetchers/test_constants.py CHANGED
@@ -1,9 +1,4 @@
1
- from scrapling.engines.constants import (
2
- DEFAULT_DISABLED_RESOURCES,
3
- DEFAULT_STEALTH_FLAGS,
4
- HARMFUL_DEFAULT_ARGS,
5
- DEFAULT_FLAGS,
6
- )
7
 
8
 
9
  class TestConstants:
@@ -11,18 +6,18 @@ class TestConstants:
11
 
12
  def test_default_disabled_resources(self):
13
  """Test default disabled resources"""
14
- assert "image" in DEFAULT_DISABLED_RESOURCES
15
- assert "font" in DEFAULT_DISABLED_RESOURCES
16
- assert "stylesheet" in DEFAULT_DISABLED_RESOURCES
17
- assert "media" in DEFAULT_DISABLED_RESOURCES
18
 
19
  def test_harmful_default_args(self):
20
  """Test harmful default arguments"""
21
- assert "--enable-automation" in HARMFUL_DEFAULT_ARGS
22
- assert "--disable-popup-blocking" in HARMFUL_DEFAULT_ARGS
23
 
24
  def test_flags(self):
25
  """Test default stealth flags"""
26
- assert "--no-pings" in DEFAULT_FLAGS
27
- # assert "--incognito" in DEFAULT_STEALTH_FLAGS
28
- assert "--disable-blink-features=AutomationControlled" in DEFAULT_STEALTH_FLAGS
 
1
+ from scrapling.engines.constants import EXTRA_RESOURCES, STEALTH_ARGS, HARMFUL_ARGS, DEFAULT_ARGS
 
 
 
 
 
2
 
3
 
4
  class TestConstants:
 
6
 
7
  def test_default_disabled_resources(self):
8
  """Test default disabled resources"""
9
+ assert "image" in EXTRA_RESOURCES
10
+ assert "font" in EXTRA_RESOURCES
11
+ assert "stylesheet" in EXTRA_RESOURCES
12
+ assert "media" in EXTRA_RESOURCES
13
 
14
  def test_harmful_default_args(self):
15
  """Test harmful default arguments"""
16
+ assert "--enable-automation" in HARMFUL_ARGS
17
+ assert "--disable-popup-blocking" in HARMFUL_ARGS
18
 
19
  def test_flags(self):
20
  """Test default stealth flags"""
21
+ assert "--no-pings" in DEFAULT_ARGS
22
+ # assert "--incognito" in STEALTH_ARGS
23
+ assert "--disable-blink-features=AutomationControlled" in STEALTH_ARGS
tests/fetchers/test_utils.py CHANGED
@@ -8,7 +8,6 @@ from scrapling.engines.toolbelt.navigation import (
8
  create_async_intercept_handler,
9
  js_bypass_path,
10
  )
11
- from scrapling.engines.constants import DEFAULT_DISABLED_RESOURCES
12
  from scrapling.engines.toolbelt.fingerprints import (
13
  generate_convincing_referer,
14
  get_os_name,
 
8
  create_async_intercept_handler,
9
  js_bypass_path,
10
  )
 
11
  from scrapling.engines.toolbelt.fingerprints import (
12
  generate_convincing_referer,
13
  get_os_name,