Karim shoair committed on
Commit
7d2ea72
·
1 Parent(s): 73f9191

fix: make browser fetchers type hints always present

Browse files
Files changed (1) hide show
  1. scrapling/engines/_browsers/_types.py +90 -95
scrapling/engines/_browsers/_types.py CHANGED
@@ -19,7 +19,6 @@ from scrapling.core._types import (
19
  TypeAlias,
20
  SetCookieParam,
21
  SelectorWaitStates,
22
- TYPE_CHECKING,
23
  )
24
  from scrapling.engines.toolbelt.proxy_rotation import ProxyRotator
25
 
@@ -27,97 +26,93 @@ from scrapling.engines.toolbelt.proxy_rotation import ProxyRotator
27
  ImpersonateType: TypeAlias = BrowserTypeLiteral | List[BrowserTypeLiteral] | None
28
 
29
 
30
- if TYPE_CHECKING: # pragma: no cover
31
- # Types for session initialization
32
- class RequestsSession(TypedDict, total=False):
33
- impersonate: ImpersonateType
34
- http3: Optional[bool]
35
- stealthy_headers: Optional[bool]
36
- proxies: Optional[ProxySpec]
37
- proxy: Optional[str]
38
- proxy_auth: Optional[Tuple[str, str]]
39
- proxy_rotator: Optional[ProxyRotator]
40
- timeout: Optional[int | float]
41
- headers: Optional[Mapping[str, Optional[str]]]
42
- retries: Optional[int]
43
- retry_delay: Optional[int]
44
- follow_redirects: Optional[bool]
45
- max_redirects: Optional[int]
46
- verify: Optional[bool]
47
- cert: Optional[str | Tuple[str, str]]
48
- selector_config: Optional[Dict]
49
-
50
- # Types for GET request method parameters
51
- class GetRequestParams(RequestsSession, total=False):
52
- params: Optional[Dict | List | Tuple]
53
- cookies: Optional[CookieTypes]
54
- auth: Optional[Tuple[str, str]]
55
-
56
- # Types for POST/PUT/DELETE request method parameters
57
- class DataRequestParams(GetRequestParams, total=False):
58
- data: Optional[Dict[str, str] | List[Tuple] | str | BytesIO | bytes]
59
- json: Optional[Dict | List]
60
-
61
- # Types for browser session
62
- class PlaywrightSession(TypedDict, total=False):
63
- max_pages: int
64
- headless: bool
65
- disable_resources: bool
66
- network_idle: bool
67
- load_dom: bool
68
- wait_selector: Optional[str]
69
- wait_selector_state: SelectorWaitStates
70
- cookies: Sequence[SetCookieParam] | None
71
- google_search: bool
72
- wait: int | float
73
- timezone_id: str | None
74
- page_action: Optional[Callable]
75
- proxy: Optional[str | Dict[str, str] | Tuple]
76
- proxy_rotator: Optional[ProxyRotator]
77
- extra_headers: Optional[Dict[str, str]]
78
- timeout: int | float
79
- init_script: Optional[str]
80
- user_data_dir: str
81
- selector_config: Optional[Dict]
82
- additional_args: Optional[Dict]
83
- locale: Optional[str]
84
- real_chrome: bool
85
- cdp_url: Optional[str]
86
- useragent: Optional[str]
87
- extra_flags: Optional[List[str]]
88
- blocked_domains: Optional[Set[str]]
89
- retries: int
90
- retry_delay: int | float
91
-
92
- class PlaywrightFetchParams(TypedDict, total=False):
93
- load_dom: bool
94
- wait: int | float
95
- network_idle: bool
96
- google_search: bool
97
- timeout: int | float
98
- disable_resources: bool
99
- wait_selector: Optional[str]
100
- page_action: Optional[Callable]
101
- selector_config: Optional[Dict]
102
- extra_headers: Optional[Dict[str, str]]
103
- wait_selector_state: SelectorWaitStates
104
- blocked_domains: Optional[Set[str]]
105
- proxy: Optional[str | Dict[str, str]]
106
-
107
- class StealthSession(PlaywrightSession, total=False):
108
- allow_webgl: bool
109
- hide_canvas: bool
110
- block_webrtc: bool
111
- solve_cloudflare: bool
112
-
113
- class StealthFetchParams(PlaywrightFetchParams, total=False):
114
- solve_cloudflare: bool
115
-
116
- else: # pragma: no cover
117
- RequestsSession = TypedDict
118
- GetRequestParams = TypedDict
119
- DataRequestParams = TypedDict
120
- PlaywrightSession = TypedDict
121
- PlaywrightFetchParams = TypedDict
122
- StealthSession = TypedDict
123
- StealthFetchParams = TypedDict
 
19
  TypeAlias,
20
  SetCookieParam,
21
  SelectorWaitStates,
 
22
  )
23
  from scrapling.engines.toolbelt.proxy_rotation import ProxyRotator
24
 
 
26
  ImpersonateType: TypeAlias = BrowserTypeLiteral | List[BrowserTypeLiteral] | None
27
 
28
 
29
+ # Types for session initialization
30
+ class RequestsSession(TypedDict, total=False):
31
+ impersonate: ImpersonateType
32
+ http3: Optional[bool]
33
+ stealthy_headers: Optional[bool]
34
+ proxies: Optional[ProxySpec]
35
+ proxy: Optional[str]
36
+ proxy_auth: Optional[Tuple[str, str]]
37
+ proxy_rotator: Optional[ProxyRotator]
38
+ timeout: Optional[int | float]
39
+ headers: Optional[Mapping[str, Optional[str]]]
40
+ retries: Optional[int]
41
+ retry_delay: Optional[int]
42
+ follow_redirects: Optional[bool]
43
+ max_redirects: Optional[int]
44
+ verify: Optional[bool]
45
+ cert: Optional[str | Tuple[str, str]]
46
+ selector_config: Optional[Dict]
47
+
48
+
49
+ # Types for GET request method parameters
50
+ class GetRequestParams(RequestsSession, total=False):
51
+ params: Optional[Dict | List | Tuple]
52
+ cookies: Optional[CookieTypes]
53
+ auth: Optional[Tuple[str, str]]
54
+
55
+
56
+ # Types for POST/PUT/DELETE request method parameters
57
+ class DataRequestParams(GetRequestParams, total=False):
58
+ data: Optional[Dict[str, str] | List[Tuple] | str | BytesIO | bytes]
59
+ json: Optional[Dict | List]
60
+
61
+
62
+ # Types for browser session
63
+ class PlaywrightSession(TypedDict, total=False):
64
+ max_pages: int
65
+ headless: bool
66
+ disable_resources: bool
67
+ network_idle: bool
68
+ load_dom: bool
69
+ wait_selector: Optional[str]
70
+ wait_selector_state: SelectorWaitStates
71
+ cookies: Sequence[SetCookieParam] | None
72
+ google_search: bool
73
+ wait: int | float
74
+ timezone_id: str | None
75
+ page_action: Optional[Callable]
76
+ proxy: Optional[str | Dict[str, str] | Tuple]
77
+ proxy_rotator: Optional[ProxyRotator]
78
+ extra_headers: Optional[Dict[str, str]]
79
+ timeout: int | float
80
+ init_script: Optional[str]
81
+ user_data_dir: str
82
+ selector_config: Optional[Dict]
83
+ additional_args: Optional[Dict]
84
+ locale: Optional[str]
85
+ real_chrome: bool
86
+ cdp_url: Optional[str]
87
+ useragent: Optional[str]
88
+ extra_flags: Optional[List[str]]
89
+ blocked_domains: Optional[Set[str]]
90
+ retries: int
91
+ retry_delay: int | float
92
+
93
+
94
+ class PlaywrightFetchParams(TypedDict, total=False):
95
+ load_dom: bool
96
+ wait: int | float
97
+ network_idle: bool
98
+ google_search: bool
99
+ timeout: int | float
100
+ disable_resources: bool
101
+ wait_selector: Optional[str]
102
+ page_action: Optional[Callable]
103
+ selector_config: Optional[Dict]
104
+ extra_headers: Optional[Dict[str, str]]
105
+ wait_selector_state: SelectorWaitStates
106
+ blocked_domains: Optional[Set[str]]
107
+ proxy: Optional[str | Dict[str, str]]
108
+
109
+
110
+ class StealthSession(PlaywrightSession, total=False):
111
+ allow_webgl: bool
112
+ hide_canvas: bool
113
+ block_webrtc: bool
114
+ solve_cloudflare: bool
115
+
116
+
117
+ class StealthFetchParams(PlaywrightFetchParams, total=False):
118
+ solve_cloudflare: bool