Karim shoair committed on
Commit
e39bf62
·
1 Parent(s): 76e6484

refactor(DynamicSession): Optimization + Removing `max_pages` from sync version

Browse files
scrapling/engines/_browsers/_controllers.py CHANGED
@@ -80,7 +80,7 @@ class DynamicSession:
80
 
81
  def __init__(
82
  self,
83
- max_pages: int = 1,
84
  headless: bool = True,
85
  google_search: bool = True,
86
  hide_canvas: bool = False,
@@ -102,7 +102,7 @@ class DynamicSession:
102
  wait_selector_state: SelectorWaitStates = "attached",
103
  adaptor_arguments: Optional[Dict] = None,
104
  ):
105
- """A Browser session manager with page pooling
106
 
107
  :param headless: Run the browser in headless/hidden (default), or headful/visible mode.
108
  :param disable_resources: Drop requests of unnecessary resources for a speed boost. It depends, but it made requests ~25% faster in my tests for some websites.
@@ -125,12 +125,11 @@ class DynamicSession:
125
  :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
126
  :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
127
  :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
128
- :param max_pages: The maximum number of tabs to be opened at the same time. It will be used in rotation through a PagePool.
129
  :param adaptor_arguments: The arguments that will be passed in the end while creating the final Adaptor's class.
130
  """
131
 
132
  params = {
133
- "max_pages": max_pages,
134
  "headless": headless,
135
  "google_search": google_search,
136
  "hide_canvas": hide_canvas,
@@ -188,38 +187,46 @@ class DynamicSession:
188
  self.__initiate_browser_options__()
189
 
190
  def __initiate_browser_options__(self):
191
- # `launch_options` is used with persistent context
192
- self.launch_options = dict(
193
- _launch_kwargs(
194
- self.headless,
195
- self.proxy,
196
- self.locale,
197
- tuple(self.extra_headers.items()) if self.extra_headers else tuple(),
198
- self.useragent,
199
- self.real_chrome,
200
- self.stealth,
201
- self.hide_canvas,
202
- self.disable_webgl,
 
 
 
 
203
  )
204
- )
205
- self.launch_options["extra_http_headers"] = dict(
206
- self.launch_options["extra_http_headers"]
207
- )
208
- self.launch_options["proxy"] = dict(self.launch_options["proxy"]) or None
209
- # while `context_options` is left to be used when cdp mode is enabled
210
- self.context_options = dict(
211
- _context_kwargs(
212
- self.proxy,
213
- self.locale,
214
- tuple(self.extra_headers.items()) if self.extra_headers else tuple(),
215
- self.useragent,
216
- self.stealth,
217
  )
218
- )
219
- self.context_options["extra_http_headers"] = dict(
220
- self.context_options["extra_http_headers"]
221
- )
222
- self.context_options["proxy"] = dict(self.context_options["proxy"]) or None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
  def __create__(self):
225
  """Create a browser for this instance and context."""
@@ -386,7 +393,7 @@ class DynamicSession:
386
 
387
 
388
  class AsyncDynamicSession(DynamicSession):
389
- """A Browser session manager with page pooling"""
390
 
391
  def __init__(
392
  self,
 
80
 
81
  def __init__(
82
  self,
83
+ __max_pages: int = 1,
84
  headless: bool = True,
85
  google_search: bool = True,
86
  hide_canvas: bool = False,
 
102
  wait_selector_state: SelectorWaitStates = "attached",
103
  adaptor_arguments: Optional[Dict] = None,
104
  ):
105
+ """A Browser session manager with page pooling, it's using a persistent browser Context by default with a temporary user profile directory.
106
 
107
  :param headless: Run the browser in headless/hidden (default), or headful/visible mode.
108
  :param disable_resources: Drop requests of unnecessary resources for a speed boost. It depends, but it made requests ~25% faster in my tests for some websites.
 
125
  :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
126
  :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
127
  :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
 
128
  :param adaptor_arguments: The arguments that will be passed in the end while creating the final Adaptor's class.
129
  """
130
 
131
  params = {
132
+ "max_pages": __max_pages,
133
  "headless": headless,
134
  "google_search": google_search,
135
  "hide_canvas": hide_canvas,
 
187
  self.__initiate_browser_options__()
188
 
189
  def __initiate_browser_options__(self):
190
+ if self.cdp_url:
191
+ # `launch_options` is used with persistent context
192
+ self.launch_options = dict(
193
+ _launch_kwargs(
194
+ self.headless,
195
+ self.proxy,
196
+ self.locale,
197
+ tuple(self.extra_headers.items())
198
+ if self.extra_headers
199
+ else tuple(),
200
+ self.useragent,
201
+ self.real_chrome,
202
+ self.stealth,
203
+ self.hide_canvas,
204
+ self.disable_webgl,
205
+ )
206
  )
207
+ self.launch_options["extra_http_headers"] = dict(
208
+ self.launch_options["extra_http_headers"]
 
 
 
 
 
 
 
 
 
 
 
209
  )
210
+ self.launch_options["proxy"] = dict(self.launch_options["proxy"]) or None
211
+ self.context_options = dict()
212
+ else:
213
+ # while `context_options` is left to be used when cdp mode is enabled
214
+ self.launch_options = dict()
215
+ self.context_options = dict(
216
+ _context_kwargs(
217
+ self.proxy,
218
+ self.locale,
219
+ tuple(self.extra_headers.items())
220
+ if self.extra_headers
221
+ else tuple(),
222
+ self.useragent,
223
+ self.stealth,
224
+ )
225
+ )
226
+ self.context_options["extra_http_headers"] = dict(
227
+ self.context_options["extra_http_headers"]
228
+ )
229
+ self.context_options["proxy"] = dict(self.context_options["proxy"]) or None
230
 
231
  def __create__(self):
232
  """Create a browser for this instance and context."""
 
393
 
394
 
395
  class AsyncDynamicSession(DynamicSession):
396
+ """An async Browser session manager with page pooling, it's using a persistent browser Context by default with a temporary user profile directory."""
397
 
398
  def __init__(
399
  self,