Spaces:

lenson78
/

Scrapling

Paused

Karim shoair

fix: update code and docstrings to remove the old google referer logic

a28879b about 1 month ago

35.1 kB

	from asyncio import gather

	from mcp.server.fastmcp import FastMCP
	from pydantic import BaseModel, Field

	from scrapling.core.shell import Convertor
	from scrapling.engines.toolbelt.custom import Response as _ScraplingResponse
	from scrapling.engines.static import ImpersonateType
	from scrapling.fetchers import (
	Fetcher,
	FetcherSession,
	DynamicFetcher,
	AsyncDynamicSession,
	StealthyFetcher,
	AsyncStealthySession,
	)
	from scrapling.core._types import (
	Optional,
	Tuple,
	Mapping,
	Dict,
	List,
	Any,
	Generator,
	Sequence,
	SetCookieParam,
	extraction_types,
	SelectorWaitStates,
	)


	class ResponseModel(BaseModel):
	    """Request's response information structure."""

	    # NOTE: the class docstring and the `Field` descriptions below are runtime strings
	    # (they are handed to pydantic), so their wording is kept exactly as-is.

	    # HTTP status code of the fetched page.
	    status: int = Field(description="The status code returned by the website.")
	    # Extracted content pieces, one entry per item yielded by the extractor.
	    content: list[str] = Field(description="The content as Markdown/HTML or the text content of the page.")
	    # URL taken from the response object this model was built from.
	    url: str = Field(description="The URL given by the user that resulted in this response.")


	def _content_translator(content: Generator[str, None, None], page: _ScraplingResponse) -> ResponseModel:
	    """Bundle extracted content and response metadata into a single ResponseModel.

	    :param content: Generator yielding the extracted pieces of the page; it is fully consumed here.
	    :param page: The fetched response; its `status` and `url` attributes are copied into the model.
	    :return: One ResponseModel holding the status code, the materialized content list, and the URL.
	    """
	    return ResponseModel(status=page.status, content=list(content), url=page.url)


	def _normalize_credentials(credentials: Optional[Dict[str, str]]) -> Optional[Tuple[str, str]]:
	"""Convert a credentials dictionary to a tuple accepted by fetchers."""
	if not credentials:
	return None

	username = credentials.get("username")
	password = credentials.get("password")

	if username is None or password is None:
	raise ValueError("Credentials dictionary must contain both 'username' and 'password' keys")

	return username, password


	class ScraplingMCPServer:
	    """Collection of scraping tools exposed through an MCP server.

	    Every tool is a static method that forwards its arguments to one of the fetchers, runs the
	    response through `Convertor._extract_content`, and returns the result as `ResponseModel`.
	    `serve` registers the tools on a `FastMCP` instance and starts it.

	    NOTE: the tool docstrings are passed to `FastMCP.add_tool` as the tool descriptions, so their
	    wording is part of the runtime behavior of the server.
	    """

	    @staticmethod
	    def get(
	        url: str,
	        impersonate: ImpersonateType = "chrome",
	        extraction_type: extraction_types = "markdown",
	        css_selector: Optional[str] = None,
	        main_content_only: bool = True,
	        params: Optional[Dict] = None,
	        headers: Optional[Mapping[str, Optional[str]]] = None,
	        cookies: Optional[Dict[str, str]] = None,
	        timeout: Optional[int | float] = 30,
	        follow_redirects: bool = True,
	        max_redirects: int = 30,
	        retries: Optional[int] = 3,
	        retry_delay: Optional[int] = 1,
	        proxy: Optional[str] = None,
	        proxy_auth: Optional[Dict[str, str]] = None,
	        auth: Optional[Dict[str, str]] = None,
	        verify: Optional[bool] = True,
	        http3: Optional[bool] = False,
	        stealthy_headers: Optional[bool] = True,
	    ) -> ResponseModel:
	        """Make GET HTTP request to a URL and return a structured output of the result.
	        Note: This is only suitable for low-mid protection levels. For high-protection levels or websites that require JS loading, use the other tools directly.
	        Note: If the `css_selector` resolves to more than one element, all the elements will be returned.

	        :param url: The URL to request.
	        :param impersonate: Browser version to impersonate its fingerprint. It's using the latest chrome version by default.
	        :param extraction_type: The type of content to extract from the page. Defaults to "markdown". Options are:
	            - Markdown will convert the page content to Markdown format.
	            - HTML will return the raw HTML content of the page.
	            - Text will return the text content of the page.
	        :param css_selector: CSS selector to extract the content from the page. If main_content_only is True, then it will be executed on the main content of the page. Defaults to None.
	        :param main_content_only: Whether to extract only the main content of the page. Defaults to True. The main content here is the data inside the `<body>` tag.
	        :param params: Query string parameters for the request.
	        :param headers: Headers to include in the request.
	        :param cookies: Cookies to use in the request.
	        :param timeout: Number of seconds to wait before timing out.
	        :param follow_redirects: Whether to follow redirects. Defaults to True.
	        :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
	        :param retries: Number of retry attempts. Defaults to 3.
	        :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
	        :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
	            Cannot be used together with the `proxies` parameter.
	        :param proxy_auth: HTTP basic auth for proxy in dictionary format with `username` and `password` keys.
	        :param auth: HTTP basic auth in dictionary format with `username` and `password` keys.
	        :param verify: Whether to verify HTTPS certificates.
	        :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used with `impersonate`.
	        :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets a Google referer header.
	        """
	        # Tool callers send credentials as dictionaries; the fetcher is given tuples instead.
	        normalized_proxy_auth = _normalize_credentials(proxy_auth)
	        normalized_auth = _normalize_credentials(auth)

	        page = Fetcher.get(
	            url,
	            auth=normalized_auth,
	            proxy=proxy,
	            http3=http3,
	            verify=verify,
	            params=params,
	            proxy_auth=normalized_proxy_auth,
	            retry_delay=retry_delay,
	            stealthy_headers=stealthy_headers,
	            impersonate=impersonate,
	            headers=headers,
	            cookies=cookies,
	            timeout=timeout,
	            retries=retries,
	            max_redirects=max_redirects,
	            follow_redirects=follow_redirects,
	        )
	        return _content_translator(
	            Convertor._extract_content(
	                page,
	                css_selector=css_selector,
	                extraction_type=extraction_type,
	                main_content_only=main_content_only,
	            ),
	            page,
	        )

	    @staticmethod
	    async def bulk_get(
	        urls: List[str],
	        impersonate: ImpersonateType = "chrome",
	        extraction_type: extraction_types = "markdown",
	        css_selector: Optional[str] = None,
	        main_content_only: bool = True,
	        params: Optional[Dict] = None,
	        headers: Optional[Mapping[str, Optional[str]]] = None,
	        cookies: Optional[Dict[str, str]] = None,
	        timeout: Optional[int | float] = 30,
	        follow_redirects: bool = True,
	        max_redirects: int = 30,
	        retries: Optional[int] = 3,
	        retry_delay: Optional[int] = 1,
	        proxy: Optional[str] = None,
	        proxy_auth: Optional[Dict[str, str]] = None,
	        auth: Optional[Dict[str, str]] = None,
	        verify: Optional[bool] = True,
	        http3: Optional[bool] = False,
	        stealthy_headers: Optional[bool] = True,
	    ) -> List[ResponseModel]:
	        """Make GET HTTP request to a group of URLs and for each URL, return a structured output of the result.
	        Note: This is only suitable for low-mid protection levels. For high-protection levels or websites that require JS loading, use the other tools directly.
	        Note: If the `css_selector` resolves to more than one element, all the elements will be returned.

	        :param urls: A list of the URLs to request.
	        :param impersonate: Browser version to impersonate its fingerprint. It's using the latest chrome version by default.
	        :param extraction_type: The type of content to extract from the page. Defaults to "markdown". Options are:
	            - Markdown will convert the page content to Markdown format.
	            - HTML will return the raw HTML content of the page.
	            - Text will return the text content of the page.
	        :param css_selector: CSS selector to extract the content from the page. If main_content_only is True, then it will be executed on the main content of the page. Defaults to None.
	        :param main_content_only: Whether to extract only the main content of the page. Defaults to True. The main content here is the data inside the `<body>` tag.
	        :param params: Query string parameters for the request.
	        :param headers: Headers to include in the request.
	        :param cookies: Cookies to use in the request.
	        :param timeout: Number of seconds to wait before timing out.
	        :param follow_redirects: Whether to follow redirects. Defaults to True.
	        :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
	        :param retries: Number of retry attempts. Defaults to 3.
	        :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
	        :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
	            Cannot be used together with the `proxies` parameter.
	        :param proxy_auth: HTTP basic auth for proxy in dictionary format with `username` and `password` keys.
	        :param auth: HTTP basic auth in dictionary format with `username` and `password` keys.
	        :param verify: Whether to verify HTTPS certificates.
	        :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used with `impersonate`.
	        :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets a Google referer header.
	        """
	        # Nothing to request: answer right away instead of opening a session for no work.
	        if not urls:
	            return []

	        normalized_proxy_auth = _normalize_credentials(proxy_auth)
	        normalized_auth = _normalize_credentials(auth)

	        async with FetcherSession() as session:
	            # Build every request first, then await them together through `gather`.
	            tasks: List[Any] = [
	                session.get(
	                    url,
	                    auth=normalized_auth,
	                    proxy=proxy,
	                    http3=http3,
	                    verify=verify,
	                    params=params,
	                    headers=headers,
	                    cookies=cookies,
	                    timeout=timeout,
	                    retries=retries,
	                    proxy_auth=normalized_proxy_auth,
	                    retry_delay=retry_delay,
	                    impersonate=impersonate,
	                    max_redirects=max_redirects,
	                    follow_redirects=follow_redirects,
	                    stealthy_headers=stealthy_headers,
	                )
	                for url in urls
	            ]
	            responses = await gather(*tasks)
	            # `gather` keeps the order of `tasks`, so results line up with `urls`.
	            return [
	                _content_translator(
	                    Convertor._extract_content(
	                        page,
	                        css_selector=css_selector,
	                        extraction_type=extraction_type,
	                        main_content_only=main_content_only,
	                    ),
	                    page,
	                )
	                for page in responses
	            ]

	    @staticmethod
	    async def fetch(
	        url: str,
	        extraction_type: extraction_types = "markdown",
	        css_selector: Optional[str] = None,
	        main_content_only: bool = True,
	        headless: bool = True,
	        google_search: bool = True,
	        real_chrome: bool = False,
	        wait: int | float = 0,
	        proxy: Optional[str | Dict[str, str]] = None,
	        timezone_id: str | None = None,
	        locale: str | None = None,
	        extra_headers: Optional[Dict[str, str]] = None,
	        useragent: Optional[str] = None,
	        cdp_url: Optional[str] = None,
	        timeout: int | float = 30000,
	        disable_resources: bool = False,
	        wait_selector: Optional[str] = None,
	        cookies: Sequence[SetCookieParam] | None = None,
	        network_idle: bool = False,
	        wait_selector_state: SelectorWaitStates = "attached",
	    ) -> ResponseModel:
	        """Use playwright to open a browser to fetch a URL and return a structured output of the result.
	        Note: This is only suitable for low-mid protection levels.
	        Note: If the `css_selector` resolves to more than one element, all the elements will be returned.

	        :param url: The URL to request.
	        :param extraction_type: The type of content to extract from the page. Defaults to "markdown". Options are:
	            - Markdown will convert the page content to Markdown format.
	            - HTML will return the raw HTML content of the page.
	            - Text will return the text content of the page.
	        :param css_selector: CSS selector to extract the content from the page. If main_content_only is True, then it will be executed on the main content of the page. Defaults to None.
	        :param main_content_only: Whether to extract only the main content of the page. Defaults to True. The main content here is the data inside the `<body>` tag.
	        :param headless: Run the browser in headless/hidden (default), or headful/visible mode.
	        :param disable_resources: Drop requests for unnecessary resources for a speed boost.
	            Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
	        :param useragent: Pass a useragent string to be used. Otherwise the fetcher will generate a real Useragent of the same browser and use it.
	        :param cookies: Set cookies for the next request. It should be in a dictionary format that Playwright accepts.
	        :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
	        :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
	        :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
	        :param wait_selector: Wait for a specific CSS selector to be in a specific state.
	        :param timezone_id: Changes the timezone of the browser. Defaults to the system timezone.
	        :param locale: Specify user locale, for example, `en-GB`, `de-DE`, etc. Locale will affect navigator.language value, Accept-Language request header value as well as number and date formatting
	            rules. Defaults to the system default locale.
	        :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
	        :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
	        :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
	        :param google_search: Enabled by default, Scrapling will set a Google referer header.
	        :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by `google_search` takes priority over the referer set here if used together._
	        :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
	        """
	        page = await DynamicFetcher.async_fetch(
	            url,
	            wait=wait,
	            proxy=proxy,
	            locale=locale,
	            timeout=timeout,
	            cookies=cookies,
	            cdp_url=cdp_url,
	            headless=headless,
	            useragent=useragent,
	            timezone_id=timezone_id,
	            real_chrome=real_chrome,
	            network_idle=network_idle,
	            wait_selector=wait_selector,
	            extra_headers=extra_headers,
	            google_search=google_search,
	            disable_resources=disable_resources,
	            wait_selector_state=wait_selector_state,
	        )
	        return _content_translator(
	            Convertor._extract_content(
	                page,
	                css_selector=css_selector,
	                extraction_type=extraction_type,
	                main_content_only=main_content_only,
	            ),
	            page,
	        )

	    @staticmethod
	    async def bulk_fetch(
	        urls: List[str],
	        extraction_type: extraction_types = "markdown",
	        css_selector: Optional[str] = None,
	        main_content_only: bool = True,
	        headless: bool = True,
	        google_search: bool = True,
	        real_chrome: bool = False,
	        wait: int | float = 0,
	        proxy: Optional[str | Dict[str, str]] = None,
	        timezone_id: str | None = None,
	        locale: str | None = None,
	        extra_headers: Optional[Dict[str, str]] = None,
	        useragent: Optional[str] = None,
	        cdp_url: Optional[str] = None,
	        timeout: int | float = 30000,
	        disable_resources: bool = False,
	        wait_selector: Optional[str] = None,
	        cookies: Sequence[SetCookieParam] | None = None,
	        network_idle: bool = False,
	        wait_selector_state: SelectorWaitStates = "attached",
	    ) -> List[ResponseModel]:
	        """Use playwright to open a browser, then fetch a group of URLs at the same time, and for each page return a structured output of the result.
	        Note: This is only suitable for low-mid protection levels.
	        Note: If the `css_selector` resolves to more than one element, all the elements will be returned.

	        :param urls: A list of the URLs to request.
	        :param extraction_type: The type of content to extract from the page. Defaults to "markdown". Options are:
	            - Markdown will convert the page content to Markdown format.
	            - HTML will return the raw HTML content of the page.
	            - Text will return the text content of the page.
	        :param css_selector: CSS selector to extract the content from the page. If main_content_only is True, then it will be executed on the main content of the page. Defaults to None.
	        :param main_content_only: Whether to extract only the main content of the page. Defaults to True. The main content here is the data inside the `<body>` tag.
	        :param headless: Run the browser in headless/hidden (default), or headful/visible mode.
	        :param disable_resources: Drop requests for unnecessary resources for a speed boost.
	            Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
	        :param useragent: Pass a useragent string to be used. Otherwise the fetcher will generate a real Useragent of the same browser and use it.
	        :param cookies: Set cookies for the next request. It should be in a dictionary format that Playwright accepts.
	        :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
	        :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
	        :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
	        :param wait_selector: Wait for a specific CSS selector to be in a specific state.
	        :param timezone_id: Changes the timezone of the browser. Defaults to the system timezone.
	        :param locale: Specify user locale, for example, `en-GB`, `de-DE`, etc. Locale will affect navigator.language value, Accept-Language request header value as well as number and date formatting
	            rules. Defaults to the system default locale.
	        :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
	        :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
	        :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
	        :param google_search: Enabled by default, Scrapling will set a Google referer header.
	        :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by `google_search` takes priority over the referer set here if used together._
	        :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
	        """
	        # Nothing to fetch: skip launching a browser (and avoid asking for a zero-sized page pool).
	        if not urls:
	            return []

	        async with AsyncDynamicSession(
	            wait=wait,
	            proxy=proxy,
	            locale=locale,
	            timeout=timeout,
	            cookies=cookies,
	            cdp_url=cdp_url,
	            headless=headless,
	            # One page slot per URL; presumably this is what lets the gathered fetches overlap.
	            max_pages=len(urls),
	            useragent=useragent,
	            timezone_id=timezone_id,
	            real_chrome=real_chrome,
	            network_idle=network_idle,
	            wait_selector=wait_selector,
	            google_search=google_search,
	            extra_headers=extra_headers,
	            disable_resources=disable_resources,
	            wait_selector_state=wait_selector_state,
	        ) as session:
	            tasks = [session.fetch(url) for url in urls]
	            responses = await gather(*tasks)
	            # `gather` keeps the order of `tasks`, so results line up with `urls`.
	            return [
	                _content_translator(
	                    Convertor._extract_content(
	                        page,
	                        css_selector=css_selector,
	                        extraction_type=extraction_type,
	                        main_content_only=main_content_only,
	                    ),
	                    page,
	                )
	                for page in responses
	            ]

	    @staticmethod
	    async def stealthy_fetch(
	        url: str,
	        extraction_type: extraction_types = "markdown",
	        css_selector: Optional[str] = None,
	        main_content_only: bool = True,
	        headless: bool = True,
	        google_search: bool = True,
	        real_chrome: bool = False,
	        wait: int | float = 0,
	        proxy: Optional[str | Dict[str, str]] = None,
	        timezone_id: str | None = None,
	        locale: str | None = None,
	        extra_headers: Optional[Dict[str, str]] = None,
	        useragent: Optional[str] = None,
	        hide_canvas: bool = False,
	        cdp_url: Optional[str] = None,
	        timeout: int | float = 30000,
	        disable_resources: bool = False,
	        wait_selector: Optional[str] = None,
	        cookies: Sequence[SetCookieParam] | None = None,
	        network_idle: bool = False,
	        wait_selector_state: SelectorWaitStates = "attached",
	        block_webrtc: bool = False,
	        allow_webgl: bool = True,
	        solve_cloudflare: bool = False,
	        additional_args: Optional[Dict] = None,
	    ) -> ResponseModel:
	        """Use the stealthy fetcher to fetch a URL and return a structured output of the result.
	        Note: This is the only suitable fetcher for high protection levels.
	        Note: If the `css_selector` resolves to more than one element, all the elements will be returned.

	        :param url: The URL to request.
	        :param extraction_type: The type of content to extract from the page. Defaults to "markdown". Options are:
	            - Markdown will convert the page content to Markdown format.
	            - HTML will return the raw HTML content of the page.
	            - Text will return the text content of the page.
	        :param css_selector: CSS selector to extract the content from the page. If main_content_only is True, then it will be executed on the main content of the page. Defaults to None.
	        :param main_content_only: Whether to extract only the main content of the page. Defaults to True. The main content here is the data inside the `<body>` tag.
	        :param headless: Run the browser in headless/hidden (default), or headful/visible mode.
	        :param disable_resources: Drop requests for unnecessary resources for a speed boost.
	            Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
	        :param useragent: Pass a useragent string to be used. Otherwise the fetcher will generate a real Useragent of the same browser and use it.
	        :param cookies: Set cookies for the next request.
	        :param solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
	        :param allow_webgl: Enabled by default. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
	        :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
	        :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
	        :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
	        :param wait_selector: Wait for a specific CSS selector to be in a specific state.
	        :param timezone_id: Changes the timezone of the browser. Defaults to the system timezone.
	        :param locale: Specify user locale, for example, `en-GB`, `de-DE`, etc. Locale will affect navigator.language value, Accept-Language request header value as well as number and date formatting
	            rules. Defaults to the system default locale.
	        :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
	        :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
	        :param hide_canvas: Add random noise to canvas operations to prevent fingerprinting.
	        :param block_webrtc: Forces WebRTC to respect proxy settings to prevent local IP address leak.
	        :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
	        :param google_search: Enabled by default, Scrapling will set a Google referer header.
	        :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by `google_search` takes priority over the referer set here if used together._
	        :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
	        :param additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
	        """
	        page = await StealthyFetcher.async_fetch(
	            url,
	            wait=wait,
	            proxy=proxy,
	            locale=locale,
	            cdp_url=cdp_url,
	            timeout=timeout,
	            cookies=cookies,
	            headless=headless,
	            useragent=useragent,
	            timezone_id=timezone_id,
	            real_chrome=real_chrome,
	            hide_canvas=hide_canvas,
	            allow_webgl=allow_webgl,
	            network_idle=network_idle,
	            block_webrtc=block_webrtc,
	            wait_selector=wait_selector,
	            google_search=google_search,
	            extra_headers=extra_headers,
	            additional_args=additional_args,
	            solve_cloudflare=solve_cloudflare,
	            disable_resources=disable_resources,
	            wait_selector_state=wait_selector_state,
	        )
	        return _content_translator(
	            Convertor._extract_content(
	                page,
	                css_selector=css_selector,
	                extraction_type=extraction_type,
	                main_content_only=main_content_only,
	            ),
	            page,
	        )

	    @staticmethod
	    async def bulk_stealthy_fetch(
	        urls: List[str],
	        extraction_type: extraction_types = "markdown",
	        css_selector: Optional[str] = None,
	        main_content_only: bool = True,
	        headless: bool = True,
	        google_search: bool = True,
	        real_chrome: bool = False,
	        wait: int | float = 0,
	        proxy: Optional[str | Dict[str, str]] = None,
	        timezone_id: str | None = None,
	        locale: str | None = None,
	        extra_headers: Optional[Dict[str, str]] = None,
	        useragent: Optional[str] = None,
	        hide_canvas: bool = False,
	        cdp_url: Optional[str] = None,
	        timeout: int | float = 30000,
	        disable_resources: bool = False,
	        wait_selector: Optional[str] = None,
	        cookies: Sequence[SetCookieParam] | None = None,
	        network_idle: bool = False,
	        wait_selector_state: SelectorWaitStates = "attached",
	        block_webrtc: bool = False,
	        allow_webgl: bool = True,
	        solve_cloudflare: bool = False,
	        additional_args: Optional[Dict] = None,
	    ) -> List[ResponseModel]:
	        """Use the stealthy fetcher to fetch a group of URLs at the same time, and for each page return a structured output of the result.
	        Note: This is the only suitable fetcher for high protection levels.
	        Note: If the `css_selector` resolves to more than one element, all the elements will be returned.

	        :param urls: A list of the URLs to request.
	        :param extraction_type: The type of content to extract from the page. Defaults to "markdown". Options are:
	            - Markdown will convert the page content to Markdown format.
	            - HTML will return the raw HTML content of the page.
	            - Text will return the text content of the page.
	        :param css_selector: CSS selector to extract the content from the page. If main_content_only is True, then it will be executed on the main content of the page. Defaults to None.
	        :param main_content_only: Whether to extract only the main content of the page. Defaults to True. The main content here is the data inside the `<body>` tag.
	        :param headless: Run the browser in headless/hidden (default), or headful/visible mode.
	        :param disable_resources: Drop requests for unnecessary resources for a speed boost.
	            Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
	        :param useragent: Pass a useragent string to be used. Otherwise the fetcher will generate a real Useragent of the same browser and use it.
	        :param cookies: Set cookies for the next request.
	        :param solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
	        :param allow_webgl: Enabled by default. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
	        :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
	        :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
	        :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
	        :param wait_selector: Wait for a specific CSS selector to be in a specific state.
	        :param timezone_id: Changes the timezone of the browser. Defaults to the system timezone.
	        :param locale: Specify user locale, for example, `en-GB`, `de-DE`, etc. Locale will affect navigator.language value, Accept-Language request header value as well as number and date formatting
	            rules. Defaults to the system default locale.
	        :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
	        :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
	        :param hide_canvas: Add random noise to canvas operations to prevent fingerprinting.
	        :param block_webrtc: Forces WebRTC to respect proxy settings to prevent local IP address leak.
	        :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
	        :param google_search: Enabled by default, Scrapling will set a Google referer header.
	        :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by `google_search` takes priority over the referer set here if used together._
	        :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
	        :param additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
	        """
	        # Nothing to fetch: skip launching a browser (and avoid asking for a zero-sized page pool).
	        if not urls:
	            return []

	        async with AsyncStealthySession(
	            wait=wait,
	            proxy=proxy,
	            locale=locale,
	            cdp_url=cdp_url,
	            timeout=timeout,
	            cookies=cookies,
	            headless=headless,
	            # Same sizing as `bulk_fetch`: one page slot per URL. Without it the session keeps its
	            # default pool size while all URLs are gathered at once.
	            # NOTE(review): relies on AsyncStealthySession accepting `max_pages` like
	            # AsyncDynamicSession does -- confirm against the installed Scrapling version.
	            max_pages=len(urls),
	            useragent=useragent,
	            timezone_id=timezone_id,
	            real_chrome=real_chrome,
	            hide_canvas=hide_canvas,
	            allow_webgl=allow_webgl,
	            network_idle=network_idle,
	            block_webrtc=block_webrtc,
	            wait_selector=wait_selector,
	            google_search=google_search,
	            extra_headers=extra_headers,
	            additional_args=additional_args,
	            solve_cloudflare=solve_cloudflare,
	            disable_resources=disable_resources,
	            wait_selector_state=wait_selector_state,
	        ) as session:
	            tasks = [session.fetch(url) for url in urls]
	            responses = await gather(*tasks)
	            # `gather` keeps the order of `tasks`, so results line up with `urls`.
	            return [
	                _content_translator(
	                    Convertor._extract_content(
	                        page,
	                        css_selector=css_selector,
	                        extraction_type=extraction_type,
	                        main_content_only=main_content_only,
	                    ),
	                    page,
	                )
	                for page in responses
	            ]

	    def serve(self, http: bool, host: str, port: int):
	        """Register every tool on a FastMCP server and run it.

	        :param http: When True the server runs with the "streamable-http" transport, otherwise with "stdio".
	        :param host: Host handed to the FastMCP server.
	        :param port: Port handed to the FastMCP server.
	        """
	        server = FastMCP(name="Scrapling", host=host, port=port)
	        tools = (
	            self.get,
	            self.bulk_get,
	            self.fetch,
	            self.bulk_fetch,
	            self.stealthy_fetch,
	            self.bulk_stealthy_fetch,
	        )
	        for tool in tools:
	            # Each tool is titled after its function name and described by its docstring.
	            server.add_tool(tool, title=tool.__name__, description=tool.__doc__, structured_output=True)
	        server.run(transport="streamable-http" if http else "stdio")