Karim Shoair committed on
Commit
0e7f15c
·
1 Parent(s): 33c6b04

fix(shell): dynamically build the signature of shortcuts after last changes

Browse files
scrapling/core/_shell_signatures.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Runtime parameter maps used to rebuild shell shortcut signatures.

These dictionaries mirror the ``TypedDict`` definitions from ``_types.py``
in a runtime-accessible form (defined once at module level) so IPython can
introspect and autocomplete the shell shortcuts' keyword options.
"""

from scrapling.core._types import (
    Dict,
    Any,
    List,
    Tuple,
    Optional,
)

# Options shared by every plain HTTP request shortcut (get/post/put/delete)
_HTTP_REQUEST_PARAMS = {
    "params": Optional[Dict | List | Tuple],
    "cookies": Any,
    "auth": Optional[Tuple[str, str]],
    "impersonate": Any,
    "http3": Optional[bool],
    "stealthy_headers": Optional[bool],
    "proxies": Any,
    "proxy": Optional[str],
    "proxy_auth": Optional[Tuple[str, str]],
    "timeout": Optional[int | float],
    "headers": Any,
    "retries": Optional[int],
    "retry_delay": Optional[int],
    "follow_redirects": Optional[bool],
    "max_redirects": Optional[int],
    "verify": Optional[bool],
    "cert": Optional[str | Tuple[str, str]],
    "selector_config": Optional[Dict],
}

# Extra body options only meaningful for requests that carry a payload
_REQUEST_BODY_PARAMS = {
    "data": Optional[Dict | str],
    "json": Optional[Dict | List],
}

# Options accepted by the browser-based `fetch` shortcut
_BROWSER_FETCH_PARAMS = {
    "headless": bool,
    "google_search": bool,
    "hide_canvas": bool,
    "disable_webgl": bool,
    "real_chrome": bool,
    "stealth": bool,
    "wait": int | float,
    "page_action": Optional[Any],
    "proxy": Optional[str | Dict],
    "locale": str,
    "extra_headers": Optional[Dict[str, str]],
    "useragent": Optional[str],
    "cdp_url": Optional[str],
    "timeout": int | float,
    "disable_resources": bool,
    "wait_selector": Optional[str],
    "init_script": Optional[str],
    "cookies": Optional[List[Dict]],
    "network_idle": bool,
    "load_dom": bool,
    "wait_selector_state": Any,
    "extra_flags": Optional[List[str]],
    "additional_args": Optional[Dict],
    "custom_config": Optional[Dict],
}

# Options accepted by the `stealthy_fetch` shortcut
_STEALTHY_BROWSER_PARAMS = {
    "headless": bool,
    "block_images": bool,
    "disable_resources": bool,
    "block_webrtc": bool,
    "allow_webgl": bool,
    "network_idle": bool,
    "load_dom": bool,
    "humanize": bool | float,
    "solve_cloudflare": bool,
    "wait": int | float,
    "timeout": int | float,
    "page_action": Optional[Any],
    "wait_selector": Optional[str],
    "init_script": Optional[str],
    "addons": Optional[List[str]],
    "wait_selector_state": Any,
    "cookies": Optional[List[Dict]],
    "google_search": bool,
    "extra_headers": Optional[Dict[str, str]],
    "proxy": Optional[str | Dict],
    "os_randomize": bool,
    "disable_ads": bool,
    "geoip": bool,
    "custom_config": Optional[Dict],
    "additional_args": Optional[Dict],
}

# Public mapping of shortcut names to their keyword-parameter definitions.
# Key order matters: it is the order parameters are displayed in the shell.
Signatures_map = {
    "get": _HTTP_REQUEST_PARAMS,
    "post": {**_HTTP_REQUEST_PARAMS, **_REQUEST_BODY_PARAMS},
    "put": {**_HTTP_REQUEST_PARAMS, **_REQUEST_BODY_PARAMS},
    "delete": _HTTP_REQUEST_PARAMS,
    "fetch": _BROWSER_FETCH_PARAMS,
    "stealthy_fetch": _STEALTHY_BROWSER_PARAMS,
}
scrapling/core/shell.py CHANGED
@@ -1,13 +1,14 @@
1
  # -*- coding: utf-8 -*-
2
- from re import sub as re_sub
3
  from sys import stderr
4
  from functools import wraps
 
5
  from collections import namedtuple
6
  from shlex import split as shlex_split
 
7
  from tempfile import mkstemp as make_temp_file
8
- from urllib.parse import urlparse, urlunparse, parse_qsl
9
  from argparse import ArgumentParser, SUPPRESS
10
  from webbrowser import open as open_in_browser
 
11
  from logging import (
12
  DEBUG,
13
  INFO,
@@ -21,6 +22,7 @@ from logging import (
21
 
22
  from orjson import loads as json_loads, JSONDecodeError
23
 
 
24
  from scrapling import __version__
25
  from scrapling.core.utils import log
26
  from scrapling.parser import Selector, Selectors
@@ -28,12 +30,12 @@ from scrapling.core.custom_types import TextHandler
28
  from scrapling.engines.toolbelt.custom import Response
29
  from scrapling.core.utils._shell import _ParseHeaders, _CookieParser
30
  from scrapling.core._types import (
31
- Optional,
32
  Dict,
33
  Any,
34
  cast,
35
- extraction_types,
36
  Generator,
 
37
  )
38
 
39
 
@@ -312,6 +314,40 @@ class CurlParser:
312
  return None
313
 
314
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
  def show_page_in_browser(page: Selector): # pragma: no cover
316
  if not page or not isinstance(page, Selector):
317
  log.error("Input must be of type `Selector`")
@@ -431,7 +467,7 @@ Type 'exit' or press Ctrl+D to exit.
431
 
432
  return result
433
 
434
- def create_wrapper(self, func):
435
  """Create a wrapper that preserves function signature but updates page"""
436
 
437
  @wraps(func)
@@ -439,6 +475,12 @@ Type 'exit' or press Ctrl+D to exit.
439
  result = func(*args, **kwargs)
440
  return self.update_page(result)
441
 
 
 
 
 
 
 
442
  return wrapper
443
 
444
  def get_namespace(self):
@@ -451,7 +493,7 @@ Type 'exit' or press Ctrl+D to exit.
451
  delete = self.create_wrapper(self.__Fetcher.delete)
452
  dynamic_fetch = self.create_wrapper(self.__DynamicFetcher.fetch)
453
  stealthy_fetch = self.create_wrapper(self.__StealthyFetcher.fetch)
454
- curl2fetcher = self.create_wrapper(self._curl_parser.convert2fetcher)
455
 
456
  # Create the namespace dictionary
457
  return {
 
1
  # -*- coding: utf-8 -*-
 
2
  from sys import stderr
3
  from functools import wraps
4
+ from re import sub as re_sub
5
  from collections import namedtuple
6
  from shlex import split as shlex_split
7
+ from inspect import signature, Parameter
8
  from tempfile import mkstemp as make_temp_file
 
9
  from argparse import ArgumentParser, SUPPRESS
10
  from webbrowser import open as open_in_browser
11
+ from urllib.parse import urlparse, urlunparse, parse_qsl
12
  from logging import (
13
  DEBUG,
14
  INFO,
 
22
 
23
  from orjson import loads as json_loads, JSONDecodeError
24
 
25
+ from ._shell_signatures import Signatures_map
26
  from scrapling import __version__
27
  from scrapling.core.utils import log
28
  from scrapling.parser import Selector, Selectors
 
30
  from scrapling.engines.toolbelt.custom import Response
31
  from scrapling.core.utils._shell import _ParseHeaders, _CookieParser
32
  from scrapling.core._types import (
 
33
  Dict,
34
  Any,
35
  cast,
36
+ Optional,
37
  Generator,
38
+ extraction_types,
39
  )
40
 
41
 
 
314
  return None
315
 
316
 
317
def _unpack_signature(func):
    """
    Expand a function's ``**kwargs`` into explicit keyword-only parameters.

    Shell shortcuts take their options as ``**kwargs`` annotated with
    ``Unpack[TypedDict]``, which IPython displays as a bare ``**kwargs``.
    This rebuilds the signature with one keyword-only parameter per known
    option (from ``Signatures_map``) so the interactive shell can show and
    autocomplete them individually, similar to how IDEs display them.

    :param func: The callable whose signature should be expanded.
    :return: An ``inspect.Signature`` — expanded when the function name is in
        ``Signatures_map`` and it has a ``**kwargs`` slot, otherwise the
        function's original signature.
    """
    try:
        sig = signature(func)
        func_name = getattr(func, "__name__", None)

        # Only functions with a known parameter map can be expanded
        if func_name not in Signatures_map:
            return sig

        new_params = []
        replaced = False  # set once a **kwargs slot has been expanded
        for param in sig.parameters.values():
            if param.kind == Parameter.VAR_KEYWORD:
                # Replace **kwargs with individual keyword-only parameters
                for field_name, field_type in Signatures_map[func_name].items():
                    new_params.append(
                        Parameter(field_name, Parameter.KEYWORD_ONLY, default=Parameter.empty, annotation=field_type)
                    )
                replaced = True
            else:
                new_params.append(param)

        # Rebuild only when **kwargs was actually expanded. (Comparing the
        # parameter counts instead would miss a map holding exactly one field,
        # leaving the original **kwargs displayed.)
        if replaced:
            return sig.replace(parameters=new_params)
        return sig

    except Exception:  # pragma: no cover
        # Introspection must never break the shell; fall back to the raw signature
        return signature(func)
350
+
351
  def show_page_in_browser(page: Selector): # pragma: no cover
352
  if not page or not isinstance(page, Selector):
353
  log.error("Input must be of type `Selector`")
 
467
 
468
  return result
469
 
470
def create_wrapper(self, func, get_signature=True):
    """Wrap *func* so its result is routed through ``self.update_page``.

    The wrapper forwards all arguments untouched and carries an explicit
    ``__signature__`` so IPython can introspect and autocomplete it.

    :param func: The fetcher callable to wrap.
    :param get_signature: When True, expand any known ``**kwargs`` options via
        ``_unpack_signature``; when False, attach the plain signature as-is.
    :return: The wrapping callable.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        return self.update_page(func(*args, **kwargs))

    # Explicitly attach a signature for IPython introspection and autocompletion
    build_signature = _unpack_signature if get_signature else signature
    wrapper.__signature__ = build_signature(func)  # pyright: ignore

    return wrapper
485
 
486
  def get_namespace(self):
 
493
  delete = self.create_wrapper(self.__Fetcher.delete)
494
  dynamic_fetch = self.create_wrapper(self.__DynamicFetcher.fetch)
495
  stealthy_fetch = self.create_wrapper(self.__StealthyFetcher.fetch)
496
+ curl2fetcher = self.create_wrapper(self._curl_parser.convert2fetcher, get_signature=False)
497
 
498
  # Create the namespace dictionary
499
  return {