Karim shoair committed on
Commit ·
23b9523
1
Parent(s): e3b853c
refactor: optimize imports and docstrings correction
Browse files
- scrapling/core/shell.py +10 -11
scrapling/core/shell.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
-
import os
|
| 3 |
-
import json
|
| 4 |
from sys import stderr
|
| 5 |
from functools import wraps
|
| 6 |
from http import cookies as Cookie
|
| 7 |
from collections import namedtuple
|
| 8 |
from shlex import split as shlex_split
|
| 9 |
from tempfile import mkstemp as make_temp_file
|
|
|
|
| 10 |
from urllib.parse import urlparse, urlunparse, parse_qsl
|
| 11 |
from argparse import ArgumentParser, SUPPRESS
|
| 12 |
from webbrowser import open as open_in_browser
|
|
@@ -22,6 +21,7 @@ from logging import (
|
|
| 22 |
)
|
| 23 |
|
| 24 |
from IPython.terminal.embed import InteractiveShellEmbed
|
|
|
|
| 25 |
|
| 26 |
from scrapling import __version__
|
| 27 |
from scrapling.core.utils import log
|
|
@@ -199,7 +199,7 @@ class CurlParser:
|
|
| 199 |
|
| 200 |
# --- Determine Method ---
|
| 201 |
method = "get" # Default
|
| 202 |
-
if parsed_args.get: # -G forces GET
|
| 203 |
method = "get"
|
| 204 |
|
| 205 |
elif parsed_args.method:
|
|
@@ -224,7 +224,7 @@ class CurlParser:
|
|
| 224 |
cookie_parser = Cookie.SimpleCookie()
|
| 225 |
cookie_parser.load(parsed_args.cookie)
|
| 226 |
for key, morsel in cookie_parser.items():
|
| 227 |
-
# Update the
|
| 228 |
# cookies with the same name from -H 'Cookie:'
|
| 229 |
cookies[key] = morsel.value
|
| 230 |
log.debug(f"Parsed cookies from -b argument: {list(cookies.keys())}")
|
|
@@ -270,14 +270,14 @@ class CurlParser:
|
|
| 270 |
# Check if raw data looks like JSON, prefer 'json' param if so
|
| 271 |
if isinstance(data_payload, str):
|
| 272 |
try:
|
| 273 |
-
maybe_json =
|
| 274 |
if isinstance(maybe_json, (dict, list)):
|
| 275 |
json_payload = maybe_json
|
| 276 |
data_payload = None
|
| 277 |
-
except
|
| 278 |
pass # Not JSON, keep it in data_payload
|
| 279 |
|
| 280 |
-
# Handle -G: Move data to params if method is GET
|
| 281 |
if method == "get" and data_payload:
|
| 282 |
if isinstance(data_payload, dict): # From --data-urlencode likely
|
| 283 |
params.update(data_payload)
|
|
@@ -340,7 +340,6 @@ class CurlParser:
|
|
| 340 |
)
|
| 341 |
|
| 342 |
def convert2fetcher(self, curl_command: Union[Request, str]) -> Optional[Response]:
|
| 343 |
-
request = None
|
| 344 |
if isinstance(curl_command, (Request, str)):
|
| 345 |
request = (
|
| 346 |
self.parse(curl_command)
|
|
@@ -387,8 +386,8 @@ def show_page_in_browser(page: Adaptor):
|
|
| 387 |
|
| 388 |
try:
|
| 389 |
fd, fname = make_temp_file(".html")
|
| 390 |
-
|
| 391 |
-
|
| 392 |
open_in_browser(f"file://{fname}")
|
| 393 |
except IOError as e:
|
| 394 |
log.error(f"Failed to write temporary file for viewing: {e}")
|
|
@@ -460,7 +459,7 @@ Type 'exit' or press Ctrl+D to exit.
|
|
| 460 |
"""
|
| 461 |
|
| 462 |
def update_page(self, result):
|
| 463 |
-
"""Update current page and add to pages history"""
|
| 464 |
self.page = result
|
| 465 |
if isinstance(result, (Response, Adaptor)):
|
| 466 |
self.pages.append(result)
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
| 2 |
from sys import stderr
|
| 3 |
from functools import wraps
|
| 4 |
from http import cookies as Cookie
|
| 5 |
from collections import namedtuple
|
| 6 |
from shlex import split as shlex_split
|
| 7 |
from tempfile import mkstemp as make_temp_file
|
| 8 |
+
from os import write as os_write, close as os_close
|
| 9 |
from urllib.parse import urlparse, urlunparse, parse_qsl
|
| 10 |
from argparse import ArgumentParser, SUPPRESS
|
| 11 |
from webbrowser import open as open_in_browser
|
|
|
|
| 21 |
)
|
| 22 |
|
| 23 |
from IPython.terminal.embed import InteractiveShellEmbed
|
| 24 |
+
from orjson import loads as json_loads, JSONDecodeError
|
| 25 |
|
| 26 |
from scrapling import __version__
|
| 27 |
from scrapling.core.utils import log
|
|
|
|
| 199 |
|
| 200 |
# --- Determine Method ---
|
| 201 |
method = "get" # Default
|
| 202 |
+
if parsed_args.get: # `-G` forces GET
|
| 203 |
method = "get"
|
| 204 |
|
| 205 |
elif parsed_args.method:
|
|
|
|
| 224 |
cookie_parser = Cookie.SimpleCookie()
|
| 225 |
cookie_parser.load(parsed_args.cookie)
|
| 226 |
for key, morsel in cookie_parser.items():
|
| 227 |
+
# Update the cookie dict, potentially overwriting
|
| 228 |
# cookies with the same name from -H 'Cookie:'
|
| 229 |
cookies[key] = morsel.value
|
| 230 |
log.debug(f"Parsed cookies from -b argument: {list(cookies.keys())}")
|
|
|
|
| 270 |
# Check if raw data looks like JSON, prefer 'json' param if so
|
| 271 |
if isinstance(data_payload, str):
|
| 272 |
try:
|
| 273 |
+
maybe_json = json_loads(data_payload)
|
| 274 |
if isinstance(maybe_json, (dict, list)):
|
| 275 |
json_payload = maybe_json
|
| 276 |
data_payload = None
|
| 277 |
+
except JSONDecodeError:
|
| 278 |
pass # Not JSON, keep it in data_payload
|
| 279 |
|
| 280 |
+
# Handle `-G`: Move data to params if the method is GET
|
| 281 |
if method == "get" and data_payload:
|
| 282 |
if isinstance(data_payload, dict): # From --data-urlencode likely
|
| 283 |
params.update(data_payload)
|
|
|
|
| 340 |
)
|
| 341 |
|
| 342 |
def convert2fetcher(self, curl_command: Union[Request, str]) -> Optional[Response]:
|
|
|
|
| 343 |
if isinstance(curl_command, (Request, str)):
|
| 344 |
request = (
|
| 345 |
self.parse(curl_command)
|
|
|
|
| 386 |
|
| 387 |
try:
|
| 388 |
fd, fname = make_temp_file(".html")
|
| 389 |
+
os_write(fd, page.body.encode("utf-8"))
|
| 390 |
+
os_close(fd)
|
| 391 |
open_in_browser(f"file://{fname}")
|
| 392 |
except IOError as e:
|
| 393 |
log.error(f"Failed to write temporary file for viewing: {e}")
|
|
|
|
| 459 |
"""
|
| 460 |
|
| 461 |
def update_page(self, result):
|
| 462 |
+
"""Update the current page and add to pages history"""
|
| 463 |
self.page = result
|
| 464 |
if isinstance(result, (Response, Adaptor)):
|
| 465 |
self.pages.append(result)
|