| """ |
| Internal cookie handling helpers. |
| |
| This module contains internal utilities for cookie parsing and manipulation. |
| These are not part of the public API and may change without notice. |
| """ |
|
|
| import re |
| import sys |
| from http.cookies import Morsel |
| from typing import List, Optional, Sequence, Tuple, cast |
|
|
| from .log import internal_logger |
|
|
# Names exported by a star-import of this module. The module docstring marks
# all of them as internal helpers rather than public API.
__all__ = (
    "parse_set_cookie_headers",
    "parse_cookie_header",
    "preserve_morsel_with_coded_value",
)
|
|
| |
| |
| |
| |
| |
| |
| |
# A cookie name is accepted when it consists of one or more characters from
# this explicit ASCII set. The set is deliberately broad (brackets, braces,
# parentheses, ...) but excludes whitespace, double quotes, commas,
# semicolons and backslashes.
_COOKIE_NAME_RE = re.compile(r"^[!#$%&\'()*+\-./0-9:<=>?@A-Z\[\]^_`a-z{|}~]+$")

# Lower-cased names that parse_set_cookie_headers treats as cookie attributes
# instead of as the name of a new cookie.
_COOKIE_KNOWN_ATTRS = frozenset(
    {
        "comment",
        "domain",
        "expires",
        "httponly",
        "max-age",
        "partitioned",
        "path",
        "samesite",
        "secure",
        "version",
    }
)

# Attributes that are flags: their mere presence stores True on the morsel,
# whatever value (if any) was attached to them.
_COOKIE_BOOL_ATTRS = frozenset({"httponly", "partitioned", "secure"})
|
|
| |
| |
| |
# Matches one "key[=value]" item of a cookie header starting at a given
# offset; both parsers in this module call ``.match(header, i)`` in a loop and
# advance ``i`` to ``match.end(0)``.
#
# - Group ``key`` is lazy and its character class contains "=", so the
#   shortest key that lets the rest of the pattern succeed is chosen.
# - Group ``val`` is None when the item has no "=value" part. Its
#   alternatives are tried in the order written: closed quoted string,
#   unmatched opening quote (runs up to the next quote or semicolon), the
#   "expires" date formats, the asctime() date format, then any word/empty.
# - An item ends at whitespace, a semicolon, or the end of the string.
# - re.ASCII restricts \w, \d and \s to ASCII; re.VERBOSE makes the
#   whitespace and "#" comments inside the pattern insignificant (except
#   inside character classes).
_COOKIE_PATTERN = re.compile(
    r"""
    \s* # Optional whitespace at start of cookie
    (?P<key> # Start of group 'key'
    # aiohttp has extended to include [] for compatibility with real-world cookies
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]+? # Any word of at least one letter
    ) # End of group 'key'
    ( # Optional group: there may not be a value.
    \s*=\s* # Equal Sign
    (?P<val> # Start of group 'val'
    "(?:[^\\"]|\\.)*" # Any double-quoted string (properly closed)
    | # or
    "[^";]* # Unmatched opening quote (differs from SimpleCookie - issue #7993)
    | # or
    # Special case for "expires" attr - RFC 822, RFC 850, RFC 1036, RFC 1123
    (\w{3,6}day|\w{3}),\s # Day of the week or abbreviated day (with comma)
    [\w\d\s-]{9,11}\s[\d:]{8}\s # Date and time in specific format
    (GMT|[+-]\d{4}) # Timezone: GMT or RFC 2822 offset like -0000, +0100
    # NOTE: RFC 2822 timezone support is an aiohttp extension
    # for issue #4493 - SimpleCookie does NOT support this
    | # or
    # ANSI C asctime() format: "Wed Jun 9 10:18:14 2021"
    # NOTE: This is an aiohttp extension for issue #4327 - SimpleCookie does NOT support this format
    \w{3}\s+\w{3}\s+[\s\d]\d\s+\d{2}:\d{2}:\d{2}\s+\d{4}
    | # or
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]* # Any word or empty string
    ) # End of group 'val'
    )? # End of optional value group
    \s* # Any number of spaces.
    (\s+|;|$) # Ending either at space, semicolon, or EOS.
    """,
    re.VERBOSE | re.ASCII,
)
|
|
|
|
def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]:
    """
    Preserve a Morsel's coded_value exactly as received from the server.

    This function ensures that cookie encoding is preserved exactly as sent by
    the server, which is critical for compatibility with old servers that have
    strict requirements about cookie formats.

    This addresses the issue described in https://github.com/aio-libs/aiohttp/pull/1453
    where Python's SimpleCookie would re-encode cookies, breaking authentication
    with certain servers.

    Only ``key``, ``value`` and ``coded_value`` are transferred; attributes such
    as ``path`` or ``domain`` keep their ``Morsel`` defaults on the result.

    Args:
        cookie: A Morsel object from SimpleCookie

    Returns:
        A new Morsel object with preserved coded_value

    """
    # Always start from a fresh Morsel. The former
    # ``cookie.get(cookie.key, Morsel())`` lookup could only find an entry when
    # the cookie was named after a reserved attribute (e.g. "path"); it then
    # returned that attribute's plain value and the ``__setstate__`` call below
    # failed with AttributeError. In every other case it already produced a
    # fresh Morsel, so the result is unchanged for inputs that used to work.
    preserved: Morsel[str] = Morsel()

    # __setstate__ is used instead of the public set() API because set()
    # validates the key (and rejects reserved names); the state here has
    # already been accepted upstream and must be copied verbatim.
    preserved.__setstate__(
        {"key": cookie.key, "value": cookie.value, "coded_value": cookie.coded_value}
    )
    return preserved
|
|
|
|
# Backslash escapes inside a quoted cookie value: group 1 captures a
# three-digit octal code (first digit 0-3), group 2 captures any other single
# escaped character (anything except a newline).
_UNQUOTE_ESCAPE_RE = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))")
_unquote_sub = _UNQUOTE_ESCAPE_RE.sub
|
|
|
|
def _unquote_replace(m: re.Match[str]) -> str:
    """
    Produce the replacement text for one escape matched by _unquote_sub.

    - A three-digit octal escape (group 1) becomes the character with that
      code point.
    - Any other escaped character (group 2) is returned without its backslash.
    """
    octal_digits = m.group(1)
    if not octal_digits:
        return m.group(2)
    return chr(int(octal_digits, 8))
|
|
|
|
def _unquote(value: str) -> str:
    """
    Strip surrounding double quotes from a cookie value and resolve escapes.

    Vendored from http.cookies._unquote to ensure compatibility.

    Note: The original implementation checked for None, but that check was
    removed since all callers already ensure the value is not None.

    A value that is shorter than two characters, or that is not wrapped in
    double quotes on both ends, is returned unchanged.
    """
    is_quoted = len(value) >= 2 and value[0] == '"' and value[-1] == '"'
    if not is_quoted:
        return value

    # Drop the enclosing quotes, then turn each backslash escape (octal code or
    # escaped single character) back into the character it stands for.
    return _unquote_sub(_unquote_replace, value[1:-1])
|
|
|
|
def parse_cookie_header(header: str) -> List[Tuple[str, Morsel[str]]]:
    """
    Parse a Cookie header according to RFC 6265 Section 5.4.

    Cookie headers contain only name-value pairs separated by semicolons.
    There are no attributes in Cookie headers - even names that match
    attribute names (like 'path' or 'secure') should be treated as cookies.

    This parser uses the same regex-based approach as parse_set_cookie_headers
    to properly handle quoted values that may contain semicolons.

    A pair that the cookie pattern cannot match (for example a value containing
    an unescaped double quote, or an empty ``;;`` segment) is skipped up to the
    next semicolon, so later well-formed cookies are still returned.

    Args:
        header: The Cookie header value to parse

    Returns:
        List of (name, Morsel) tuples for compatibility with SimpleCookie.update()
    """
    if not header:
        return []

    cookies: List[Tuple[str, Morsel[str]]] = []
    i = 0
    n = len(header)

    while i < n:
        # Use the same pattern as parse_set_cookie_headers to find cookies.
        match = _COOKIE_PATTERN.match(header, i)
        if match is None:
            # Malformed pair: previously this aborted the whole parse and
            # silently dropped every cookie after it. Resynchronise on the
            # next semicolon instead and keep going.
            next_semi = header.find(";", i)
            segment_end = n if next_semi == -1 else next_semi
            segment = header[i:segment_end].strip()
            if segment:
                # Trailing whitespace / empty segments are not worth reporting.
                internal_logger.debug(
                    "Can not load cookie: Malformed cookie pair %r", segment
                )
            # Always moves forward (segment_end >= i), so the loop terminates.
            i = segment_end + 1
            continue

        key = match.group("key")
        value = match.group("val") or ""
        i = match.end(0)

        # Validate the name; an illegal name only discards this one pair.
        if not key or not _COOKIE_NAME_RE.match(key):
            internal_logger.warning("Can not load cookie: Illegal cookie name %r", key)
            continue

        morsel: Morsel[str] = Morsel()
        # __setstate__ is used instead of Morsel.set() so that names matching
        # reserved attribute names are accepted and the coded value is stored
        # exactly as received, without re-encoding.
        morsel.__setstate__(
            {"key": key, "value": _unquote(value), "coded_value": value}
        )

        cookies.append((key, morsel))

    return cookies
|
|
|
|
def _set_morsel_attribute(morsel: Morsel[str], name: str, value: object) -> None:
    """Store attribute ``name`` on ``morsel`` without tripping over 'partitioned'."""
    # Before Python 3.14 Morsel does not list "partitioned" among its reserved
    # attributes, so Morsel.__setitem__ raises CookieError for it; write the
    # underlying dict directly on those versions.
    if name == "partitioned" and sys.version_info < (3, 14):
        dict.__setitem__(morsel, name, value)
    else:
        morsel[name] = value


def parse_set_cookie_headers(headers: Sequence[str]) -> List[Tuple[str, Morsel[str]]]:
    """
    Parse cookie headers using a vendored version of SimpleCookie parsing.

    This implementation is based on SimpleCookie.__parse_string to ensure
    compatibility with how SimpleCookie parses cookies, including handling
    of malformed cookies with missing semicolons.

    The parser is deliberately forgiving. Ideally it would follow RFC 6265
    Section 4.2.1 (Cookie header syntax) and RFC 6265 Section 5.2 (parsing of
    Set-Cookie headers), but real world data makes strict parsing impractical.

    NOTE(review): earlier documentation stated that this function is used for
    both Cookie and Set-Cookie headers; parse_cookie_header also exists for
    Cookie headers - confirm against the callers.

    NOTE: This implementation differs from SimpleCookie in handling unmatched quotes.
    SimpleCookie will stop parsing when it encounters a cookie value with an unmatched
    quote (e.g., 'cookie="value'), causing subsequent cookies to be silently dropped.
    This implementation handles unmatched quotes more gracefully to prevent cookie loss.
    See https://github.com/aio-libs/aiohttp/issues/7993

    Args:
        headers: Header values to parse; empty entries are skipped.

    Returns:
        List of (name, Morsel) tuples in the order the cookies were found.
    """
    parsed_cookies: List[Tuple[str, Morsel[str]]] = []

    for header in headers:
        if not header:
            continue

        i = 0
        n = len(header)
        # Morsel that subsequent attributes attach to; None after a cookie
        # with an illegal name so that its attributes are discarded too.
        current_morsel: Optional[Morsel[str]] = None
        # True once at least one valid cookie was found in this header.
        morsel_seen = False

        while 0 <= i < n:
            match = _COOKIE_PATTERN.match(header, i)
            if not match:
                # Nothing parseable is left in this header.
                break

            key, value = match.group("key"), match.group("val")
            i = match.end(0)
            lower_key = key.lower()

            if key[0] == "$":
                if not morsel_seen:
                    # "$"-prefixed items ahead of the first cookie (such as
                    # "$Version") are ignored.
                    continue
                # After a cookie, "$name" is treated as attribute "name".
                if current_morsel is not None:
                    attr_lower_key = lower_key[1:]
                    if attr_lower_key in _COOKIE_KNOWN_ATTRS:
                        # Goes through the helper so that "$Partitioned" does
                        # not raise CookieError on Python < 3.14.
                        _set_morsel_attribute(
                            current_morsel, attr_lower_key, value or ""
                        )
            elif lower_key in _COOKIE_KNOWN_ATTRS:
                if not morsel_seen:
                    # Invalid cookie string - attribute before any cookie.
                    break
                if lower_key in _COOKIE_BOOL_ATTRS:
                    # A flag attribute is True whatever value it carries.
                    if current_morsel is not None:
                        _set_morsel_attribute(current_morsel, lower_key, True)
                elif value is None:
                    # Invalid cookie string - non-flag attribute without value.
                    break
                elif current_morsel is not None:
                    # Regular attribute with a value.
                    current_morsel[lower_key] = _unquote(value)
            elif value is not None:
                # A name=value pair that starts a new cookie; validate the name.
                if key in _COOKIE_KNOWN_ATTRS or not _COOKIE_NAME_RE.match(key):
                    internal_logger.warning(
                        "Can not load cookies: Illegal cookie name %r", key
                    )
                    current_morsel = None
                else:
                    current_morsel = Morsel()
                    # __setstate__ stores the state verbatim, keeping the
                    # coded value exactly as received instead of re-encoding.
                    current_morsel.__setstate__(
                        {"key": key, "value": _unquote(value), "coded_value": value}
                    )
                    parsed_cookies.append((key, current_morsel))
                    morsel_seen = True
            else:
                # Invalid cookie string - bare word that is not an attribute.
                break

    return parsed_cookies
|
|