| | import base64 |
| | import binascii |
| | import json |
| | import re |
| | import sys |
| | import uuid |
| | import warnings |
| | from collections import deque |
| | from collections.abc import Mapping, Sequence |
| | from types import TracebackType |
| | from typing import ( |
| | TYPE_CHECKING, |
| | Any, |
| | Deque, |
| | Dict, |
| | Iterator, |
| | List, |
| | Optional, |
| | Tuple, |
| | Type, |
| | Union, |
| | cast, |
| | ) |
| | from urllib.parse import parse_qsl, unquote, urlencode |
| |
|
| | from multidict import CIMultiDict, CIMultiDictProxy |
| |
|
| | from .abc import AbstractStreamWriter |
| | from .compression_utils import ( |
| | DEFAULT_MAX_DECOMPRESS_SIZE, |
| | ZLibCompressor, |
| | ZLibDecompressor, |
| | ) |
| | from .hdrs import ( |
| | CONTENT_DISPOSITION, |
| | CONTENT_ENCODING, |
| | CONTENT_LENGTH, |
| | CONTENT_TRANSFER_ENCODING, |
| | CONTENT_TYPE, |
| | ) |
| | from .helpers import CHAR, TOKEN, parse_mimetype, reify |
| | from .http import HeadersParser |
| | from .log import internal_logger |
| | from .payload import ( |
| | JsonPayload, |
| | LookupError, |
| | Order, |
| | Payload, |
| | StringPayload, |
| | get_payload, |
| | payload_type, |
| | ) |
| | from .streams import StreamReader |
| |
|
# ``typing.Self`` is only importable on Python 3.11+; older interpreters get a
# TypeVar with the same name so ``self: Self`` annotations keep working.
# NOTE(review): the fallback TypeVar is bound to ``BodyPartReader`` but is also
# used to annotate ``MultipartReader.__aiter__`` -- confirm this is intended.
if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing import TypeVar

    Self = TypeVar("Self", bound="BodyPartReader")
| |
|
# Names exported by a star-import of this module.
__all__ = (
    "MultipartReader",
    "MultipartWriter",
    "BodyPartReader",
    "BadContentDispositionHeader",
    "BadContentDispositionParam",
    "parse_content_disposition",
    "content_disposition_filename",
)
| |
|
| |
|
| | if TYPE_CHECKING: |
| | from .client_reqrep import ClientResponse |
| |
|
| |
|
class BadContentDispositionHeader(RuntimeWarning):
    """Warning category used when a Content-Disposition header is malformed."""
| |
|
| |
|
class BadContentDispositionParam(RuntimeWarning):
    """Warning category used when one Content-Disposition parameter is malformed."""
| |
|
| |
|
def parse_content_disposition(
    header: Optional[str],
) -> Tuple[Optional[str], Dict[str, str]]:
    """Parse a ``Content-Disposition`` header into its type and parameters.

    :param header: raw header value, or ``None``/empty when the header is
        absent.
    :returns: ``(disposition_type, params)``. The disposition type is
        lower-cased; parameter names are lower-cased and stripped.
        ``(None, {})`` is returned for a missing header and whenever the
        header as a whole is rejected: an invalid disposition type, an item
        without ``=``, a duplicated parameter, or a plain parameter whose
        value is neither a token nor a quoted string.

    Problems are reported through :func:`warnings.warn` and never raised:
    ``BadContentDispositionHeader`` when the whole header is rejected (and
    for empty items between semicolons, which are skipped), and
    ``BadContentDispositionParam`` when only the offending parameter is
    dropped.
    """

    def is_token(string: str) -> bool:
        return bool(string) and TOKEN >= set(string)

    def is_quoted(string: str) -> bool:
        # A quoted string needs both an opening and a closing quote. The
        # length check also keeps an empty value (``filename=``) from raising
        # IndexError and stops a lone ``"`` from being taken as ``""``.
        return len(string) >= 2 and string[0] == string[-1] == '"'

    def is_rfc5987(string: str) -> bool:
        return is_token(string) and string.count("'") == 2

    def is_extended_param(string: str) -> bool:
        return string.endswith("*")

    def is_continuous_param(string: str) -> bool:
        pos = string.find("*") + 1
        if not pos:
            return False
        substring = string[pos:-1] if string.endswith("*") else string[pos:]
        return substring.isdigit()

    def unescape(text: str, *, chars: str = "".join(map(re.escape, CHAR))) -> str:
        # Drop the backslash from every backslash-escaped character.
        return re.sub(f"\\\\([{chars}])", "\\1", text)

    if not header:
        return None, {}

    disptype, *rest = header.split(";")
    if not is_token(disptype):
        warnings.warn(BadContentDispositionHeader(header))
        return None, {}

    params: Dict[str, str] = {}
    # deque gives O(1) removal from the front while still allowing parts[0].
    parts = deque(rest)
    while parts:
        item = parts.popleft()

        if not item:
            # Empty item, e.g. from a trailing or doubled semicolon.
            warnings.warn(BadContentDispositionHeader(header))
            continue

        if "=" not in item:
            warnings.warn(BadContentDispositionHeader(header))
            return None, {}

        key, value = item.split("=", 1)
        key = key.lower().strip()
        value = value.lstrip()

        if key in params:
            warnings.warn(BadContentDispositionHeader(header))
            return None, {}

        if not is_token(key):
            warnings.warn(BadContentDispositionParam(item))
            continue

        elif is_continuous_param(key):
            # ``name*N`` / ``name*N*`` segment; stored as-is for later joining.
            if is_quoted(value):
                value = unescape(value[1:-1])
            elif not is_token(value):
                warnings.warn(BadContentDispositionParam(item))
                continue

        elif is_extended_param(key):
            # ``name*`` with a ``charset'lang'percent-encoded`` value.
            if is_rfc5987(value):
                encoding, _, value = value.split("'", 2)
                encoding = encoding or "utf-8"
            else:
                warnings.warn(BadContentDispositionParam(item))
                continue

            try:
                value = unquote(value, encoding, "strict")
            except UnicodeDecodeError:
                warnings.warn(BadContentDispositionParam(item))
                continue

        else:
            failed = True
            if is_quoted(value):
                failed = False
                value = unescape(value[1:-1].lstrip("\\/"))
            elif is_token(value):
                failed = False
            elif parts:
                # The value may be a quoted string that itself contained a
                # ";" and was therefore split in two; try re-joining it with
                # the following item (handles exactly one embedded ";").
                _value = f"{value};{parts[0]}"
                if is_quoted(_value):
                    parts.popleft()
                    value = unescape(_value[1:-1].lstrip("\\/"))
                    failed = False

            if failed:
                warnings.warn(BadContentDispositionHeader(header))
                return None, {}

        params[key] = value

    return disptype.lower(), params
| |
|
| |
|
def content_disposition_filename(
    params: Mapping[str, str], name: str = "filename"
) -> Optional[str]:
    """Pick the value of parameter *name* out of parsed disposition params.

    Lookup order:

    1. the extended form ``<name>*``;
    2. the plain form ``<name>``;
    3. continuation segments ``<name>*0``, ``<name>*1``, ... (each optionally
       suffixed with ``*``), joined in numeric order. Joining stops at the
       first missing index.

    When the joined continuation value contains at least two apostrophes it
    is treated as ``charset'language'percent-encoded-text`` and decoded
    (``utf-8`` when the charset is empty); otherwise it is returned as is.

    :param params: parameter mapping, e.g. the second item returned by
        ``parse_content_disposition``.
    :param name: base parameter name to look up.
    :returns: the resolved value, or ``None`` if nothing matches.
    :raises UnicodeDecodeError: if the percent-encoded text is not valid in
        the declared charset (decoding is strict).
    """
    if not params:
        return None

    name_suf = f"{name}*"
    if name_suf in params:
        return params[name_suf]
    if name in params:
        return params[name]

    # Index the continuation segments by their textual sequence number.
    # Looking indexes up one by one (instead of sorting the keys as strings)
    # keeps "*10" after "*9"; a lexicographic sort would put it before "*2".
    segments: Dict[str, str] = {}
    for key, value in params.items():
        if not key.startswith(name_suf):
            continue
        tail = key[len(name_suf) :]
        if tail.endswith("*"):
            tail = tail[:-1]
        segments.setdefault(tail, value)

    parts: List[str] = []
    while str(len(parts)) in segments:
        parts.append(segments[str(len(parts))])
    if not parts:
        return None

    value = "".join(parts)
    # Two apostrophes are needed for the charset'lang'text form; a value with
    # a single apostrophe (e.g. "it's") is plain text and is returned as is.
    if value.count("'") >= 2:
        encoding, _, value = value.split("'", 2)
        return unquote(value, encoding or "utf-8", "strict")
    return value
| |
|
| |
|
class MultipartResponseWrapper:
    """Couple a multipart reader with the response it is reading from.

    Parts are pulled from ``stream``; as soon as the stream reports that it
    has reached its end, the underlying response is released.
    """

    def __init__(
        self,
        resp: "ClientResponse",
        stream: "MultipartReader",
    ) -> None:
        self.stream = stream
        self.resp = resp

    def __aiter__(self) -> "MultipartResponseWrapper":
        return self

    async def __anext__(
        self,
    ) -> Union["MultipartReader", "BodyPartReader"]:
        upcoming = await self.next()
        if upcoming is not None:
            return upcoming
        raise StopAsyncIteration

    def at_eof(self) -> bool:
        """Return True once the response content reports end of data."""
        body = self.resp.content
        return body.at_eof()

    async def next(
        self,
    ) -> Optional[Union["MultipartReader", "BodyPartReader"]]:
        """Return the next part from the stream (``None`` when exhausted).

        The response is released whenever the stream is at EOF after the read.
        """
        reader = self.stream
        part = await reader.next()
        if reader.at_eof():
            await self.release()
        return part

    async def release(self) -> None:
        """Release the underlying response by awaiting its ``release()``."""
        await self.resp.release()
| |
|
| |
|
class BodyPartReader:
    """Reader for the content of a single part of a multipart body."""

    chunk_size = 8192

    def __init__(
        self,
        boundary: bytes,
        headers: "CIMultiDictProxy[str]",
        content: StreamReader,
        *,
        subtype: str = "mixed",
        default_charset: Optional[str] = None,
        max_decompress_size: int = DEFAULT_MAX_DECOMPRESS_SIZE,
    ) -> None:
        self.headers = headers
        self._boundary = boundary
        # Length of the b"\r\n" + boundary delimiter searched for in the stream.
        self._boundary_len = len(boundary) + 2
        self._content = content
        self._default_charset = default_charset
        self._at_eof = False
        self._is_form_data = subtype == "form-data"
        # Content-Length is not consulted for form-data parts.
        if self._is_form_data:
            declared_length = None
        else:
            declared_length = self.headers.get(CONTENT_LENGTH, None)
        self._length = None if declared_length is None else int(declared_length)
        self._read_bytes = 0
        self._unread: Deque[bytes] = deque()
        self._prev_chunk: Optional[bytes] = None
        # Number of stream reads during which the content was already at EOF.
        self._content_eof = 0
        self._cache: Dict[str, Any] = {}
        self._max_decompress_size = max_decompress_size

    def __aiter__(self: Self) -> Self:
        return self

    async def __anext__(self) -> bytes:
        payload = await self.next()
        if payload is not None:
            return payload
        raise StopAsyncIteration

    async def next(self) -> Optional[bytes]:
        """Return the whole (undecoded) part body, or ``None`` when empty."""
        body = await self.read()
        return body if body else None

    async def read(self, *, decode: bool = False) -> bytes:
        """Read the complete body of the part.

        decode: when true, the collected data is passed through ``decode()``
                before being returned; otherwise it is returned untouched.
        """
        if self._at_eof:
            return b""
        collected = bytearray()
        while not self._at_eof:
            collected.extend(await self.read_chunk(self.chunk_size))
        if not decode:
            return collected
        return await self.decode(collected)

    async def read_chunk(self, size: int = chunk_size) -> bytes:
        """Read the next chunk of the part body.

        size: requested chunk size

        Raises ValueError when the part is finished but is not followed by a
        CRLF line in the underlying stream.
        """
        if self._at_eof:
            return b""
        fetch = (
            self._read_chunk_from_length
            if self._length
            else self._read_chunk_from_stream
        )
        chunk = await fetch(size)

        # base64 data is handed out in groups of four significant characters:
        # keep pulling bytes until the whitespace-stripped length is a
        # multiple of 4 or the part ends.
        transfer_encoding = self.headers.get(CONTENT_TRANSFER_ENCODING)
        if transfer_encoding and transfer_encoding.lower() == "base64":
            stripped_len = len(b"".join(chunk.split()))
            remainder = stripped_len % 4

            while remainder != 0 and not self.at_eof():
                wanted = 4 - remainder
                extra = b""

                # Take from the look-ahead chunk first ...
                if self._prev_chunk:
                    extra = self._prev_chunk[:wanted]
                    self._prev_chunk = self._prev_chunk[len(extra) :]

                # ... and top up from the stream if that was not enough.
                if len(extra) != wanted:
                    extra += await self._content.read(4 - len(extra))

                if not extra:
                    self._at_eof = True

                stripped_len += len(b"".join(extra.split()))
                chunk += extra
                remainder = stripped_len % 4

        self._read_bytes += len(chunk)
        if self._read_bytes == self._length:
            self._at_eof = True
        if self._at_eof:
            terminator = await self._content.readline()
            if terminator != b"\r\n":
                raise ValueError("Reader did not read all the data or it is malformed")
        return chunk

    async def _read_chunk_from_length(self, size: int) -> bytes:
        """Read at most *size* bytes without exceeding the declared length."""
        assert self._length is not None, "Content-Length required for chunked read"
        remaining = self._length - self._read_bytes
        data = await self._content.read(min(size, remaining))
        if self._content.at_eof():
            self._at_eof = True
        return data

    async def _read_chunk_from_stream(self, size: int) -> bytes:
        """Read a chunk while scanning for the boundary delimiter.

        One chunk of look-ahead is kept in ``_prev_chunk`` so a delimiter that
        straddles two reads is still found; the previously buffered chunk is
        what gets returned.
        """
        assert (
            size >= self._boundary_len
        ), "Chunk size must be greater or equal than boundary length + 2"
        is_first = self._prev_chunk is None
        if is_first:
            # Prefix a CRLF so that a boundary located right at the start of
            # the part still matches the b"\r\n" + boundary delimiter; these
            # two bytes are cut off again from the returned data below.
            self._prev_chunk = b"\r\n" + await self._content.read(size)

        fresh = b""
        # A single read may return fewer bytes than needed to recognise the
        # delimiter, so keep reading until enough data is buffered or EOF.
        while len(fresh) < self._boundary_len:
            fresh += await self._content.read(size)
            self._content_eof += int(self._content.at_eof())
            if self._content_eof > 2:
                raise ValueError("Reading after EOF")
            if self._content_eof:
                break
        if len(fresh) > size:
            # Hand the surplus back to the stream.
            self._content.unread_data(fresh[size:])
            fresh = fresh[:size]

        assert self._prev_chunk is not None
        window = self._prev_chunk + fresh
        delimiter = b"\r\n" + self._boundary
        if is_first:
            search_from = 0
        else:
            # Only the tail of the previous chunk can start a new match.
            search_from = max(0, len(self._prev_chunk) - len(delimiter))
        idx = window.find(delimiter, search_from)
        if idx >= 0:
            # Push the delimiter (and everything after it) back to the stream.
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=DeprecationWarning)
                self._content.unread_data(window[idx:])
            self._prev_chunk = self._prev_chunk[:idx]
            fresh = window[len(self._prev_chunk) : idx]
            if not fresh:
                self._at_eof = True
        result = self._prev_chunk[2 if is_first else 0 :]
        self._prev_chunk = fresh
        return result

    async def readline(self) -> bytes:
        """Read the next line of the part body (``b""`` once at the boundary)."""
        if self._at_eof:
            return b""

        line = (
            self._unread.popleft() if self._unread else await self._content.readline()
        )

        if not line.startswith(self._boundary):
            # Peek at the following line: when it starts with the boundary,
            # the line break ending the current line is cut off (line[:-2]).
            following = await self._content.readline()
            if following.startswith(self._boundary):
                line = line[:-2]
            self._unread.append(following)
            return line

        # The line merely starts with the boundary; make sure it is exactly
        # the boundary or the closing boundary (ignoring trailing CR/LF).
        bare = line.rstrip(b"\r\n")
        if bare in (self._boundary, self._boundary + b"--"):
            self._at_eof = True
            self._unread.append(line)
            return b""
        return line

    async def release(self) -> None:
        """Like read(), but discards the data instead of returning it."""
        while not self._at_eof:
            await self.read_chunk(self.chunk_size)

    async def text(self, *, encoding: Optional[str] = None) -> str:
        """Like read(), but decodes the body to ``str``.

        The charset is *encoding* if given, else ``get_charset()`` with a
        ``utf-8`` default.
        """
        raw = await self.read(decode=True)
        charset = encoding or self.get_charset(default="utf-8")
        return raw.decode(charset)

    async def json(self, *, encoding: Optional[str] = None) -> Optional[Dict[str, Any]]:
        """Like read(), but parses the body as JSON (``None`` for empty body)."""
        raw = await self.read(decode=True)
        if not raw:
            return None
        charset = encoding or self.get_charset(default="utf-8")
        document = json.loads(raw.decode(charset))
        return cast(Dict[str, Any], document)

    async def form(self, *, encoding: Optional[str] = None) -> List[Tuple[str, str]]:
        """Like read(), but parses the body as urlencoded form data.

        Raises ValueError if the body cannot be decoded with the charset.
        """
        raw = await self.read(decode=True)
        if not raw:
            return []
        if encoding is None:
            real_encoding = self.get_charset(default="utf-8")
        else:
            real_encoding = encoding
        try:
            text = raw.rstrip().decode(real_encoding)
        except UnicodeDecodeError:
            raise ValueError("data cannot be decoded with %s encoding" % real_encoding)

        return parse_qsl(text, keep_blank_values=True, encoding=real_encoding)

    def at_eof(self) -> bool:
        """Return True if the end of the part was reached, False otherwise."""
        return self._at_eof

    async def decode(self, data: bytes) -> bytes:
        """Undo the transfer and content encodings declared in the headers.

        Content-Transfer-Encoding is handled first; Content-Encoding is only
        honoured for parts that are not form-data.
        """
        if CONTENT_TRANSFER_ENCODING in self.headers:
            data = self._decode_content_transfer(data)
        if self._is_form_data or CONTENT_ENCODING not in self.headers:
            return data
        return await self._decode_content(data)

    async def _decode_content(self, data: bytes) -> bytes:
        """Decompress *data* according to the Content-Encoding header."""
        encoding = self.headers.get(CONTENT_ENCODING, "").lower()
        if encoding == "identity":
            return data
        if encoding not in {"deflate", "gzip"}:
            raise RuntimeError(f"unknown content encoding: {encoding}")

        decompressor = ZLibDecompressor(
            encoding=encoding,
            suppress_deflate_header=True,
        )
        return await decompressor.decompress(
            data, max_length=self._max_decompress_size
        )

    def _decode_content_transfer(self, data: bytes) -> bytes:
        """Decode *data* according to the Content-Transfer-Encoding header."""
        encoding = self.headers.get(CONTENT_TRANSFER_ENCODING, "").lower()

        if encoding == "base64":
            return base64.b64decode(data)
        if encoding == "quoted-printable":
            return binascii.a2b_qp(data)
        if encoding in ("binary", "8bit", "7bit"):
            return data
        raise RuntimeError(f"unknown content transfer encoding: {encoding}")

    def get_charset(self, default: str) -> str:
        """Return the Content-Type charset parameter.

        Falls back to the reader's default charset and then to *default*.
        """
        fallback = self._default_charset or default
        mimetype = parse_mimetype(self.headers.get(CONTENT_TYPE, ""))
        return mimetype.parameters.get("charset", fallback)

    @reify
    def name(self) -> Optional[str]:
        """Return the ``name`` from the Content-Disposition header.

        None is returned when the header is missing or malformed.
        """
        disposition = self.headers.get(CONTENT_DISPOSITION)
        _, params = parse_content_disposition(disposition)
        return content_disposition_filename(params, "name")

    @reify
    def filename(self) -> Optional[str]:
        """Return the ``filename`` from the Content-Disposition header.

        None is returned when the header is missing or malformed.
        """
        disposition = self.headers.get(CONTENT_DISPOSITION)
        _, params = parse_content_disposition(disposition)
        return content_disposition_filename(params, "filename")
| |
|
| |
|
@payload_type(BodyPartReader, order=Order.try_first)
class BodyPartReaderPayload(Payload):
    """Payload that streams the content of a ``BodyPartReader``."""

    _value: BodyPartReader

    def __init__(self, value: BodyPartReader, *args: Any, **kwargs: Any) -> None:
        super().__init__(value, *args, **kwargs)

        # Carry the part's name/filename over into a Content-Disposition
        # header of the new payload, when the part has any of them.
        candidates = (("name", value.name), ("filename", value.filename))
        params: Dict[str, str] = {
            key: found for key, found in candidates if found is not None
        }
        if params:
            self.set_content_disposition("attachment", True, **params)

    def decode(self, encoding: str = "utf-8", errors: str = "strict") -> str:
        """Always raises TypeError: a body part cannot be decoded to text."""
        raise TypeError("Unable to decode.")

    async def as_bytes(self, encoding: str = "utf-8", errors: str = "strict") -> bytes:
        """Always raises TypeError; the part must be consumed via write().

        The payload is meant for streaming, is consumed exactly once through
        write(), and is therefore never buffered in memory as a whole.
        """
        raise TypeError("Unable to read body part as bytes. Use write() to consume.")

    async def write(self, writer: AbstractStreamWriter) -> None:
        """Stream the part to *writer* in decoded chunks of up to 2**16 bytes."""
        part = self._value
        while True:
            chunk = await part.read_chunk(size=2**16)
            if not chunk:
                break
            await writer.write(await part.decode(chunk))
| |
|
| |
|
class MultipartReader:
    """Multipart body reader."""

    # Wrapper class returned by from_response().
    response_wrapper_cls = MultipartResponseWrapper
    # Reader class for nested multipart parts; None means "same class as self".
    multipart_reader_cls: Optional[Type["MultipartReader"]] = None
    # Reader class for ordinary (non-multipart) parts.
    part_reader_cls = BodyPartReader

    def __init__(self, headers: Mapping[str, str], content: StreamReader) -> None:
        content_type = headers[CONTENT_TYPE]
        self._mimetype = parse_mimetype(content_type)
        assert self._mimetype.type == "multipart", "multipart/* content type expected"
        if "boundary" not in self._mimetype.parameters:
            raise ValueError("boundary missed for Content-Type: %s" % content_type)

        self.headers = headers
        # Delimiter lines consist of "--" followed by the boundary parameter.
        self._boundary = ("--" + self._get_boundary()).encode()
        self._content = content
        self._default_charset: Optional[str] = None
        self._last_part: Optional[Union["MultipartReader", BodyPartReader]] = None
        self._at_eof = False
        self._at_bof = True
        self._unread: List[bytes] = []

    def __aiter__(self: Self) -> Self:
        return self

    async def __anext__(
        self,
    ) -> Optional[Union["MultipartReader", BodyPartReader]]:
        upcoming = await self.next()
        if upcoming is not None:
            return upcoming
        raise StopAsyncIteration

    @classmethod
    def from_response(
        cls,
        response: "ClientResponse",
    ) -> MultipartResponseWrapper:
        """Build a reader for an HTTP response, wrapped together with it.

        :param response: :class:`~aiohttp.client.ClientResponse` instance
        """
        reader = cls(response.headers, response.content)
        return cls.response_wrapper_cls(response, reader)

    def at_eof(self) -> bool:
        """Return True if the final boundary was reached, False otherwise."""
        return self._at_eof

    async def next(
        self,
    ) -> Optional[Union["MultipartReader", BodyPartReader]]:
        """Return a reader for the next body part, or None after the last one."""
        if self._at_eof:
            return None
        await self._maybe_release_last_part()
        if not self._at_bof:
            await self._read_boundary()
        else:
            # At the very beginning, skip everything up to the first boundary.
            await self._read_until_first_boundary()
            self._at_bof = False
        if self._at_eof:
            # The closing boundary has just been consumed.
            return None

        part = await self.fetch_next_part()
        # A leading form-data field called "_charset_" is not returned to the
        # caller: its content becomes the default charset for the other parts.
        is_first_form_field = (
            self._last_part is None
            and self._mimetype.subtype == "form-data"
            and isinstance(part, BodyPartReader)
        )
        if is_first_form_field:
            _, params = parse_content_disposition(part.headers.get(CONTENT_DISPOSITION))
            if params.get("name") == "_charset_":
                # Read up to 32 bytes; anything that fills more than 31 of
                # them is rejected as an invalid charset name.
                charset = await part.read_chunk(32)
                if len(charset) > 31:
                    raise RuntimeError("Invalid default charset")
                self._default_charset = charset.strip().decode()
                part = await self.fetch_next_part()
        self._last_part = part
        return self._last_part

    async def release(self) -> None:
        """Read and discard all remaining body parts up to the final boundary."""
        while not self._at_eof:
            part = await self.next()
            if part is None:
                break
            await part.release()

    async def fetch_next_part(
        self,
    ) -> Union["MultipartReader", BodyPartReader]:
        """Read the headers of the next part and return a reader for it."""
        part_headers = await self._read_headers()
        return self._get_part_reader(part_headers)

    def _get_part_reader(
        self,
        headers: "CIMultiDictProxy[str]",
    ) -> Union["MultipartReader", BodyPartReader]:
        """Choose the reader for a part from its `Content-Type` header.

        A nested multipart part gets a multipart reader, anything else a
        body part reader.

        :param dict headers: Part headers
        """
        mimetype = parse_mimetype(headers.get(CONTENT_TYPE, ""))

        if mimetype.type != "multipart":
            return self.part_reader_cls(
                self._boundary,
                headers,
                self._content,
                subtype=self._mimetype.subtype,
                default_charset=self._default_charset,
            )

        nested_cls = self.multipart_reader_cls
        if nested_cls is None:
            nested_cls = type(self)
        return nested_cls(headers, self._content)

    def _get_boundary(self) -> str:
        """Return the boundary parameter; ValueError if longer than 70 chars."""
        boundary = self._mimetype.parameters["boundary"]
        if len(boundary) <= 70:
            return boundary
        raise ValueError("boundary %r is too long (70 chars max)" % boundary)

    async def _readline(self) -> bytes:
        """Return a previously pushed-back line if any, else read the stream."""
        if not self._unread:
            return await self._content.readline()
        return self._unread.pop()

    async def _read_until_first_boundary(self) -> None:
        """Skip lines until the first boundary (or closing boundary) line."""
        closing = self._boundary + b"--"
        while True:
            line = await self._readline()
            if line == b"":
                raise ValueError(
                    "Could not find starting boundary %r" % (self._boundary)
                )
            line = line.rstrip()
            if line == self._boundary:
                return
            if line == closing:
                self._at_eof = True
                return

    async def _read_boundary(self) -> None:
        """Consume the boundary line that separates two parts.

        Raises ValueError if the next line is neither the boundary nor the
        closing boundary.
        """
        line = (await self._readline()).rstrip()
        if line == self._boundary:
            return
        if line != self._boundary + b"--":
            raise ValueError(f"Invalid boundary {line!r}, expected {self._boundary!r}")

        self._at_eof = True
        epilogue = await self._readline()
        next_line = await self._readline()

        if next_line[:2] == b"--":
            # Looks like a delimiter line of an enclosing multipart: push it
            # back so that it can be handed over to the parent reader.
            self._unread.append(next_line)
        else:
            # Otherwise keep both lines available for further processing.
            self._unread.extend([next_line, epilogue])

    async def _read_headers(self) -> "CIMultiDictProxy[str]":
        """Read header lines up to (and including) the empty line and parse them."""
        lines = []
        while True:
            line = (await self._content.readline()).rstrip(b"\r\n")
            lines.append(line)
            if not line:
                break
        headers, _raw_headers = HeadersParser().parse_headers(lines)
        return headers

    async def _maybe_release_last_part(self) -> None:
        """Make sure the previously returned part has been read completely."""
        last = self._last_part
        if last is None:
            return
        if not last.at_eof():
            await last.release()
        # Take over the lines the part has read ahead but not consumed.
        self._unread.extend(last._unread)
        self._last_part = None
| |
|
| |
|
# One entry of MultipartWriter._parts: (payload, content encoding, content
# transfer encoding), in the order in which append_payload() stores them.
# NOTE(review): append_payload() may store None for either encoding although
# the alias declares plain ``str`` -- confirm whether Optional[str] is meant.
_Part = Tuple[Payload, str, str]
| |
|
| |
|
class MultipartWriter(Payload):
    """Multipart body writer."""

    _value: None
    _autoclose = True

    # Boundary made of token characters only: usable without quoting.
    _valid_tchar_regex = re.compile(rb"\A[!#$%&'*+\-.^_`|~\w]+\Z")
    # Characters that cannot be used even inside a quoted boundary value.
    _invalid_qdtext_char_regex = re.compile(rb"[\x00-\x08\x0A-\x1F\x7F]")

    def __init__(self, subtype: str = "mixed", boundary: Optional[str] = None) -> None:
        if boundary is None:
            boundary = uuid.uuid4().hex

        # The boundary is stored as bytes and has to be plain ASCII.
        try:
            self._boundary = boundary.encode("ascii")
        except UnicodeEncodeError:
            raise ValueError("boundary should contain ASCII only chars") from None
        ctype = f"multipart/{subtype}; boundary={self._boundary_value}"

        super().__init__(None, content_type=ctype)

        self._parts: List[_Part] = []
        self._is_form_data = subtype == "form-data"

    def __enter__(self) -> "MultipartWriter":
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> None:
        return None

    def __iter__(self) -> Iterator[_Part]:
        return iter(self._parts)

    def __len__(self) -> int:
        return len(self._parts)

    def __bool__(self) -> bool:
        # Always truthy; without this an empty writer would be falsy
        # because of __len__.
        return True

    @property
    def _boundary_value(self) -> str:
        """Return the boundary as it goes into the Content-Type header.

        A boundary consisting only of token characters is used verbatim.
        Anything else is wrapped in double quotes with backslashes and double
        quotes escaped; ValueError is raised if it contains a character that
        is not allowed inside a quoted string.
        """
        raw = self._boundary
        if self._valid_tchar_regex.match(raw):
            return raw.decode("ascii")

        if self._invalid_qdtext_char_regex.search(raw):
            raise ValueError("boundary value contains invalid characters")

        # Escape backslashes first, then double quotes.
        escaped = raw.replace(b"\\", b"\\\\").replace(b'"', b'\\"')

        return '"' + escaped.decode("ascii") + '"'

    @property
    def boundary(self) -> str:
        return self._boundary.decode("ascii")

    def append(self, obj: Any, headers: Optional[Mapping[str, str]] = None) -> Payload:
        """Add *obj* as a new body part, converting it to a payload if needed.

        Raises TypeError if no payload can be created for *obj*.
        """
        if headers is None:
            headers = CIMultiDict()

        if isinstance(obj, Payload):
            obj.headers.update(headers)
            return self.append_payload(obj)

        try:
            payload = get_payload(obj, headers=headers)
        except LookupError:
            raise TypeError("Cannot create payload from %r" % obj)
        return self.append_payload(payload)

    def append_payload(self, payload: Payload) -> Payload:
        """Add a ready-made payload as a new body part."""
        encoding: Optional[str] = None
        te_encoding: Optional[str] = None
        if self._is_form_data:
            # form-data parts must not carry any of these headers.
            assert (
                not {CONTENT_ENCODING, CONTENT_LENGTH, CONTENT_TRANSFER_ENCODING}
                & payload.headers.keys()
            )
            # Provide a default Content-Disposition when there is none yet.
            if CONTENT_DISPOSITION not in payload.headers:
                default_name = f"section-{len(self._parts)}"
                payload.set_content_disposition("form-data", name=default_name)
        else:
            # Content-Encoding (compression)
            encoding = payload.headers.get(CONTENT_ENCODING, "").lower()
            if encoding and encoding not in ("deflate", "gzip", "identity"):
                raise RuntimeError(f"unknown content encoding: {encoding}")
            if encoding == "identity":
                encoding = None

            # Content-Transfer-Encoding
            te_encoding = payload.headers.get(CONTENT_TRANSFER_ENCODING, "").lower()
            if te_encoding not in ("", "base64", "quoted-printable", "binary"):
                raise RuntimeError(f"unknown content transfer encoding: {te_encoding}")
            if te_encoding == "binary":
                te_encoding = None

            # Content-Length is only announced when the body is sent as is.
            size = payload.size
            if size is not None and not (encoding or te_encoding):
                payload.headers[CONTENT_LENGTH] = str(size)

        self._parts.append((payload, encoding, te_encoding))
        return payload

    def append_json(
        self, obj: Any, headers: Optional[Mapping[str, str]] = None
    ) -> Payload:
        """Helper to append a JSON part."""
        part_headers = CIMultiDict() if headers is None else headers
        return self.append_payload(JsonPayload(obj, headers=part_headers))

    def append_form(
        self,
        obj: Union[Sequence[Tuple[str, str]], Mapping[str, str]],
        headers: Optional[Mapping[str, str]] = None,
    ) -> Payload:
        """Helper to append a form urlencoded part."""
        assert isinstance(obj, (Sequence, Mapping))

        if headers is None:
            headers = CIMultiDict()

        pairs = list(obj.items()) if isinstance(obj, Mapping) else obj
        body = urlencode(pairs, doseq=True)

        form_part = StringPayload(
            body, headers=headers, content_type="application/x-www-form-urlencoded"
        )
        return self.append_payload(form_part)

    @property
    def size(self) -> Optional[int]:
        """Size of the payload.

        None when any part is encoded on the fly or has no known size.
        """
        boundary_len = len(self._boundary)
        total = 0
        for part, encoding, te_encoding in self._parts:
            part_size = part.size
            if encoding or te_encoding or part_size is None:
                return None

            total += int(
                2  # b"--"
                + boundary_len
                + 2  # b"\r\n"
                + part_size
                + len(part._binary_headers)
                + 2  # b"\r\n" after the part
            )

        # Closing line: b"--" + boundary + b"--\r\n"
        return total + 2 + boundary_len + 4

    def decode(self, encoding: str = "utf-8", errors: str = "strict") -> str:
        """Return string representation of the multipart data.

        WARNING: This method may do blocking I/O if parts contain file payloads.
        It should not be called in the event loop. Use as_bytes().decode() instead.
        """
        opening = "--" + self.boundary + "\r\n"
        rendered: List[str] = []
        for part, _e, _te in self._parts:
            head = part._binary_headers.decode(encoding, errors)
            rendered.append(opening + head + part.decode())
        return "".join(rendered)

    async def as_bytes(self, encoding: str = "utf-8", errors: str = "strict") -> bytes:
        """Return bytes representation of the multipart data.

        Each part is rendered through its own ``as_bytes()``.
        """
        opening = b"--" + self._boundary + b"\r\n"
        pieces: List[bytes] = []

        for part, _e, _te in self._parts:
            # boundary line, part headers, part body, CRLF
            pieces.append(opening)
            pieces.append(part._binary_headers)
            pieces.append(await part.as_bytes(encoding, errors))
            pieces.append(b"\r\n")

        # closing boundary
        pieces.append(b"--" + self._boundary + b"--\r\n")

        return b"".join(pieces)

    async def write(
        self, writer: AbstractStreamWriter, close_boundary: bool = True
    ) -> None:
        """Write body."""
        opening = b"--" + self._boundary + b"\r\n"
        for part, encoding, te_encoding in self._parts:
            if self._is_form_data:
                # Every form-data part needs a Content-Disposition with a name.
                assert CONTENT_DISPOSITION in part.headers
                assert "name=" in part.headers[CONTENT_DISPOSITION]

            await writer.write(opening)
            await writer.write(part._binary_headers)

            if not (encoding or te_encoding):
                await part.write(writer)
            else:
                # Route the part through an encoding/compressing writer.
                encoder = MultipartPayloadWriter(writer)
                if encoding:
                    encoder.enable_compression(encoding)
                if te_encoding:
                    encoder.enable_encoding(te_encoding)
                await part.write(encoder)
                await encoder.write_eof()

            await writer.write(b"\r\n")

        if close_boundary:
            await writer.write(b"--" + self._boundary + b"--\r\n")

    async def close(self) -> None:
        """
        Close all part payloads that need explicit closing.

        IMPORTANT: This method must not await anything that might not finish
        immediately, as it may be called during cleanup/cancellation. Schedule
        any long-running operations without awaiting them.
        """
        if self._consumed:
            return
        self._consumed = True

        # Only parts that neither close themselves nor are consumed already
        # get closed here; a failure is logged and does not stop the loop.
        for idx, (part, _, _) in enumerate(self._parts):
            if part.autoclose or part.consumed:
                continue
            try:
                await part.close()
            except Exception as exc:
                internal_logger.error(
                    "Failed to close multipart part %d: %s", idx, exc, exc_info=True
                )
| |
|
| |
|
class MultipartPayloadWriter:
    """Writer that compresses and/or transfer-encodes data before passing it on.

    Everything written is optionally compressed, then optionally encoded as
    base64 or quoted-printable, and finally forwarded to the wrapped writer.
    """

    def __init__(self, writer: AbstractStreamWriter) -> None:
        self._writer = writer
        self._encoding: Optional[str] = None
        self._compress: Optional[ZLibCompressor] = None
        # Bytes waiting to be base64-encoded (fewer than 3 between writes).
        self._encoding_buffer: Optional[bytearray] = None

    def enable_encoding(self, encoding: str) -> None:
        """Turn on ``base64`` or ``quoted-printable``; other values are ignored."""
        if encoding not in ("base64", "quoted-printable"):
            return
        self._encoding = encoding
        if encoding == "base64":
            self._encoding_buffer = bytearray()

    def enable_compression(
        self, encoding: str = "deflate", strategy: Optional[int] = None
    ) -> None:
        """Turn on compression of everything written from now on."""
        self._compress = ZLibCompressor(
            encoding=encoding,
            suppress_deflate_header=True,
            strategy=strategy,
        )

    async def write_eof(self) -> None:
        """Flush the compressor and the pending base64 bytes."""
        compressor = self._compress
        if compressor is not None:
            tail = compressor.flush()
            if tail:
                # Drop the compressor first so the flushed data is passed on
                # by write() without being compressed a second time.
                self._compress = None
                await self.write(tail)

        if self._encoding == "base64" and self._encoding_buffer:
            await self._writer.write(base64.b64encode(self._encoding_buffer))

    async def write(self, chunk: bytes) -> None:
        """Compress/encode *chunk* as configured and forward the result."""
        if self._compress is not None and chunk:
            chunk = await self._compress.compress(chunk)
            if not chunk:
                # The compressor has buffered everything; nothing to emit yet.
                return

        if self._encoding == "base64":
            pending = self._encoding_buffer
            assert pending is not None
            pending.extend(chunk)

            if pending:
                # Encode whole 3-byte groups only; the rest stays buffered.
                whole = len(pending) // 3 * 3
                ready, self._encoding_buffer = pending[:whole], pending[whole:]
                if ready:
                    await self._writer.write(base64.b64encode(ready))
        elif self._encoding == "quoted-printable":
            await self._writer.write(binascii.b2a_qp(chunk))
        else:
            await self._writer.write(chunk)
| |
|