6.43 kB

	from __future__ import annotations

	import codecs
	import re
	import typing as t
	import urllib.parse
	from urllib.parse import quote
	from urllib.parse import unquote
	from urllib.parse import urlencode
	from urllib.parse import urlsplit
	from urllib.parse import urlunsplit

	from .datastructures import iter_multi_items


	def _codec_error_url_quote(e: UnicodeError) -> tuple[str, int]:
	    """Codec error handler that percent-encodes the bytes that failed
	    to decode. Used in :func:`uri_to_iri` so that invalid bytes are
	    quoted again after unquoting.

	    :param e: The error raised by the codec.
	    :return: The replacement text and the position to resume from.
	    """
	    # UnicodeError is documented to carry ``object``, ``start`` and ``end``,
	    # but mypy does not pick them up, hence the ignores.
	    resume_at = e.end  # type: ignore
	    invalid = e.object[e.start : resume_at]  # type: ignore
	    return quote(invalid, safe=""), resume_at


	# Expose the handler under the error-handler name "werkzeug.url_quote",
	# which is the ``errors`` value passed to ``unquote`` further down.
	codecs.register_error("werkzeug.url_quote", _codec_error_url_quote)


	def _make_unquote_part(name: str, chars: str) -> t.Callable[[str], str]:
	    """Create a function that unquotes all percent encoded characters except those
	    given. This allows working with unquoted characters if possible while not changing
	    the meaning of a given part of a URL.

	    :param name: Suffix used for the generated function's ``__name__``
	        (``_unquote_{name}``).
	    :param chars: Characters whose percent-encoded form must be left as is.
	    :return: A function that takes a quoted string and returns it partially
	        unquoted.
	    """
	    # Alternation of the two-digit hex codes of the protected characters. The
	    # separator has to be a bare "|"; an escaped one matches a literal pipe and
	    # nothing would stay protected.
	    choices = "|".join(f"{ord(c):02X}" for c in sorted(chars))
	    # Exactly one capturing group: split() then alternates between text that may
	    # be unquoted (even indexes) and runs of protected escapes (odd indexes).
	    pattern = re.compile(f"((?:%(?:{choices}))+)", re.I)

	    def _unquote_partial(value: str) -> str:
	        pieces = pattern.split(value)
	        # Only the text between protected escapes is unquoted. Bytes that are
	        # not valid UTF-8 are handled by the "werkzeug.url_quote" error handler.
	        pieces[::2] = [
	            unquote(piece, "utf-8", "werkzeug.url_quote") for piece in pieces[::2]
	        ]
	        return "".join(pieces)

	    _unquote_partial.__name__ = f"_unquote_{name}"
	    return _unquote_partial


	# Characters that must stay percent-encoded, per part of the URL. Based on
	# https://url.spec.whatwg.org/#percent-encoded-bytes
	# Every part keeps the controls (0x00-0x1F and 0x7F), space (0x20) and "%" quoted.
	_always_unsafe = "".join(map(chr, (*range(0x21), 0x25, 0x7F)))
	_unquote_fragment = _make_unquote_part(name="fragment", chars=_always_unsafe)
	_unquote_query = _make_unquote_part(name="query", chars=_always_unsafe + "&=+#")
	_unquote_path = _make_unquote_part(name="path", chars=_always_unsafe + "/?#")
	_unquote_user = _make_unquote_part(name="user", chars=_always_unsafe + ":@/?#")


	def uri_to_iri(uri: str) -> str:
	    """Convert a URI to an IRI. All valid UTF-8 characters are unquoted,
	    leaving all reserved and invalid characters quoted. If the URL has
	    a domain, it is decoded from Punycode.

	    >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")
	    'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'

	    An explicit port is always kept, including port ``0``.

	    :param uri: The URI to convert.
	    :return: The IRI as a string.

	    .. versionchanged:: 3.0
	        Passing a tuple or bytes, and the ``charset`` and ``errors`` parameters,
	        are removed.

	    .. versionchanged:: 2.3
	        Which characters remain quoted is specific to each part of the URL.

	    .. versionchanged:: 0.15
	        All reserved and invalid characters remain quoted. Previously,
	        only some reserved characters were preserved, and invalid bytes
	        were replaced instead of left quoted.

	    .. versionadded:: 0.6
	    """
	    parts = urlsplit(uri)
	    path = _unquote_path(parts.path)
	    query = _unquote_query(parts.query)
	    fragment = _unquote_fragment(parts.fragment)

	    if parts.hostname:
	        netloc = _decode_idna(parts.hostname)
	    else:
	        netloc = ""

	    if ":" in netloc:
	        # A colon in the host means an IPv6 literal; put the brackets back.
	        netloc = f"[{netloc}]"

	    # Compare against None rather than truthiness so port 0 is not dropped.
	    port = parts.port

	    if port is not None:
	        netloc = f"{netloc}:{port}"

	    if parts.username:
	        auth = _unquote_user(parts.username)

	        if parts.password:
	            password = _unquote_user(parts.password)
	            auth = f"{auth}:{password}"

	        netloc = f"{auth}@{netloc}"

	    return urlunsplit((parts.scheme, netloc, path, query, fragment))


	def iri_to_uri(iri: str) -> str:
	    """Convert an IRI to a URI. All non-ASCII and unsafe characters are
	    quoted. If the URL has a domain, it is encoded to Punycode.

	    >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF')
	    'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF'

	    An explicit port is always kept, including port ``0``.

	    :param iri: The IRI to convert.
	    :return: The URI as a string.

	    .. versionchanged:: 3.0
	        Passing a tuple or bytes, the ``charset`` and ``errors`` parameters,
	        and the ``safe_conversion`` parameter, are removed.

	    .. versionchanged:: 2.3
	        Which characters remain unquoted is specific to each part of the URL.

	    .. versionchanged:: 0.15
	        All reserved characters remain unquoted. Previously, only some reserved
	        characters were left unquoted.

	    .. versionchanged:: 0.9.6
	        The ``safe_conversion`` parameter was added.

	    .. versionadded:: 0.6
	    """
	    parts = urlsplit(iri)
	    # safe = https://url.spec.whatwg.org/#url-path-segment-string
	    # as well as percent for things that are already quoted
	    path = quote(parts.path, safe="%!$&'()*+,/:;=@")
	    query = quote(parts.query, safe="%!$&'()*+,/:;=?@")
	    fragment = quote(parts.fragment, safe="%!#$&'()*+,/:;=?@")

	    if parts.hostname:
	        netloc = parts.hostname.encode("idna").decode("ascii")
	    else:
	        netloc = ""

	    if ":" in netloc:
	        # A colon in the host means an IPv6 literal; put the brackets back.
	        netloc = f"[{netloc}]"

	    # Compare against None rather than truthiness so port 0 is not dropped.
	    port = parts.port

	    if port is not None:
	        netloc = f"{netloc}:{port}"

	    if parts.username:
	        auth = quote(parts.username, safe="%!$&'()*+,;=")

	        if parts.password:
	            password = quote(parts.password, safe="%!$&'()*+,;=")
	            auth = f"{auth}:{password}"

	        netloc = f"{auth}@{netloc}"

	    return urlunsplit((parts.scheme, netloc, path, query, fragment))


	# Python < 3.12
	# Earlier iri_to_uri implementations special-cased itms-services. Registering
	# the scheme in uses_netloc instead tells Python directly to preserve the //.
	if "itms-services" not in urllib.parse.uses_netloc:
	    urllib.parse.uses_netloc.append("itms-services")


	def _decode_idna(domain: str) -> str:
	    """Decode a Punycode (IDNA) host name to Unicode.

	    :param domain: The host name to decode.
	    :return: ``domain`` unchanged if it is not ASCII. Otherwise the decoded
	        name, with any label that cannot be decoded left as it was.
	    """
	    try:
	        data = domain.encode("ascii")
	    except UnicodeEncodeError:
	        # If the domain is not ASCII, it's decoded already.
	        return domain

	    try:
	        # Try decoding in one shot.
	        return data.decode("idna")
	    except UnicodeError:
	        # Catch the base class: depending on the Python version the idna
	        # codec reports invalid labels (e.g. an empty "xn--") as a plain
	        # UnicodeError rather than a UnicodeDecodeError.
	        pass

	    # Decode each part separately, leaving invalid parts as punycode.
	    parts = []

	    for part in data.split(b"."):
	        try:
	            parts.append(part.decode("idna"))
	        except UnicodeError:
	            parts.append(part.decode("ascii"))

	    return ".".join(parts)


	def _urlencode(query: t.Mapping[str, str] | t.Iterable[tuple[str, str]]) -> str:
	    """Encode key/value pairs as a query string, skipping pairs whose
	    value is ``None``.

	    :param query: A mapping or an iterable of ``(key, value)`` pairs; it is
	        expanded with ``iter_multi_items``.
	    :return: The URL-encoded query string.
	    """
	    items = [(key, value) for key, value in iter_multi_items(query) if value is not None]
	    # safe = https://url.spec.whatwg.org/#percent-encoded-bytes
	    return urlencode(items, safe="!$'()*,/:;?@")

Xet Storage Details

Size:: 6.43 kB
Xet hash:: 7bb14c557a218bf4ac373a58e71b68bfa272c9d81cd74047ae85fc2a6de91d2c

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.