Upload folder using huggingface_hub

a06facb verified almost 2 years ago

5.47 kB

	import asyncio

	import aiohttp
	import aiohttp.client_exceptions
	import wrapt
	from botocore.response import (
	IncompleteReadError,
	ReadTimeoutError,
	ResponseStreamingError,
	)

	from aiobotocore import parsers


	class AioReadTimeoutError(ReadTimeoutError, asyncio.TimeoutError):
	    """Read timeout that is both a ``ReadTimeoutError`` and an ``asyncio.TimeoutError``.

	    Inheriting from both bases lets callers catch the error with either
	    exception type.
	    """


	class StreamingBody(wrapt.ObjectProxy):
	    """Wrapper class for an http response body.

	    This provides a few additional conveniences that do not exist
	    in the urllib3 model:

	    * Auto validation of content length, if the amount of bytes
	      we read does not match the content length, an exception
	      is raised.

	    The wrapped object must expose ``content`` (a stream with ``read`` and
	    ``at_eof``), ``url`` and the async context-manager protocol, since the
	    methods below use them. Attributes not defined on this class are
	    forwarded to the wrapped object by ``wrapt.ObjectProxy``.
	    """

	    _DEFAULT_CHUNK_SIZE = 1024

	    def __init__(
	        self,
	        raw_stream: aiohttp.ClientResponse,
	        content_length: 'str | None',
	    ):
	        """Wrap ``raw_stream`` and start counting the bytes read from it.

	        Args:
	            raw_stream: The response object whose ``content`` stream is read.
	            content_length: Value of the ``content-length`` header, or
	                ``None`` when the header is absent (validation is skipped).
	        """
	        super().__init__(raw_stream)
	        # The ``_self_`` prefix makes wrapt store these on the proxy itself
	        # instead of forwarding the assignment to the wrapped object.
	        self._self_content_length = content_length
	        self._self_amount_read = 0

	    # https://github.com/GrahamDumpleton/wrapt/issues/73
	    async def __aenter__(self):
	        """Enter the wrapped object's async context manager."""
	        return await self.__wrapped__.__aenter__()

	    async def __aexit__(self, exc_type, exc_val, exc_tb):
	        """Exit the wrapped object's async context manager."""
	        return await self.__wrapped__.__aexit__(exc_type, exc_val, exc_tb)

	    # NOTE: set_socket_timeout was only for when requests didn't support
	    # read timeouts, so not needed
	    def readable(self):
	        """Return ``True`` while the body stream has not reached EOF."""
	        # ``at_eof`` lives on the content stream (the same object ``read``
	        # pulls from), not on the wrapped response itself.
	        return not self.__wrapped__.content.at_eof()

	    async def read(self, amt=None):
	        """Read at most amt bytes from the stream.

	        If the amt argument is omitted, read all data.

	        Raises:
	            AioReadTimeoutError: If the underlying read times out.
	            ResponseStreamingError: If the connection fails while reading.
	            IncompleteReadError: If the stream ends and the number of bytes
	                read does not match the declared content length.
	        """
	        # botocore to aiohttp mapping: ``None`` (read everything) becomes -1.
	        size = -1 if amt is None else amt
	        try:
	            chunk = await self.__wrapped__.content.read(size)
	        except asyncio.TimeoutError as e:
	            raise AioReadTimeoutError(
	                endpoint_url=self.__wrapped__.url, error=e
	            ) from e
	        except aiohttp.client_exceptions.ClientConnectionError as e:
	            raise ResponseStreamingError(error=e) from e

	        self._self_amount_read += len(chunk)
	        if amt is None or (not chunk and amt > 0):
	            # If the server sends empty contents or
	            # we ask to read all of the contents, then we know
	            # we need to verify the content length.
	            self._verify_content_length()
	        return chunk

	    async def readlines(self):
	        """Return all remaining lines of the stream as a list."""
	        # assuming this is not an iterator
	        return [line async for line in self.iter_lines()]

	    def __aiter__(self):
	        """Return an iterator to yield 1k chunks from the raw stream."""
	        return self.iter_chunks(self._DEFAULT_CHUNK_SIZE)

	    async def __anext__(self):
	        """Return the next 1k chunk from the raw stream."""
	        current_chunk = await self.read(self._DEFAULT_CHUNK_SIZE)
	        if current_chunk:
	            return current_chunk
	        raise StopAsyncIteration

	    anext = __anext__

	    async def iter_lines(self, chunk_size=_DEFAULT_CHUNK_SIZE, keepends=False):
	        """Return an iterator to yield lines from the raw stream.

	        This is achieved by reading chunk of bytes (of size chunk_size) at a
	        time from the raw stream, and then yielding lines from there.

	        Args:
	            chunk_size: Number of bytes requested per underlying read.
	            keepends: When true, line terminators are kept on each line.
	        """
	        pending = b''
	        async for chunk in self.iter_chunks(chunk_size):
	            lines = (pending + chunk).splitlines(True)
	            # The final piece may be an incomplete line (or a terminator
	            # split across chunks), so hold it back until more data arrives.
	            for line in lines[:-1]:
	                yield line.splitlines(keepends)[0]
	            pending = lines[-1]
	        if pending:
	            yield pending.splitlines(keepends)[0]

	    async def iter_chunks(self, chunk_size=_DEFAULT_CHUNK_SIZE):
	        """Return an iterator to yield chunks of chunk_size bytes from the raw
	        stream.
	        """
	        while True:
	            current_chunk = await self.read(chunk_size)
	            if current_chunk == b"":
	                break
	            yield current_chunk

	    def _verify_content_length(self):
	        """Raise ``IncompleteReadError`` if bytes read differ from content length.

	        Does nothing when no content length was supplied.
	        """
	        # See: https://github.com/kennethreitz/requests/issues/1855
	        # Basically, our http library doesn't do this for us, so we have
	        # to do this our self.
	        if self._self_content_length is None:
	            return
	        expected_bytes = int(self._self_content_length)
	        if self._self_amount_read != expected_bytes:
	            raise IncompleteReadError(
	                actual_bytes=self._self_amount_read,
	                expected_bytes=expected_bytes,
	            )

	    def tell(self):
	        """Return the total number of bytes read so far."""
	        return self._self_amount_read


	async def get_response(operation_model, http_response):
	    """Parse ``http_response`` for the operation described by ``operation_model``.

	    The body is handed to the parser as a ``StreamingBody`` only for
	    non-error responses of operations with streaming output; in every other
	    case the awaited ``http_response.content`` is used.

	    Returns:
	        A ``(http_response, parsed)`` tuple, where ``parsed`` is whatever the
	        protocol parser's ``parse`` produced.
	    """
	    protocol = operation_model.metadata['protocol']
	    headers = http_response.headers
	    status_code = http_response.status_code

	    # TODO: Unfortunately, we have to have error logic here.
	    # If it looks like an error, in the streaming response case we
	    # need to actually grab the contents.
	    looks_like_error = status_code >= 300
	    if not looks_like_error and operation_model.has_streaming_output:
	        body = StreamingBody(
	            http_response.raw, headers.get('content-length')
	        )
	    else:
	        body = await http_response.content

	    response_dict = {
	        'headers': headers,
	        'status_code': status_code,
	        'body': body,
	    }

	    parser = parsers.create_parser(protocol)
	    parse = parser.parse
	    # Parsers may expose either a coroutine function or a plain function.
	    if asyncio.iscoroutinefunction(parse):
	        parsed = await parse(response_dict, operation_model.output_shape)
	    else:
	        parsed = parse(response_dict, operation_model.output_shape)
	    return http_response, parsed