Sam Chaudry

Upload folder using huggingface_hub

7885a28 verified 12 months ago

8.11 kB

	from __future__ import annotations

	from collections.abc import (
	Hashable,
	Iterable,
	MutableMapping,
	Sequence,
	)
	from typing import (
	TYPE_CHECKING,
	Any,
	Callable,
	Literal,
	TypeVar,
	overload,
	)

	from pandas.compat._optional import import_optional_dependency

	from pandas.core.dtypes.common import (
	is_integer,
	is_list_like,
	)

	# Typing-only section: ExcelWriter is imported solely so it can be named in
	# annotations; TYPE_CHECKING is False at runtime, so the import is skipped.
	# NOTE(review): block indentation was lost in this copy. ExcelWriter_t
	# references ExcelWriter, which is only imported under the guard, so it must
	# sit inside the guard as well; usecols_func presumably does too -- confirm.
	if TYPE_CHECKING:
	from pandas.io.excel._base import ExcelWriter

	# Alias for "an ExcelWriter class" (the class object, not an instance).
	ExcelWriter_t = type[ExcelWriter]
	# Type variable for a ``usecols`` callable taking one Hashable argument.
	usecols_func = TypeVar("usecols_func", bound=Callable[[Hashable], object])

	# Registry of writer classes keyed by engine name; filled by
	# register_writer and read by get_writer.
	_writers: MutableMapping[str, ExcelWriter_t] = {}


	def register_writer(klass: ExcelWriter_t) -> None:
	    """
	    Register an Excel writer class under its engine name.

	    The class is stored in the module-level writer registry, keyed by its
	    ``_engine`` attribute. Writers have to be registered through this
	    function to integrate with ``to_excel``.

	    Parameters
	    ----------
	    klass : ExcelWriter
	        Writer class to register. Must be callable and expose ``_engine``.

	    Raises
	    ------
	    ValueError
	        If ``klass`` is not callable.
	    """
	    if callable(klass):
	        # Look up the engine name first, then publish the class under it.
	        name = klass._engine
	        _writers[name] = klass
	        return None

	    raise ValueError("Can only register callables as engines")


	def get_default_engine(ext: str, mode: Literal["reader", "writer"] = "reader") -> str:
	    """
	    Look up the default engine name for an Excel file extension.

	    Parameters
	    ----------
	    ext : str
	        File extension without the leading dot (e.g. ``"xlsx"``).
	    mode : {'reader', 'writer'}, default 'reader'
	        Whether the engine is wanted for reading or for writing.

	    Returns
	    -------
	    str
	        Name of the default engine for ``ext`` in the requested mode.

	    Raises
	    ------
	    KeyError
	        If ``ext`` has no default engine for the requested mode.
	    """
	    assert mode in ["reader", "writer"]

	    if mode != "writer":
	        reader_by_ext = {
	            "xlsx": "openpyxl",
	            "xlsm": "openpyxl",
	            "xlsb": "pyxlsb",
	            "xls": "xlrd",
	            "ods": "odf",
	        }
	        return reader_by_ext[ext]

	    writer_by_ext = {
	        "xlsx": "openpyxl",
	        "xlsm": "openpyxl",
	        "xlsb": "pyxlsb",
	        "ods": "odf",
	    }
	    # xlsxwriter takes precedence over openpyxl for .xlsx when it is installed.
	    if import_optional_dependency("xlsxwriter", errors="warn"):
	        writer_by_ext["xlsx"] = "xlsxwriter"
	    return writer_by_ext[ext]


	def get_writer(engine_name: str) -> ExcelWriter_t:
	    """
	    Return the writer class registered under ``engine_name``.

	    Parameters
	    ----------
	    engine_name : str
	        Name the writer was registered under.

	    Returns
	    -------
	    ExcelWriter
	        The registered writer class.

	    Raises
	    ------
	    ValueError
	        If no writer is registered under ``engine_name``.
	    """
	    try:
	        writer_cls = _writers[engine_name]
	    except KeyError as err:
	        raise ValueError(f"No Excel writer '{engine_name}'") from err
	    else:
	        return writer_cls


	def _excel2num(x: str) -> int:
	"""
	Convert Excel column name like 'AB' to 0-based column index.

	Parameters
	----------
	x : str
	The Excel column name to convert to a 0-based column index.

	Returns
	-------
	num : int
	The column index corresponding to the name.

	Raises
	------
	ValueError
	Part of the Excel column name was invalid.
	"""
	index = 0

	for c in x.upper().strip():
	cp = ord(c)

	if cp < ord("A") or cp > ord("Z"):
	raise ValueError(f"Invalid column name: {x}")

	index = index * 26 + cp - ord("A") + 1

	return index - 1


	def _range2cols(areas: str) -> list[int]:
	    """
	    Expand a comma separated list of column names and ranges into indices.

	    Parameters
	    ----------
	    areas : str
	        Comma separated column names and/or ``start:end`` ranges; range
	        endpoints are inclusive.

	    Returns
	    -------
	    cols : list
	        A list of 0-based column indices, in the order given.

	    Examples
	    --------
	    >>> _range2cols('A:E')
	    [0, 1, 2, 3, 4]
	    >>> _range2cols('A,C,Z:AB')
	    [0, 2, 25, 26, 27]
	    """
	    indices: list[int] = []

	    for area in areas.split(","):
	        if ":" not in area:
	            indices.append(_excel2num(area))
	            continue

	        # Only the first two ':'-separated parts are used as endpoints.
	        start_name, stop_name = area.split(":")[:2]
	        start = _excel2num(start_name)
	        stop = _excel2num(stop_name)
	        indices.extend(range(start, stop + 1))

	    return indices


	@overload
	def maybe_convert_usecols(usecols: str | list[int]) -> list[int]:
	    ...


	@overload
	def maybe_convert_usecols(usecols: list[str]) -> list[str]:
	    ...


	@overload
	def maybe_convert_usecols(usecols: usecols_func) -> usecols_func:
	    ...


	@overload
	def maybe_convert_usecols(usecols: None) -> None:
	    ...


	def maybe_convert_usecols(
	    usecols: str | list[int] | list[str] | usecols_func | None,
	) -> None | list[int] | list[str] | usecols_func:
	    """
	    Convert `usecols` into a compatible format for parsing in `parsers.py`.

	    Parameters
	    ----------
	    usecols : object
	        The use-columns object to potentially convert. A string is read as
	        comma separated Excel column names/ranges (e.g. ``"A,C:E"``).

	    Returns
	    -------
	    converted : object
	        ``None`` if `usecols` is ``None``; a list of 0-based column indices
	        if `usecols` is a string; otherwise `usecols` itself, unchanged.

	    Raises
	    ------
	    ValueError
	        If `usecols` is an integer, which is no longer supported.
	    """
	    if usecols is None:
	        return usecols

	    if is_integer(usecols):
	        raise ValueError(
	            "Passing an integer for `usecols` is no longer supported.  "
	            "Please pass in a list of int from 0 to `usecols` inclusive instead."
	        )

	    if isinstance(usecols, str):
	        # Excel-style spec such as "A:C,F" -> explicit list of indices.
	        return _range2cols(usecols)

	    # Lists and callables are already in a form the parser understands.
	    return usecols


	@overload
	def validate_freeze_panes(freeze_panes: tuple[int, int]) -> Literal[True]:
	    ...


	@overload
	def validate_freeze_panes(freeze_panes: None) -> Literal[False]:
	    ...


	def validate_freeze_panes(freeze_panes: tuple[int, int] | None) -> bool:
	    """
	    Check whether a ``freeze_panes`` argument is present and well formed.

	    Parameters
	    ----------
	    freeze_panes : tuple of (int, int) or None
	        The ``(row, column)`` pair to validate, or ``None`` if unspecified.

	    Returns
	    -------
	    bool
	        ``True`` if `freeze_panes` is a valid pair, ``False`` if it is
	        ``None``.

	    Raises
	    ------
	    ValueError
	        If `freeze_panes` is not ``None`` and is not a length-2 sequence of
	        integers.
	    """
	    if freeze_panes is None:
	        # freeze_panes wasn't specified, return False so it won't be applied
	        # to output sheet
	        return False

	    if len(freeze_panes) == 2 and all(
	        isinstance(item, int) for item in freeze_panes
	    ):
	        return True

	    raise ValueError(
	        "freeze_panes must be of form (row, column) "
	        "where row and column are integers"
	    )


	def fill_mi_header(
	    row: list[Hashable], control_row: list[bool]
	) -> tuple[list[Hashable], list[bool]]:
	    """
	    Forward fill blank header cells, staying within the same parent index.

	    Used when building MultiIndex headers. Both lists are modified in place
	    and also returned.

	    Parameters
	    ----------
	    row : list
	        Items of a single header row; ``""`` and ``None`` count as blank.
	    control_row : list of bool
	        Per-column flag telling whether the column still belongs to the same
	        parent index as the previous one. A false entry stops a value from
	        being carried across into that column.

	    Returns
	    -------
	    tuple of (list, list of bool)
	        The updated ``row`` and ``control_row``.
	    """
	    carry = row[0]

	    for pos in range(1, len(row)):
	        cell = row[pos]

	        # A cleared flag marks a parent boundary: restart from this cell.
	        if not control_row[pos]:
	            carry = cell

	        is_blank = cell == "" or cell is None
	        if is_blank:
	            row[pos] = carry
	        else:
	            # A real value starts a new group at this level.
	            control_row[pos] = False
	            carry = cell

	    return row, control_row


	def pop_header_name(
	    row: list[Hashable], index_col: int | Sequence[int]
	) -> tuple[Hashable | None, list[Hashable]]:
	    """
	    Pop the header name for MultiIndex parsing.

	    Parameters
	    ----------
	    row : list
	        The data row to parse for the header name.
	    index_col : int, list
	        The index columns for our data. Assumed to be non-null. When a list
	        is given, the largest position in it is used.

	    Returns
	    -------
	    header_name : str or None
	        The extracted header name, or ``None`` if that cell is ``""``.
	    trimmed_row : list
	        A copy of the data row with the header name replaced by ``""``.
	    """
	    # Pop out header name and fill w/blank.
	    if is_list_like(index_col):
	        assert isinstance(index_col, Iterable)
	        i = max(index_col)
	    else:
	        assert not isinstance(index_col, Iterable)
	        i = index_col

	    header_name = row[i]
	    header_name = None if header_name == "" else header_name

	    # Build a new list so the caller's row is left untouched.
	    return header_name, row[:i] + [""] + row[i + 1 :]


	def combine_kwargs(engine_kwargs: dict[str, Any] | None, kwargs: dict) -> dict:
	    """
	    Used to combine two sources of kwargs for the backend engine.

	    Use of kwargs is deprecated, this function is solely for use in 1.3 and should
	    be removed in 1.4/2.0. Also _base.ExcelWriter.__new__ ensures either engine_kwargs
	    or kwargs must be None or empty respectively.

	    Parameters
	    ----------
	    engine_kwargs: dict
	        kwargs to be passed through to the engine.
	    kwargs: dict
	        kwargs to be passed through to the engine (deprecated)

	    Returns
	    -------
	    dict
	        A new dict holding engine_kwargs updated with kwargs; on a key clash
	        the value from kwargs wins. Neither input is modified.
	    """
	    # Copy so the caller's engine_kwargs is never mutated by the update.
	    result = {} if engine_kwargs is None else engine_kwargs.copy()
	    result.update(kwargs)
	    return result