| """ |
| Module that contains many useful utilities |
| for validating data or function arguments |
| """ |
| from __future__ import annotations |
|
|
| from collections.abc import ( |
| Iterable, |
| Sequence, |
| ) |
| from typing import ( |
| TypeVar, |
| overload, |
| ) |
|
|
| import numpy as np |
|
|
| from pandas._libs import lib |
|
|
| from pandas.core.dtypes.common import ( |
| is_bool, |
| is_integer, |
| ) |
|
|
# Type variable constrained to ``bool`` or ``int``. It annotates
# ``validate_ascending`` so the validated value keeps the caller's type.
BoolishT = TypeVar("BoolishT", bool, int)
# Same constraints as ``BoolishT`` but additionally admits ``None``. It
# annotates ``validate_bool_kwarg``, which returns its input unchanged.
BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)
|
|
|
|
def _check_arg_length(fname, args, max_fname_arg_count, compat_args) -> None:
    """
    Ensure that 'args' holds no more entries than 'compat_args' does.

    Mirrors what Python itself does when a function is called with too
    many arguments: a TypeError is raised. The counts reported in the
    message are both offset by 'max_fname_arg_count'.

    Raises
    ------
    ValueError
        If 'max_fname_arg_count' is negative.
    TypeError
        If ``len(args) > len(compat_args)``.
    """
    if max_fname_arg_count < 0:
        raise ValueError("'max_fname_arg_count' must be non-negative")

    n_given = len(args)
    n_allowed = len(compat_args)
    if n_given <= n_allowed:
        return

    # Report totals as the user sees them on the public function signature.
    max_arg_count = n_allowed + max_fname_arg_count
    actual_arg_count = n_given + max_fname_arg_count
    noun = "arguments" if max_arg_count != 1 else "argument"

    raise TypeError(
        f"{fname}() takes at most {max_arg_count} {noun} "
        f"({actual_arg_count} given)"
    )
|
|
|
|
def _check_for_default_values(fname, arg_val_dict, compat_args) -> None:
    """
    Verify that every entry of `arg_val_dict` equals its default value.

    Each key of `arg_val_dict` is looked up in `compat_args` and the two
    values are compared, first with ``==`` and, when that comparison does
    not yield a boolean (or raises ValueError), by identity.

    Note that this function is to be called only when it has been
    checked that arg_val_dict.keys() is a subset of compat_args.

    Raises
    ------
    ValueError
        If a value does not match the default recorded in `compat_args`.
    """
    for key in arg_val_dict:
        try:
            supplied = arg_val_dict[key]
            default = compat_args[key]

            if (supplied is None) != (default is None):
                # Exactly one side is None, so the values cannot match and
                # there is no need to evaluate `==` at all.
                is_default = False
            else:
                is_default = supplied == default

            # `==` may return something other than a plain boolean; treat
            # that like a failed comparison and use the fallback below.
            if not is_bool(is_default):
                raise ValueError("'match' is not a boolean")

        except ValueError:
            # Equality was unusable: fall back to an identity check.
            is_default = arg_val_dict[key] is compat_args[key]

        if not is_default:
            raise ValueError(
                f"the '{key}' parameter is not supported in "
                f"the pandas implementation of {fname}()"
            )
|
|
|
|
def validate_args(fname, args, max_fname_arg_count, compat_args) -> None:
    """
    Validate the `*args` passed into a compatibility-only signature.

    Two checks are performed: `args` may contain at most `len(compat_args)`
    values, and each of those values must equal the default value of the
    parameter it lines up with positionally.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `*args` parameter
    args : tuple
        The `*args` parameter passed into a function
    max_fname_arg_count : int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args : dict
        A dictionary of keys and their associated default values.
        In order to accommodate buggy behaviour in some versions of `numpy`,
        where a signature displayed keyword arguments but then passed those
        arguments **positionally** internally when calling downstream
        implementations, a dict ensures that the original
        order of the keyword arguments is enforced.

    Raises
    ------
    TypeError
        If `args` contains more values than there are `compat_args`
    ValueError
        If `args` contains values that do not correspond to those
        of the default values specified in `compat_args`
    """
    _check_arg_length(fname, args, max_fname_arg_count, compat_args)

    # Pair each positional value with the parameter name at the same
    # position; the length check above guarantees that `args` is not
    # longer than `compat_args`, so no value is dropped by `zip`.
    name_value_pairs = zip(compat_args, args)
    positional_by_name = dict(name_value_pairs)
    _check_for_default_values(fname, positional_by_name, compat_args)
|
|
|
|
def _check_for_invalid_keys(fname, kwargs, compat_args) -> None:
    """
    Check whether 'kwargs' contains any keys that are not in 'compat_args'.

    When several unexpected keys are present, the first one in the
    iteration order of 'kwargs' is reported, so the error message is the
    same on every run instead of depending on set iteration order.

    Raises
    ------
    TypeError
        If 'kwargs' contains a key that is not in 'compat_args'.
    """
    _missing = object()
    bad_arg = next((key for key in kwargs if key not in compat_args), _missing)

    if bad_arg is not _missing:
        raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'")
|
|
|
|
def validate_kwargs(fname, kwargs, compat_args) -> None:
    """
    Validate the `**kwargs` passed into a compatibility-only signature.

    Every key of `kwargs` must be a key of `compat_args`, and every value
    must equal the default value recorded for that key.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `**kwargs` parameter
    kwargs : dict
        The `**kwargs` parameter passed into `fname`
    compat_args : dict
        A dictionary of keys that `kwargs` is allowed to have and their
        associated default values

    Raises
    ------
    TypeError
        If `kwargs` contains keys not in `compat_args`
    ValueError
        If `kwargs` contains keys in `compat_args` that do not
        map to the default values specified in `compat_args`
    """
    # Snapshot taken before any check runs; the default-value comparison
    # operates on this copy rather than on the caller's mapping.
    snapshot = kwargs.copy()
    _check_for_invalid_keys(fname, kwargs, compat_args)
    _check_for_default_values(fname, snapshot, compat_args)
|
|
|
|
def validate_args_and_kwargs(
    fname, args, kwargs, max_fname_arg_count, compat_args
) -> None:
    """
    Checks whether parameters passed to the *args and **kwargs argument in a
    function `fname` are valid parameters as specified in `*compat_args`
    and whether or not they are set to their default values.

    The caller's `kwargs` mapping is not modified.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `**kwargs` parameter
    args : tuple
        The `*args` parameter passed into a function
    kwargs : dict
        The `**kwargs` parameter passed into `fname`
    max_fname_arg_count : int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args : dict
        A dictionary of keys that `kwargs` is allowed to
        have and their associated default values.

    Raises
    ------
    TypeError
        If `args` and `kwargs` together contain more values than there are
        `compat_args`, if a parameter is supplied both positionally and by
        keyword, or if `kwargs` contains keys not in `compat_args`
    ValueError
        If `max_fname_arg_count` is negative, or if any positional or
        keyword value does not match the default value specified in
        `compat_args`

    See Also
    --------
    validate_args : Purely args validation.
    validate_kwargs : Purely kwargs validation.
    """
    # Positional and keyword values are counted together: between them they
    # may not supply more values than `compat_args` has parameters.
    _check_arg_length(
        fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
    )

    # Map each positional value onto the parameter name at the same position
    # so it can be validated exactly like a keyword argument.
    args_dict = dict(zip(compat_args, args))

    for key in args_dict:
        if key in kwargs:
            raise TypeError(
                f"{fname}() got multiple values for keyword argument '{key}'"
            )

    # Build a new mapping instead of calling ``kwargs.update(...)`` so that
    # validation has no side effect on the dict owned by the caller.
    combined = {**kwargs, **args_dict}
    validate_kwargs(fname, combined, compat_args)
|
|
|
|
def validate_bool_kwarg(
    value: BoolishNoneT,
    arg_name: str,
    none_allowed: bool = True,
    int_allowed: bool = False,
) -> BoolishNoneT:
    """
    Check that the value given for `arg_name` is acceptable as a boolean.

    Parameters
    ----------
    value : bool, int or None
        Value to be validated.
    arg_name : str
        Name of the argument. To be reflected in the error message.
    none_allowed : bool, default True
        Whether to consider None to be a valid boolean.
    int_allowed : bool, default False
        Whether to consider integer value to be a valid boolean.

    Returns
    -------
    value
        The same value as input.

    Raises
    ------
    ValueError
        If the value is not a valid boolean.
    """
    # Accept as soon as any one of the enabled criteria is satisfied.
    if is_bool(value):
        return value
    if none_allowed and value is None:
        return value
    if int_allowed and isinstance(value, int):
        return value

    raise ValueError(
        f'For argument "{arg_name}" expected type bool, received '
        f"type {type(value).__name__}."
    )
|
|
|
|
def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
    """
    Validate the 'value' and 'method' keyword arguments of 'fillna'.

    Exactly one of the two must be given. A given 'method' is passed
    through ``clean_fill_method``; a given 'value' is optionally checked
    for not being a list or tuple.

    Parameters
    ----------
    value, method : object
        The 'value' and 'method' keyword arguments for 'fillna'.
    validate_scalar_dict_value : bool, default True
        Whether to validate that 'value' is a scalar or dict. Specifically,
        validate that it is not a list or tuple.

    Returns
    -------
    value, method : object

    Raises
    ------
    ValueError
        If neither or both of 'value' and 'method' are specified.
    TypeError
        If 'value' is a list or tuple and `validate_scalar_dict_value`
        is True.
    """
    from pandas.core.missing import clean_fill_method

    has_value = value is not None
    has_method = method is not None

    if not has_value and not has_method:
        raise ValueError("Must specify a fill 'value' or 'method'.")
    if has_value and has_method:
        raise ValueError("Cannot specify both 'value' and 'method'.")

    # From here on exactly one of the two arguments is set.
    if has_method:
        method = clean_fill_method(method)
    elif validate_scalar_dict_value and isinstance(value, (list, tuple)):
        raise TypeError(
            '"value" parameter must be a scalar or dict, but '
            f'you passed a "{type(value).__name__}"'
        )

    return value, method
|
|
|
|
def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
    """
    Validate percentiles (used by describe and quantile).

    The input is converted with ``np.asarray`` and every percentile is
    required to lie in the closed interval [0, 1].

    Parameters
    ----------
    q : float or iterable of floats
        A single percentile or an iterable of percentiles.

    Returns
    -------
    ndarray
        An ndarray of the percentiles if valid.

    Raises
    ------
    ValueError
        If percentiles are not in the interval [0, 1].
    """
    q_arr = np.asarray(q)

    # A zero-dimensional array cannot be iterated, so wrap it in a tuple and
    # run the scalar through the same element-wise check as the array case.
    candidates = (q_arr,) if q_arr.ndim == 0 else q_arr
    if any(not (0 <= item <= 1) for item in candidates):
        raise ValueError("percentiles should all be in the interval [0, 1]")
    return q_arr
|
|
|
|
@overload
def validate_ascending(ascending: BoolishT) -> BoolishT:
    ...


@overload
def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]:
    ...


def validate_ascending(
    ascending: bool | int | Sequence[BoolishT],
) -> bool | int | list[BoolishT]:
    """
    Validate ``ascending`` kwargs for ``sort_index`` method.

    A ``Sequence`` is validated item by item and returned as a new list;
    anything else is validated as a single value and returned as is. In
    both cases integers are accepted and None is rejected.
    """
    if isinstance(ascending, Sequence):
        return [
            validate_bool_kwarg(
                entry, "ascending", none_allowed=False, int_allowed=True
            )
            for entry in ascending
        ]

    return validate_bool_kwarg(
        ascending, "ascending", none_allowed=False, int_allowed=True
    )
|
|
|
|
def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
    """
    Translate the `closed` argument into a pair of endpoint flags.

    Parameters
    ----------
    closed : {None, "left", "right"}
        None closes both endpoints, "left" only the left one and
        "right" only the right one.

    Returns
    -------
    left_closed : bool
    right_closed : bool

    Raises
    ------
    ValueError
        If argument is not among valid values
    """
    if closed is None:
        return True, True
    if closed == "left":
        return True, False
    if closed == "right":
        return False, True

    raise ValueError("Closed has to be either 'left', 'right' or None")
|
|
|
|
def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
    """
    Translate the `inclusive` argument into a pair of endpoint flags.

    Parameters
    ----------
    inclusive : {"both", "neither", "left", "right"}

    Returns
    -------
    left_right_inclusive : tuple[bool, bool]
        Whether the left and the right endpoint, respectively, are included.

    Raises
    ------
    ValueError
        If argument is not among valid values
    """
    flags_by_name = {
        "both": (True, True),
        "left": (True, False),
        "right": (False, True),
        "neither": (False, False),
    }

    # Non-string input (including None) is never looked up and is rejected
    # exactly like an unknown string.
    flags = flags_by_name.get(inclusive) if isinstance(inclusive, str) else None
    if flags is None:
        raise ValueError(
            "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
        )

    return flags
|
|
|
|
def validate_insert_loc(loc: int, length: int) -> int:
    """
    Validate an insertion position against a container of size `length`.

    `loc` must be an integer between -length and length, inclusive.
    A negative `loc` is shifted by `length` so that the returned value
    lies within [0, length].

    The exceptions we raise on failure match np.insert.

    Raises
    ------
    TypeError
        If `loc` is not an integer.
    IndexError
        If `loc` falls outside [-length, length].
    """
    if not is_integer(loc):
        raise TypeError(f"loc must be an integer between -{length} and {length}")

    # Count negative positions from the end, as with sequence indexing.
    position = loc + length if loc < 0 else loc
    if position < 0 or position > length:
        raise IndexError(f"loc must be an integer between -{length} and {length}")
    return position
|
|
|
|
def check_dtype_backend(dtype_backend) -> None:
    """
    Check that `dtype_backend` is either unset or a supported backend name.

    Parameters
    ----------
    dtype_backend : {"numpy_nullable", "pyarrow"} or ``lib.no_default``
        The value to validate; ``lib.no_default`` is always accepted.

    Raises
    ------
    ValueError
        If `dtype_backend` is neither ``lib.no_default`` nor one of the
        supported names.
    """
    if dtype_backend is lib.no_default:
        return
    if dtype_backend in ("numpy_nullable", "pyarrow"):
        return

    raise ValueError(
        f"dtype_backend {dtype_backend} is invalid, only 'numpy_nullable' and "
        f"'pyarrow' are allowed.",
    )
|
|