File size: 17,655 Bytes
a366dd4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 | """
Methods that can be shared by many array-like classes or subclasses:
Series
Index
ExtensionArray
"""
from __future__ import annotations
import operator
from typing import Any
import numpy as np
from pandas._libs import lib
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
from pandas.core.dtypes.generic import ABCNDFrame
from pandas.core import roperator
from pandas.core.construction import extract_array
from pandas.core.ops.common import unpack_zerodim_and_defer
REDUCTION_ALIASES = {
"maximum": "max",
"minimum": "min",
"add": "sum",
"multiply": "prod",
}
class OpsMixin:
# -------------------------------------------------------------
# Comparisons
def _cmp_method(self, other, op):
return NotImplemented
@unpack_zerodim_and_defer("__eq__")
def __eq__(self, other):
return self._cmp_method(other, operator.eq)
@unpack_zerodim_and_defer("__ne__")
def __ne__(self, other):
return self._cmp_method(other, operator.ne)
@unpack_zerodim_and_defer("__lt__")
def __lt__(self, other):
return self._cmp_method(other, operator.lt)
@unpack_zerodim_and_defer("__le__")
def __le__(self, other):
return self._cmp_method(other, operator.le)
@unpack_zerodim_and_defer("__gt__")
def __gt__(self, other):
return self._cmp_method(other, operator.gt)
@unpack_zerodim_and_defer("__ge__")
def __ge__(self, other):
return self._cmp_method(other, operator.ge)
# -------------------------------------------------------------
# Logical Methods
def _logical_method(self, other, op):
return NotImplemented
@unpack_zerodim_and_defer("__and__")
def __and__(self, other):
return self._logical_method(other, operator.and_)
@unpack_zerodim_and_defer("__rand__")
def __rand__(self, other):
return self._logical_method(other, roperator.rand_)
@unpack_zerodim_and_defer("__or__")
def __or__(self, other):
return self._logical_method(other, operator.or_)
@unpack_zerodim_and_defer("__ror__")
def __ror__(self, other):
return self._logical_method(other, roperator.ror_)
@unpack_zerodim_and_defer("__xor__")
def __xor__(self, other):
return self._logical_method(other, operator.xor)
@unpack_zerodim_and_defer("__rxor__")
def __rxor__(self, other):
return self._logical_method(other, roperator.rxor)
# -------------------------------------------------------------
# Arithmetic Methods
def _arith_method(self, other, op):
return NotImplemented
@unpack_zerodim_and_defer("__add__")
def __add__(self, other):
"""
Get Addition of DataFrame and other, column-wise.
Equivalent to ``DataFrame.add(other)``.
Parameters
----------
other : scalar, sequence, Series, dict or DataFrame
Object to be added to the DataFrame.
Returns
-------
DataFrame
The result of adding ``other`` to DataFrame.
See Also
--------
DataFrame.add : Add a DataFrame and another object, with option for index-
or column-oriented addition.
Examples
--------
>>> df = pd.DataFrame({'height': [1.5, 2.6], 'weight': [500, 800]},
... index=['elk', 'moose'])
>>> df
height weight
elk 1.5 500
moose 2.6 800
Adding a scalar affects all rows and columns.
>>> df[['height', 'weight']] + 1.5
height weight
elk 3.0 501.5
moose 4.1 801.5
Each element of a list is added to a column of the DataFrame, in order.
>>> df[['height', 'weight']] + [0.5, 1.5]
height weight
elk 2.0 501.5
moose 3.1 801.5
Keys of a dictionary are aligned to the DataFrame, based on column names;
each value in the dictionary is added to the corresponding column.
>>> df[['height', 'weight']] + {'height': 0.5, 'weight': 1.5}
height weight
elk 2.0 501.5
moose 3.1 801.5
When `other` is a :class:`Series`, the index of `other` is aligned with the
columns of the DataFrame.
>>> s1 = pd.Series([0.5, 1.5], index=['weight', 'height'])
>>> df[['height', 'weight']] + s1
height weight
elk 3.0 500.5
moose 4.1 800.5
Even when the index of `other` is the same as the index of the DataFrame,
the :class:`Series` will not be reoriented. If index-wise alignment is desired,
:meth:`DataFrame.add` should be used with `axis='index'`.
>>> s2 = pd.Series([0.5, 1.5], index=['elk', 'moose'])
>>> df[['height', 'weight']] + s2
elk height moose weight
elk NaN NaN NaN NaN
moose NaN NaN NaN NaN
>>> df[['height', 'weight']].add(s2, axis='index')
height weight
elk 2.0 500.5
moose 4.1 801.5
When `other` is a :class:`DataFrame`, both columns names and the
index are aligned.
>>> other = pd.DataFrame({'height': [0.2, 0.4, 0.6]},
... index=['elk', 'moose', 'deer'])
>>> df[['height', 'weight']] + other
height weight
deer NaN NaN
elk 1.7 NaN
moose 3.0 NaN
"""
return self._arith_method(other, operator.add)
@unpack_zerodim_and_defer("__radd__")
def __radd__(self, other):
return self._arith_method(other, roperator.radd)
@unpack_zerodim_and_defer("__sub__")
def __sub__(self, other):
return self._arith_method(other, operator.sub)
@unpack_zerodim_and_defer("__rsub__")
def __rsub__(self, other):
return self._arith_method(other, roperator.rsub)
@unpack_zerodim_and_defer("__mul__")
def __mul__(self, other):
return self._arith_method(other, operator.mul)
@unpack_zerodim_and_defer("__rmul__")
def __rmul__(self, other):
return self._arith_method(other, roperator.rmul)
@unpack_zerodim_and_defer("__truediv__")
def __truediv__(self, other):
return self._arith_method(other, operator.truediv)
@unpack_zerodim_and_defer("__rtruediv__")
def __rtruediv__(self, other):
return self._arith_method(other, roperator.rtruediv)
@unpack_zerodim_and_defer("__floordiv__")
def __floordiv__(self, other):
return self._arith_method(other, operator.floordiv)
@unpack_zerodim_and_defer("__rfloordiv")
def __rfloordiv__(self, other):
return self._arith_method(other, roperator.rfloordiv)
@unpack_zerodim_and_defer("__mod__")
def __mod__(self, other):
return self._arith_method(other, operator.mod)
@unpack_zerodim_and_defer("__rmod__")
def __rmod__(self, other):
return self._arith_method(other, roperator.rmod)
@unpack_zerodim_and_defer("__divmod__")
def __divmod__(self, other):
return self._arith_method(other, divmod)
@unpack_zerodim_and_defer("__rdivmod__")
def __rdivmod__(self, other):
return self._arith_method(other, roperator.rdivmod)
@unpack_zerodim_and_defer("__pow__")
def __pow__(self, other):
return self._arith_method(other, operator.pow)
@unpack_zerodim_and_defer("__rpow__")
def __rpow__(self, other):
return self._arith_method(other, roperator.rpow)
# -----------------------------------------------------------------------------
# Helpers to implement __array_ufunc__
def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
"""
Compatibility with numpy ufuncs.
See also
--------
numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
"""
from pandas.core.frame import (
DataFrame,
Series,
)
from pandas.core.generic import NDFrame
from pandas.core.internals import (
ArrayManager,
BlockManager,
)
cls = type(self)
kwargs = _standardize_out_kwarg(**kwargs)
# for binary ops, use our custom dunder methods
result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)
if result is not NotImplemented:
return result
# Determine if we should defer.
no_defer = (
np.ndarray.__array_ufunc__,
cls.__array_ufunc__,
)
for item in inputs:
higher_priority = (
hasattr(item, "__array_priority__")
and item.__array_priority__ > self.__array_priority__
)
has_array_ufunc = (
hasattr(item, "__array_ufunc__")
and type(item).__array_ufunc__ not in no_defer
and not isinstance(item, self._HANDLED_TYPES)
)
if higher_priority or has_array_ufunc:
return NotImplemented
# align all the inputs.
types = tuple(type(x) for x in inputs)
alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)]
if len(alignable) > 1:
# This triggers alignment.
# At the moment, there aren't any ufuncs with more than two inputs
# so this ends up just being x1.index | x2.index, but we write
# it to handle *args.
set_types = set(types)
if len(set_types) > 1 and {DataFrame, Series}.issubset(set_types):
# We currently don't handle ufunc(DataFrame, Series)
# well. Previously this raised an internal ValueError. We might
# support it someday, so raise a NotImplementedError.
raise NotImplementedError(
f"Cannot apply ufunc {ufunc} to mixed DataFrame and Series inputs."
)
axes = self.axes
for obj in alignable[1:]:
# this relies on the fact that we aren't handling mixed
# series / frame ufuncs.
for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)):
axes[i] = ax1.union(ax2)
reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
inputs = tuple(
x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
for x, t in zip(inputs, types)
)
else:
reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))
if self.ndim == 1:
names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
name = names[0] if len(set(names)) == 1 else None
reconstruct_kwargs = {"name": name}
else:
reconstruct_kwargs = {}
def reconstruct(result):
if ufunc.nout > 1:
# np.modf, np.frexp, np.divmod
return tuple(_reconstruct(x) for x in result)
return _reconstruct(result)
def _reconstruct(result):
if lib.is_scalar(result):
return result
if result.ndim != self.ndim:
if method == "outer":
raise NotImplementedError
return result
if isinstance(result, (BlockManager, ArrayManager)):
# we went through BlockManager.apply e.g. np.sqrt
result = self._constructor_from_mgr(result, axes=result.axes)
else:
# we converted an array, lost our axes
result = self._constructor(
result, **reconstruct_axes, **reconstruct_kwargs, copy=False
)
# TODO: When we support multiple values in __finalize__, this
# should pass alignable to `__finalize__` instead of self.
# Then `np.add(a, b)` would consider attrs from both a and b
# when a and b are NDFrames.
if len(alignable) == 1:
result = result.__finalize__(self)
return result
if "out" in kwargs:
# e.g. test_multiindex_get_loc
result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs)
return reconstruct(result)
if method == "reduce":
# e.g. test.series.test_ufunc.test_reduce
result = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs)
if result is not NotImplemented:
return result
# We still get here with kwargs `axis` for e.g. np.maximum.accumulate
# and `dtype` and `keepdims` for np.ptp
if self.ndim > 1 and (len(inputs) > 1 or ufunc.nout > 1):
# Just give up on preserving types in the complex case.
# In theory we could preserve them for them.
# * nout>1 is doable if BlockManager.apply took nout and
# returned a Tuple[BlockManager].
# * len(inputs) > 1 is doable when we know that we have
# aligned blocks / dtypes.
# e.g. my_ufunc, modf, logaddexp, heaviside, subtract, add
inputs = tuple(np.asarray(x) for x in inputs)
# Note: we can't use default_array_ufunc here bc reindexing means
# that `self` may not be among `inputs`
result = getattr(ufunc, method)(*inputs, **kwargs)
elif self.ndim == 1:
# ufunc(series, ...)
inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
result = getattr(ufunc, method)(*inputs, **kwargs)
else:
# ufunc(dataframe)
if method == "__call__" and not kwargs:
# for np.<ufunc>(..) calls
# kwargs cannot necessarily be handled block-by-block, so only
# take this path if there are no kwargs
mgr = inputs[0]._mgr
result = mgr.apply(getattr(ufunc, method))
else:
# otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..))
# Those can have an axis keyword and thus can't be called block-by-block
result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)
# e.g. np.negative (only one reached), with "where" and "out" in kwargs
result = reconstruct(result)
return result
def _standardize_out_kwarg(**kwargs) -> dict:
"""
If kwargs contain "out1" and "out2", replace that with a tuple "out"
np.divmod, np.modf, np.frexp can have either `out=(out1, out2)` or
`out1=out1, out2=out2)`
"""
if "out" not in kwargs and "out1" in kwargs and "out2" in kwargs:
out1 = kwargs.pop("out1")
out2 = kwargs.pop("out2")
out = (out1, out2)
kwargs["out"] = out
return kwargs
def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
"""
If we have an `out` keyword, then call the ufunc without `out` and then
set the result into the given `out`.
"""
# Note: we assume _standardize_out_kwarg has already been called.
out = kwargs.pop("out")
where = kwargs.pop("where", None)
result = getattr(ufunc, method)(*inputs, **kwargs)
if result is NotImplemented:
return NotImplemented
if isinstance(result, tuple):
# i.e. np.divmod, np.modf, np.frexp
if not isinstance(out, tuple) or len(out) != len(result):
raise NotImplementedError
for arr, res in zip(out, result):
_assign_where(arr, res, where)
return out
if isinstance(out, tuple):
if len(out) == 1:
out = out[0]
else:
raise NotImplementedError
_assign_where(out, result, where)
return out
def _assign_where(out, result, where) -> None:
"""
Set a ufunc result into 'out', masking with a 'where' argument if necessary.
"""
if where is None:
# no 'where' arg passed to ufunc
out[:] = result
else:
np.putmask(out, where, result)
def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
"""
Fallback to the behavior we would get if we did not define __array_ufunc__.
Notes
-----
We are assuming that `self` is among `inputs`.
"""
if not any(x is self for x in inputs):
raise NotImplementedError
new_inputs = [x if x is not self else np.asarray(x) for x in inputs]
return getattr(ufunc, method)(*new_inputs, **kwargs)
def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
"""
Dispatch ufunc reductions to self's reduction methods.
"""
assert method == "reduce"
if len(inputs) != 1 or inputs[0] is not self:
return NotImplemented
if ufunc.__name__ not in REDUCTION_ALIASES:
return NotImplemented
method_name = REDUCTION_ALIASES[ufunc.__name__]
# NB: we are assuming that min/max represent minimum/maximum methods,
# which would not be accurate for e.g. Timestamp.min
if not hasattr(self, method_name):
return NotImplemented
if self.ndim > 1:
if isinstance(self, ABCNDFrame):
# TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA
kwargs["numeric_only"] = False
if "axis" not in kwargs:
# For DataFrame reductions we don't want the default axis=0
# Note: np.min is not a ufunc, but uses array_function_dispatch,
# so calls DataFrame.min (without ever getting here) with the np.min
# default of axis=None, which DataFrame.min catches and changes to axis=0.
# np.minimum.reduce(df) gets here bc axis is not in kwargs,
# so we set axis=0 to match the behaviorof np.minimum.reduce(df.values)
kwargs["axis"] = 0
# By default, numpy's reductions do not skip NaNs, so we have to
# pass skipna=False
return getattr(self, method_name)(skipna=False, **kwargs)
|