Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/__pycache__/_warnings.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/__pycache__/contexts.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/arrays/__pycache__/__init__.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/__init__.py +43 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/_arrow_string_mixins.py +84 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/_mixins.py +547 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/_ranges.py +207 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/base.py +2588 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/boolean.py +407 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/categorical.py +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/datetimelike.py +2556 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/datetimes.py +2820 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/numeric.py +286 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/numpy_.py +563 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/period.py +1313 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/string_.py +657 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/string_arrow.py +719 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/timedeltas.py +1185 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/__init__.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/api.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/array_manager.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/base.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/blocks.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/concat.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/construction.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/managers.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/ops.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/methods/__pycache__/__init__.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/methods/__pycache__/describe.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/methods/__pycache__/selectn.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/methods/__pycache__/to_dict.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/ops/__pycache__/__init__.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/ops/__pycache__/array_ops.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/ops/__pycache__/common.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/ops/__pycache__/dispatch.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/ops/__pycache__/docstrings.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/ops/__pycache__/invalid.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/ops/__pycache__/mask_ops.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/ops/__pycache__/missing.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/tools/__pycache__/datetimes.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/tools/__pycache__/numeric.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/tools/__pycache__/timedeltas.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/window/__pycache__/online.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/errors/__pycache__/__init__.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/clipboard/__pycache__/__init__.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/excel/__pycache__/__init__.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/excel/__pycache__/_base.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/excel/__pycache__/_calamine.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/excel/__pycache__/_odfreader.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/excel/__pycache__/_odswriter.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/__pycache__/_warnings.cpython-312.pyc
ADDED
|
Binary file (8.6 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/__pycache__/contexts.cpython-312.pyc
ADDED
|
Binary file (8.22 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/arrays/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (1.39 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/__init__.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pandas.core.arrays.arrow import ArrowExtensionArray
|
| 2 |
+
from pandas.core.arrays.base import (
|
| 3 |
+
ExtensionArray,
|
| 4 |
+
ExtensionOpsMixin,
|
| 5 |
+
ExtensionScalarOpsMixin,
|
| 6 |
+
)
|
| 7 |
+
from pandas.core.arrays.boolean import BooleanArray
|
| 8 |
+
from pandas.core.arrays.categorical import Categorical
|
| 9 |
+
from pandas.core.arrays.datetimes import DatetimeArray
|
| 10 |
+
from pandas.core.arrays.floating import FloatingArray
|
| 11 |
+
from pandas.core.arrays.integer import IntegerArray
|
| 12 |
+
from pandas.core.arrays.interval import IntervalArray
|
| 13 |
+
from pandas.core.arrays.masked import BaseMaskedArray
|
| 14 |
+
from pandas.core.arrays.numpy_ import NumpyExtensionArray
|
| 15 |
+
from pandas.core.arrays.period import (
|
| 16 |
+
PeriodArray,
|
| 17 |
+
period_array,
|
| 18 |
+
)
|
| 19 |
+
from pandas.core.arrays.sparse import SparseArray
|
| 20 |
+
from pandas.core.arrays.string_ import StringArray
|
| 21 |
+
from pandas.core.arrays.string_arrow import ArrowStringArray
|
| 22 |
+
from pandas.core.arrays.timedeltas import TimedeltaArray
|
| 23 |
+
|
| 24 |
+
__all__ = [
|
| 25 |
+
"ArrowExtensionArray",
|
| 26 |
+
"ExtensionArray",
|
| 27 |
+
"ExtensionOpsMixin",
|
| 28 |
+
"ExtensionScalarOpsMixin",
|
| 29 |
+
"ArrowStringArray",
|
| 30 |
+
"BaseMaskedArray",
|
| 31 |
+
"BooleanArray",
|
| 32 |
+
"Categorical",
|
| 33 |
+
"DatetimeArray",
|
| 34 |
+
"FloatingArray",
|
| 35 |
+
"IntegerArray",
|
| 36 |
+
"IntervalArray",
|
| 37 |
+
"NumpyExtensionArray",
|
| 38 |
+
"PeriodArray",
|
| 39 |
+
"period_array",
|
| 40 |
+
"SparseArray",
|
| 41 |
+
"StringArray",
|
| 42 |
+
"TimedeltaArray",
|
| 43 |
+
]
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/_arrow_string_mixins.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Literal
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
from pandas.compat import pa_version_under10p1
|
| 8 |
+
|
| 9 |
+
if not pa_version_under10p1:
|
| 10 |
+
import pyarrow as pa
|
| 11 |
+
import pyarrow.compute as pc
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class ArrowStringArrayMixin:
|
| 15 |
+
_pa_array = None
|
| 16 |
+
|
| 17 |
+
def __init__(self, *args, **kwargs) -> None:
|
| 18 |
+
raise NotImplementedError
|
| 19 |
+
|
| 20 |
+
def _str_pad(
|
| 21 |
+
self,
|
| 22 |
+
width: int,
|
| 23 |
+
side: Literal["left", "right", "both"] = "left",
|
| 24 |
+
fillchar: str = " ",
|
| 25 |
+
):
|
| 26 |
+
if side == "left":
|
| 27 |
+
pa_pad = pc.utf8_lpad
|
| 28 |
+
elif side == "right":
|
| 29 |
+
pa_pad = pc.utf8_rpad
|
| 30 |
+
elif side == "both":
|
| 31 |
+
pa_pad = pc.utf8_center
|
| 32 |
+
else:
|
| 33 |
+
raise ValueError(
|
| 34 |
+
f"Invalid side: {side}. Side must be one of 'left', 'right', 'both'"
|
| 35 |
+
)
|
| 36 |
+
return type(self)(pa_pad(self._pa_array, width=width, padding=fillchar))
|
| 37 |
+
|
| 38 |
+
def _str_get(self, i: int):
|
| 39 |
+
lengths = pc.utf8_length(self._pa_array)
|
| 40 |
+
if i >= 0:
|
| 41 |
+
out_of_bounds = pc.greater_equal(i, lengths)
|
| 42 |
+
start = i
|
| 43 |
+
stop = i + 1
|
| 44 |
+
step = 1
|
| 45 |
+
else:
|
| 46 |
+
out_of_bounds = pc.greater(-i, lengths)
|
| 47 |
+
start = i
|
| 48 |
+
stop = i - 1
|
| 49 |
+
step = -1
|
| 50 |
+
not_out_of_bounds = pc.invert(out_of_bounds.fill_null(True))
|
| 51 |
+
selected = pc.utf8_slice_codeunits(
|
| 52 |
+
self._pa_array, start=start, stop=stop, step=step
|
| 53 |
+
)
|
| 54 |
+
null_value = pa.scalar(
|
| 55 |
+
None, type=self._pa_array.type # type: ignore[attr-defined]
|
| 56 |
+
)
|
| 57 |
+
result = pc.if_else(not_out_of_bounds, selected, null_value)
|
| 58 |
+
return type(self)(result)
|
| 59 |
+
|
| 60 |
+
def _str_slice_replace(
|
| 61 |
+
self, start: int | None = None, stop: int | None = None, repl: str | None = None
|
| 62 |
+
):
|
| 63 |
+
if repl is None:
|
| 64 |
+
repl = ""
|
| 65 |
+
if start is None:
|
| 66 |
+
start = 0
|
| 67 |
+
if stop is None:
|
| 68 |
+
stop = np.iinfo(np.int64).max
|
| 69 |
+
return type(self)(pc.utf8_replace_slice(self._pa_array, start, stop, repl))
|
| 70 |
+
|
| 71 |
+
def _str_capitalize(self):
|
| 72 |
+
return type(self)(pc.utf8_capitalize(self._pa_array))
|
| 73 |
+
|
| 74 |
+
def _str_title(self):
|
| 75 |
+
return type(self)(pc.utf8_title(self._pa_array))
|
| 76 |
+
|
| 77 |
+
def _str_swapcase(self):
|
| 78 |
+
return type(self)(pc.utf8_swapcase(self._pa_array))
|
| 79 |
+
|
| 80 |
+
def _str_removesuffix(self, suffix: str):
|
| 81 |
+
ends_with = pc.ends_with(self._pa_array, pattern=suffix)
|
| 82 |
+
removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
|
| 83 |
+
result = pc.if_else(ends_with, removed, self._pa_array)
|
| 84 |
+
return type(self)(result)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/_mixins.py
ADDED
|
@@ -0,0 +1,547 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from functools import wraps
|
| 4 |
+
from typing import (
|
| 5 |
+
TYPE_CHECKING,
|
| 6 |
+
Any,
|
| 7 |
+
Literal,
|
| 8 |
+
cast,
|
| 9 |
+
overload,
|
| 10 |
+
)
|
| 11 |
+
|
| 12 |
+
import numpy as np
|
| 13 |
+
|
| 14 |
+
from pandas._libs import lib
|
| 15 |
+
from pandas._libs.arrays import NDArrayBacked
|
| 16 |
+
from pandas._libs.tslibs import is_supported_dtype
|
| 17 |
+
from pandas._typing import (
|
| 18 |
+
ArrayLike,
|
| 19 |
+
AxisInt,
|
| 20 |
+
Dtype,
|
| 21 |
+
F,
|
| 22 |
+
FillnaOptions,
|
| 23 |
+
PositionalIndexer2D,
|
| 24 |
+
PositionalIndexerTuple,
|
| 25 |
+
ScalarIndexer,
|
| 26 |
+
Self,
|
| 27 |
+
SequenceIndexer,
|
| 28 |
+
Shape,
|
| 29 |
+
TakeIndexer,
|
| 30 |
+
npt,
|
| 31 |
+
)
|
| 32 |
+
from pandas.errors import AbstractMethodError
|
| 33 |
+
from pandas.util._decorators import doc
|
| 34 |
+
from pandas.util._validators import (
|
| 35 |
+
validate_bool_kwarg,
|
| 36 |
+
validate_fillna_kwargs,
|
| 37 |
+
validate_insert_loc,
|
| 38 |
+
)
|
| 39 |
+
|
| 40 |
+
from pandas.core.dtypes.common import pandas_dtype
|
| 41 |
+
from pandas.core.dtypes.dtypes import (
|
| 42 |
+
DatetimeTZDtype,
|
| 43 |
+
ExtensionDtype,
|
| 44 |
+
PeriodDtype,
|
| 45 |
+
)
|
| 46 |
+
from pandas.core.dtypes.missing import array_equivalent
|
| 47 |
+
|
| 48 |
+
from pandas.core import missing
|
| 49 |
+
from pandas.core.algorithms import (
|
| 50 |
+
take,
|
| 51 |
+
unique,
|
| 52 |
+
value_counts_internal as value_counts,
|
| 53 |
+
)
|
| 54 |
+
from pandas.core.array_algos.quantile import quantile_with_mask
|
| 55 |
+
from pandas.core.array_algos.transforms import shift
|
| 56 |
+
from pandas.core.arrays.base import ExtensionArray
|
| 57 |
+
from pandas.core.construction import extract_array
|
| 58 |
+
from pandas.core.indexers import check_array_indexer
|
| 59 |
+
from pandas.core.sorting import nargminmax
|
| 60 |
+
|
| 61 |
+
if TYPE_CHECKING:
|
| 62 |
+
from collections.abc import Sequence
|
| 63 |
+
|
| 64 |
+
from pandas._typing import (
|
| 65 |
+
NumpySorter,
|
| 66 |
+
NumpyValueArrayLike,
|
| 67 |
+
)
|
| 68 |
+
|
| 69 |
+
from pandas import Series
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def ravel_compat(meth: F) -> F:
|
| 73 |
+
"""
|
| 74 |
+
Decorator to ravel a 2D array before passing it to a cython operation,
|
| 75 |
+
then reshape the result to our own shape.
|
| 76 |
+
"""
|
| 77 |
+
|
| 78 |
+
@wraps(meth)
|
| 79 |
+
def method(self, *args, **kwargs):
|
| 80 |
+
if self.ndim == 1:
|
| 81 |
+
return meth(self, *args, **kwargs)
|
| 82 |
+
|
| 83 |
+
flags = self._ndarray.flags
|
| 84 |
+
flat = self.ravel("K")
|
| 85 |
+
result = meth(flat, *args, **kwargs)
|
| 86 |
+
order = "F" if flags.f_contiguous else "C"
|
| 87 |
+
return result.reshape(self.shape, order=order)
|
| 88 |
+
|
| 89 |
+
return cast(F, method)
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
class NDArrayBackedExtensionArray(NDArrayBacked, ExtensionArray):
|
| 93 |
+
"""
|
| 94 |
+
ExtensionArray that is backed by a single NumPy ndarray.
|
| 95 |
+
"""
|
| 96 |
+
|
| 97 |
+
_ndarray: np.ndarray
|
| 98 |
+
|
| 99 |
+
# scalar used to denote NA value inside our self._ndarray, e.g. -1
|
| 100 |
+
# for Categorical, iNaT for Period. Outside of object dtype,
|
| 101 |
+
# self.isna() should be exactly locations in self._ndarray with
|
| 102 |
+
# _internal_fill_value.
|
| 103 |
+
_internal_fill_value: Any
|
| 104 |
+
|
| 105 |
+
def _box_func(self, x):
|
| 106 |
+
"""
|
| 107 |
+
Wrap numpy type in our dtype.type if necessary.
|
| 108 |
+
"""
|
| 109 |
+
return x
|
| 110 |
+
|
| 111 |
+
def _validate_scalar(self, value):
|
| 112 |
+
# used by NDArrayBackedExtensionIndex.insert
|
| 113 |
+
raise AbstractMethodError(self)
|
| 114 |
+
|
| 115 |
+
# ------------------------------------------------------------------------
|
| 116 |
+
|
| 117 |
+
def view(self, dtype: Dtype | None = None) -> ArrayLike:
|
| 118 |
+
# We handle datetime64, datetime64tz, timedelta64, and period
|
| 119 |
+
# dtypes here. Everything else we pass through to the underlying
|
| 120 |
+
# ndarray.
|
| 121 |
+
if dtype is None or dtype is self.dtype:
|
| 122 |
+
return self._from_backing_data(self._ndarray)
|
| 123 |
+
|
| 124 |
+
if isinstance(dtype, type):
|
| 125 |
+
# we sometimes pass non-dtype objects, e.g np.ndarray;
|
| 126 |
+
# pass those through to the underlying ndarray
|
| 127 |
+
return self._ndarray.view(dtype)
|
| 128 |
+
|
| 129 |
+
dtype = pandas_dtype(dtype)
|
| 130 |
+
arr = self._ndarray
|
| 131 |
+
|
| 132 |
+
if isinstance(dtype, PeriodDtype):
|
| 133 |
+
cls = dtype.construct_array_type()
|
| 134 |
+
return cls(arr.view("i8"), dtype=dtype)
|
| 135 |
+
elif isinstance(dtype, DatetimeTZDtype):
|
| 136 |
+
dt_cls = dtype.construct_array_type()
|
| 137 |
+
dt64_values = arr.view(f"M8[{dtype.unit}]")
|
| 138 |
+
return dt_cls._simple_new(dt64_values, dtype=dtype)
|
| 139 |
+
elif lib.is_np_dtype(dtype, "M") and is_supported_dtype(dtype):
|
| 140 |
+
from pandas.core.arrays import DatetimeArray
|
| 141 |
+
|
| 142 |
+
dt64_values = arr.view(dtype)
|
| 143 |
+
return DatetimeArray._simple_new(dt64_values, dtype=dtype)
|
| 144 |
+
|
| 145 |
+
elif lib.is_np_dtype(dtype, "m") and is_supported_dtype(dtype):
|
| 146 |
+
from pandas.core.arrays import TimedeltaArray
|
| 147 |
+
|
| 148 |
+
td64_values = arr.view(dtype)
|
| 149 |
+
return TimedeltaArray._simple_new(td64_values, dtype=dtype)
|
| 150 |
+
|
| 151 |
+
# error: Argument "dtype" to "view" of "_ArrayOrScalarCommon" has incompatible
|
| 152 |
+
# type "Union[ExtensionDtype, dtype[Any]]"; expected "Union[dtype[Any], None,
|
| 153 |
+
# type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int,
|
| 154 |
+
# Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]"
|
| 155 |
+
return arr.view(dtype=dtype) # type: ignore[arg-type]
|
| 156 |
+
|
| 157 |
+
def take(
|
| 158 |
+
self,
|
| 159 |
+
indices: TakeIndexer,
|
| 160 |
+
*,
|
| 161 |
+
allow_fill: bool = False,
|
| 162 |
+
fill_value: Any = None,
|
| 163 |
+
axis: AxisInt = 0,
|
| 164 |
+
) -> Self:
|
| 165 |
+
if allow_fill:
|
| 166 |
+
fill_value = self._validate_scalar(fill_value)
|
| 167 |
+
|
| 168 |
+
new_data = take(
|
| 169 |
+
self._ndarray,
|
| 170 |
+
indices,
|
| 171 |
+
allow_fill=allow_fill,
|
| 172 |
+
fill_value=fill_value,
|
| 173 |
+
axis=axis,
|
| 174 |
+
)
|
| 175 |
+
return self._from_backing_data(new_data)
|
| 176 |
+
|
| 177 |
+
# ------------------------------------------------------------------------
|
| 178 |
+
|
| 179 |
+
def equals(self, other) -> bool:
|
| 180 |
+
if type(self) is not type(other):
|
| 181 |
+
return False
|
| 182 |
+
if self.dtype != other.dtype:
|
| 183 |
+
return False
|
| 184 |
+
return bool(array_equivalent(self._ndarray, other._ndarray, dtype_equal=True))
|
| 185 |
+
|
| 186 |
+
@classmethod
|
| 187 |
+
def _from_factorized(cls, values, original):
|
| 188 |
+
assert values.dtype == original._ndarray.dtype
|
| 189 |
+
return original._from_backing_data(values)
|
| 190 |
+
|
| 191 |
+
def _values_for_argsort(self) -> np.ndarray:
|
| 192 |
+
return self._ndarray
|
| 193 |
+
|
| 194 |
+
def _values_for_factorize(self):
|
| 195 |
+
return self._ndarray, self._internal_fill_value
|
| 196 |
+
|
| 197 |
+
def _hash_pandas_object(
|
| 198 |
+
self, *, encoding: str, hash_key: str, categorize: bool
|
| 199 |
+
) -> npt.NDArray[np.uint64]:
|
| 200 |
+
from pandas.core.util.hashing import hash_array
|
| 201 |
+
|
| 202 |
+
values = self._ndarray
|
| 203 |
+
return hash_array(
|
| 204 |
+
values, encoding=encoding, hash_key=hash_key, categorize=categorize
|
| 205 |
+
)
|
| 206 |
+
|
| 207 |
+
# Signature of "argmin" incompatible with supertype "ExtensionArray"
|
| 208 |
+
def argmin(self, axis: AxisInt = 0, skipna: bool = True): # type: ignore[override]
|
| 209 |
+
# override base class by adding axis keyword
|
| 210 |
+
validate_bool_kwarg(skipna, "skipna")
|
| 211 |
+
if not skipna and self._hasna:
|
| 212 |
+
raise NotImplementedError
|
| 213 |
+
return nargminmax(self, "argmin", axis=axis)
|
| 214 |
+
|
| 215 |
+
# Signature of "argmax" incompatible with supertype "ExtensionArray"
|
| 216 |
+
def argmax(self, axis: AxisInt = 0, skipna: bool = True): # type: ignore[override]
|
| 217 |
+
# override base class by adding axis keyword
|
| 218 |
+
validate_bool_kwarg(skipna, "skipna")
|
| 219 |
+
if not skipna and self._hasna:
|
| 220 |
+
raise NotImplementedError
|
| 221 |
+
return nargminmax(self, "argmax", axis=axis)
|
| 222 |
+
|
| 223 |
+
def unique(self) -> Self:
|
| 224 |
+
new_data = unique(self._ndarray)
|
| 225 |
+
return self._from_backing_data(new_data)
|
| 226 |
+
|
| 227 |
+
@classmethod
|
| 228 |
+
@doc(ExtensionArray._concat_same_type)
|
| 229 |
+
def _concat_same_type(
|
| 230 |
+
cls,
|
| 231 |
+
to_concat: Sequence[Self],
|
| 232 |
+
axis: AxisInt = 0,
|
| 233 |
+
) -> Self:
|
| 234 |
+
if not lib.dtypes_all_equal([x.dtype for x in to_concat]):
|
| 235 |
+
dtypes = {str(x.dtype) for x in to_concat}
|
| 236 |
+
raise ValueError("to_concat must have the same dtype", dtypes)
|
| 237 |
+
|
| 238 |
+
return super()._concat_same_type(to_concat, axis=axis)
|
| 239 |
+
|
| 240 |
+
@doc(ExtensionArray.searchsorted)
|
| 241 |
+
def searchsorted(
|
| 242 |
+
self,
|
| 243 |
+
value: NumpyValueArrayLike | ExtensionArray,
|
| 244 |
+
side: Literal["left", "right"] = "left",
|
| 245 |
+
sorter: NumpySorter | None = None,
|
| 246 |
+
) -> npt.NDArray[np.intp] | np.intp:
|
| 247 |
+
npvalue = self._validate_setitem_value(value)
|
| 248 |
+
return self._ndarray.searchsorted(npvalue, side=side, sorter=sorter)
|
| 249 |
+
|
| 250 |
+
@doc(ExtensionArray.shift)
|
| 251 |
+
def shift(self, periods: int = 1, fill_value=None):
|
| 252 |
+
# NB: shift is always along axis=0
|
| 253 |
+
axis = 0
|
| 254 |
+
fill_value = self._validate_scalar(fill_value)
|
| 255 |
+
new_values = shift(self._ndarray, periods, axis, fill_value)
|
| 256 |
+
|
| 257 |
+
return self._from_backing_data(new_values)
|
| 258 |
+
|
| 259 |
+
def __setitem__(self, key, value) -> None:
|
| 260 |
+
key = check_array_indexer(self, key)
|
| 261 |
+
value = self._validate_setitem_value(value)
|
| 262 |
+
self._ndarray[key] = value
|
| 263 |
+
|
| 264 |
+
def _validate_setitem_value(self, value):
|
| 265 |
+
return value
|
| 266 |
+
|
| 267 |
+
@overload
|
| 268 |
+
def __getitem__(self, key: ScalarIndexer) -> Any:
|
| 269 |
+
...
|
| 270 |
+
|
| 271 |
+
@overload
|
| 272 |
+
def __getitem__(
|
| 273 |
+
self,
|
| 274 |
+
key: SequenceIndexer | PositionalIndexerTuple,
|
| 275 |
+
) -> Self:
|
| 276 |
+
...
|
| 277 |
+
|
| 278 |
+
def __getitem__(
|
| 279 |
+
self,
|
| 280 |
+
key: PositionalIndexer2D,
|
| 281 |
+
) -> Self | Any:
|
| 282 |
+
if lib.is_integer(key):
|
| 283 |
+
# fast-path
|
| 284 |
+
result = self._ndarray[key]
|
| 285 |
+
if self.ndim == 1:
|
| 286 |
+
return self._box_func(result)
|
| 287 |
+
return self._from_backing_data(result)
|
| 288 |
+
|
| 289 |
+
# error: Incompatible types in assignment (expression has type "ExtensionArray",
|
| 290 |
+
# variable has type "Union[int, slice, ndarray]")
|
| 291 |
+
key = extract_array(key, extract_numpy=True) # type: ignore[assignment]
|
| 292 |
+
key = check_array_indexer(self, key)
|
| 293 |
+
result = self._ndarray[key]
|
| 294 |
+
if lib.is_scalar(result):
|
| 295 |
+
return self._box_func(result)
|
| 296 |
+
|
| 297 |
+
result = self._from_backing_data(result)
|
| 298 |
+
return result
|
| 299 |
+
|
| 300 |
+
def _fill_mask_inplace(
|
| 301 |
+
self, method: str, limit: int | None, mask: npt.NDArray[np.bool_]
|
| 302 |
+
) -> None:
|
| 303 |
+
# (for now) when self.ndim == 2, we assume axis=0
|
| 304 |
+
func = missing.get_fill_func(method, ndim=self.ndim)
|
| 305 |
+
func(self._ndarray.T, limit=limit, mask=mask.T)
|
| 306 |
+
|
| 307 |
+
def _pad_or_backfill(
|
| 308 |
+
self,
|
| 309 |
+
*,
|
| 310 |
+
method: FillnaOptions,
|
| 311 |
+
limit: int | None = None,
|
| 312 |
+
limit_area: Literal["inside", "outside"] | None = None,
|
| 313 |
+
copy: bool = True,
|
| 314 |
+
) -> Self:
|
| 315 |
+
mask = self.isna()
|
| 316 |
+
if mask.any():
|
| 317 |
+
# (for now) when self.ndim == 2, we assume axis=0
|
| 318 |
+
func = missing.get_fill_func(method, ndim=self.ndim)
|
| 319 |
+
|
| 320 |
+
npvalues = self._ndarray.T
|
| 321 |
+
if copy:
|
| 322 |
+
npvalues = npvalues.copy()
|
| 323 |
+
func(npvalues, limit=limit, limit_area=limit_area, mask=mask.T)
|
| 324 |
+
npvalues = npvalues.T
|
| 325 |
+
|
| 326 |
+
if copy:
|
| 327 |
+
new_values = self._from_backing_data(npvalues)
|
| 328 |
+
else:
|
| 329 |
+
new_values = self
|
| 330 |
+
|
| 331 |
+
else:
|
| 332 |
+
if copy:
|
| 333 |
+
new_values = self.copy()
|
| 334 |
+
else:
|
| 335 |
+
new_values = self
|
| 336 |
+
return new_values
|
| 337 |
+
|
| 338 |
+
@doc(ExtensionArray.fillna)
|
| 339 |
+
def fillna(
|
| 340 |
+
self, value=None, method=None, limit: int | None = None, copy: bool = True
|
| 341 |
+
) -> Self:
|
| 342 |
+
value, method = validate_fillna_kwargs(
|
| 343 |
+
value, method, validate_scalar_dict_value=False
|
| 344 |
+
)
|
| 345 |
+
|
| 346 |
+
mask = self.isna()
|
| 347 |
+
# error: Argument 2 to "check_value_size" has incompatible type
|
| 348 |
+
# "ExtensionArray"; expected "ndarray"
|
| 349 |
+
value = missing.check_value_size(
|
| 350 |
+
value, mask, len(self) # type: ignore[arg-type]
|
| 351 |
+
)
|
| 352 |
+
|
| 353 |
+
if mask.any():
|
| 354 |
+
if method is not None:
|
| 355 |
+
# (for now) when self.ndim == 2, we assume axis=0
|
| 356 |
+
func = missing.get_fill_func(method, ndim=self.ndim)
|
| 357 |
+
npvalues = self._ndarray.T
|
| 358 |
+
if copy:
|
| 359 |
+
npvalues = npvalues.copy()
|
| 360 |
+
func(npvalues, limit=limit, mask=mask.T)
|
| 361 |
+
npvalues = npvalues.T
|
| 362 |
+
|
| 363 |
+
# TODO: NumpyExtensionArray didn't used to copy, need tests
|
| 364 |
+
# for this
|
| 365 |
+
new_values = self._from_backing_data(npvalues)
|
| 366 |
+
else:
|
| 367 |
+
# fill with value
|
| 368 |
+
if copy:
|
| 369 |
+
new_values = self.copy()
|
| 370 |
+
else:
|
| 371 |
+
new_values = self[:]
|
| 372 |
+
new_values[mask] = value
|
| 373 |
+
else:
|
| 374 |
+
# We validate the fill_value even if there is nothing to fill
|
| 375 |
+
if value is not None:
|
| 376 |
+
self._validate_setitem_value(value)
|
| 377 |
+
|
| 378 |
+
if not copy:
|
| 379 |
+
new_values = self[:]
|
| 380 |
+
else:
|
| 381 |
+
new_values = self.copy()
|
| 382 |
+
return new_values
|
| 383 |
+
|
| 384 |
+
# ------------------------------------------------------------------------
|
| 385 |
+
# Reductions
|
| 386 |
+
|
| 387 |
+
def _wrap_reduction_result(self, axis: AxisInt | None, result):
|
| 388 |
+
if axis is None or self.ndim == 1:
|
| 389 |
+
return self._box_func(result)
|
| 390 |
+
return self._from_backing_data(result)
|
| 391 |
+
|
| 392 |
+
# ------------------------------------------------------------------------
|
| 393 |
+
# __array_function__ methods
|
| 394 |
+
|
| 395 |
+
def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
|
| 396 |
+
"""
|
| 397 |
+
Analogue to np.putmask(self, mask, value)
|
| 398 |
+
|
| 399 |
+
Parameters
|
| 400 |
+
----------
|
| 401 |
+
mask : np.ndarray[bool]
|
| 402 |
+
value : scalar or listlike
|
| 403 |
+
|
| 404 |
+
Raises
|
| 405 |
+
------
|
| 406 |
+
TypeError
|
| 407 |
+
If value cannot be cast to self.dtype.
|
| 408 |
+
"""
|
| 409 |
+
value = self._validate_setitem_value(value)
|
| 410 |
+
|
| 411 |
+
np.putmask(self._ndarray, mask, value)
|
| 412 |
+
|
| 413 |
+
def _where(self: Self, mask: npt.NDArray[np.bool_], value) -> Self:
|
| 414 |
+
"""
|
| 415 |
+
Analogue to np.where(mask, self, value)
|
| 416 |
+
|
| 417 |
+
Parameters
|
| 418 |
+
----------
|
| 419 |
+
mask : np.ndarray[bool]
|
| 420 |
+
value : scalar or listlike
|
| 421 |
+
|
| 422 |
+
Raises
|
| 423 |
+
------
|
| 424 |
+
TypeError
|
| 425 |
+
If value cannot be cast to self.dtype.
|
| 426 |
+
"""
|
| 427 |
+
value = self._validate_setitem_value(value)
|
| 428 |
+
|
| 429 |
+
res_values = np.where(mask, self._ndarray, value)
|
| 430 |
+
if res_values.dtype != self._ndarray.dtype:
|
| 431 |
+
raise AssertionError(
|
| 432 |
+
# GH#56410
|
| 433 |
+
"Something has gone wrong, please report a bug at "
|
| 434 |
+
"github.com/pandas-dev/pandas/"
|
| 435 |
+
)
|
| 436 |
+
return self._from_backing_data(res_values)
|
| 437 |
+
|
| 438 |
+
# ------------------------------------------------------------------------
|
| 439 |
+
# Index compat methods
|
| 440 |
+
|
| 441 |
+
def insert(self, loc: int, item) -> Self:
    """
    Return a new array with ``item`` placed at position ``loc``.

    ``loc`` is normalized against ``len(self)`` by ``validate_insert_loc``
    and ``item`` is converted with ``self._validate_scalar``. The backing
    ndarray is then rebuilt from the part before ``loc``, the new element,
    and the part from ``loc`` onwards. ``self`` is not modified.

    Parameters
    ----------
    loc : int
    item : object

    Returns
    -------
    type(self)
    """
    position = validate_insert_loc(loc, len(self))

    encoded = self._validate_scalar(item)

    backing = self._ndarray
    head = backing[:position]
    # One-element array in the backing dtype so the concatenation
    # does not change dtype.
    middle = np.asarray([encoded], dtype=backing.dtype)
    tail = backing[position:]
    return self._from_backing_data(np.concatenate((head, middle, tail)))
|
| 467 |
+
|
| 468 |
+
# ------------------------------------------------------------------------
|
| 469 |
+
# Additional array methods
|
| 470 |
+
# These are not part of the EA API, but we implement them because
|
| 471 |
+
# pandas assumes they're there.
|
| 472 |
+
|
| 473 |
+
def value_counts(self, dropna: bool = True) -> Series:
    """
    Return a Series holding the count of each distinct value.

    The counting itself is delegated to the file-level ``value_counts``
    helper, called on the backing ndarray with ``sort=False``. The index
    of that result is re-wrapped with ``self._from_backing_data`` before
    the final Series is built.

    Parameters
    ----------
    dropna : bool, default True
        Don't include counts of NA values.

    Returns
    -------
    Series

    Raises
    ------
    NotImplementedError
        If the array is not 1-dimensional.
    """
    if self.ndim != 1:
        raise NotImplementedError

    from pandas import (
        Index,
        Series,
    )

    if not dropna:
        backing = self._ndarray
    else:
        # error: Unsupported operand type for ~ ("ExtensionArray")
        backing = self[~self.isna()]._ndarray  # type: ignore[operator]

    counts = value_counts(backing, sort=False, dropna=dropna)

    # Wrap the counted values back into this array type for the index.
    wrapped = self._from_backing_data(np.asarray(counts.index._data))
    result_index = Index(wrapped, name=counts.index.name)
    return Series(counts._values, index=result_index, name=counts.name, copy=False)
|
| 505 |
+
|
| 506 |
+
def _quantile(
    self,
    qs: npt.NDArray[np.float64],
    interpolation: str,
) -> Self:
    """
    Compute quantiles of the backing data, ignoring NA positions.

    The NA mask from ``self.isna()``, the backing ndarray and
    ``self._internal_fill_value`` are passed to ``quantile_with_mask``.
    Its output goes through ``self._cast_quantile_result`` and is wrapped
    with ``self._from_backing_data``.

    Parameters
    ----------
    qs : np.ndarray[float64]
        Quantiles to compute, forwarded unchanged to ``quantile_with_mask``.
    interpolation : str
        Interpolation method, forwarded unchanged to ``quantile_with_mask``.

    Returns
    -------
    type(self)
    """
    # TODO: disable for Categorical if not ordered?

    na_mask = np.asarray(self.isna())
    backing = self._ndarray
    fill = self._internal_fill_value

    raw = quantile_with_mask(backing, na_mask, fill, qs, interpolation)

    casted = self._cast_quantile_result(raw)
    return self._from_backing_data(casted)
|
| 521 |
+
|
| 522 |
+
# TODO: see if we can share this with other dispatch-wrapping methods
|
| 523 |
+
def _cast_quantile_result(self, res_values: np.ndarray) -> np.ndarray:
    """
    Hook for adjusting the dtype of a ``quantile_with_mask`` result.

    ``_quantile`` calls this on the raw quantile values before passing
    them to ``_from_backing_data``. This implementation returns
    ``res_values`` unchanged.

    Parameters
    ----------
    res_values : np.ndarray

    Returns
    -------
    np.ndarray
    """
    return res_values
|
| 529 |
+
|
| 530 |
+
# ------------------------------------------------------------------------
|
| 531 |
+
# numpy-like methods
|
| 532 |
+
|
| 533 |
+
@classmethod
def _empty(cls, shape: Shape, dtype: ExtensionDtype) -> Self:
    """
    Analogous to np.empty(shape, dtype=dtype)

    Parameters
    ----------
    shape : tuple[int]
    dtype : ExtensionDtype

    Returns
    -------
    type(cls)
        An array of the requested shape wrapping an uninitialized
        backing ndarray.
    """
    # Naive way of discovering the backing ndarray dtype: build a
    # zero-length instance for ``dtype`` and read it off that.
    template = cls._from_sequence([], dtype=dtype)
    backing_dtype = template._ndarray.dtype
    return template._from_backing_data(np.empty(shape, dtype=backing_dtype))
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/_ranges.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Helper functions to generate range-like data for DatetimeArray
|
| 3 |
+
(and possibly TimedeltaArray/PeriodArray)
|
| 4 |
+
"""
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
from typing import TYPE_CHECKING
|
| 8 |
+
|
| 9 |
+
import numpy as np
|
| 10 |
+
|
| 11 |
+
from pandas._libs.lib import i8max
|
| 12 |
+
from pandas._libs.tslibs import (
|
| 13 |
+
BaseOffset,
|
| 14 |
+
OutOfBoundsDatetime,
|
| 15 |
+
Timedelta,
|
| 16 |
+
Timestamp,
|
| 17 |
+
iNaT,
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
if TYPE_CHECKING:
|
| 21 |
+
from pandas._typing import npt
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def generate_regular_range(
    start: Timestamp | Timedelta | None,
    end: Timestamp | Timedelta | None,
    periods: int | None,
    freq: BaseOffset,
    unit: str = "ns",
) -> npt.NDArray[np.int64]:
    """
    Generate a range of dates or timestamps with the spans between dates
    described by the given `freq` DateOffset.

    Exactly two of `start`, `end` and `periods` are expected to be given;
    the remaining bound is derived from `freq`.

    Parameters
    ----------
    start : Timedelta, Timestamp or None
        First point of produced date range.
    end : Timedelta, Timestamp or None
        Last point of produced date range.
    periods : int or None
        Number of periods in produced date range.
    freq : Tick
        Describes space between dates in produced date range.
    unit : str, default "ns"
        The resolution the output is meant to represent.

    Returns
    -------
    ndarray[np.int64]
        Representing the given resolution.

    Raises
    ------
    ValueError
        If `freq` cannot be converted losslessly to `unit`, or if neither
        a (start, end) pair nor `periods` with one endpoint is supplied.
    """
    istart = start._value if start is not None else None
    iend = end._value if end is not None else None
    freq.nanos  # raises if non-fixed frequency
    td = Timedelta(freq)
    b: int
    e: int
    try:
        td = td.as_unit(unit, round_ok=False)
    except ValueError as err:
        raise ValueError(
            f"freq={freq} is incompatible with unit={unit}. "
            "Use a lower freq or a higher unit instead."
        ) from err
    stride = int(td._value)

    if periods is None and istart is not None and iend is not None:
        b = istart
        # cannot just use e = Timestamp(end) + 1 because arange breaks when
        #  stride is too large, see GH10887
        e = b + (iend - b) // stride * stride + stride // 2 + 1
    elif istart is not None and periods is not None:
        b = istart
        e = _generate_range_overflow_safe(b, periods, stride, side="start")
    elif iend is not None and periods is not None:
        e = iend + stride
        b = _generate_range_overflow_safe(e, periods, stride, side="end")
    else:
        raise ValueError(
            "at least 'start' or 'end' should be specified if a 'period' is given."
        )

    with np.errstate(over="raise"):
        # If the range is sufficiently large, np.arange may overflow
        #  and incorrectly return an empty array if not caught.
        try:
            values = np.arange(b, e, stride, dtype=np.int64)
        except FloatingPointError:
            # Rebuild the range with exact Python-int arithmetic. ``range``
            # stops at the first value that reaches ``e`` even when ``e``
            # is not an exact multiple of ``stride`` away from ``b`` (as in
            # the start+end branch above), where stepping until equality
            # with ``e`` would never terminate.
            values = np.array(range(b, e, stride), dtype=np.int64)
    return values
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def _generate_range_overflow_safe(
    endpoint: int, periods: int, stride: int, side: str = "start"
) -> int:
    """
    Calculate the second endpoint for passing to np.arange, checking
    to avoid an integer overflow.

    The magnitude of ``periods * stride`` is first computed as uint64.
    If it fits in int64 the work is delegated to
    ``_generate_range_overflow_safe_signed``; otherwise the request is
    either rejected or split into two halves that are solved recursively.

    Parameters
    ----------
    endpoint : int
        integer timestamp of the known endpoint of the desired range
        (``generate_regular_range`` passes a ``_value`` here)
    periods : int
        number of periods in the desired range
    stride : int
        integer step between periods in the desired range, in the same
        resolution as ``endpoint``
    side : {'start', 'end'}
        which end of the range `endpoint` refers to

    Returns
    -------
    other_end : int

    Raises
    ------
    OutOfBoundsDatetime
        If the uint64 product raises FloatingPointError under
        ``np.errstate(over="raise")``, or if the sign checks below show the
        result cannot fit.
    """
    # GH#14187 raise instead of incorrectly wrapping around
    assert side in ["start", "end"]

    i64max = np.uint64(i8max)
    msg = f"Cannot generate range with {side}={endpoint} and periods={periods}"

    with np.errstate(over="raise"):
        # if periods * strides cannot be multiplied within the *uint64* bounds,
        #  we cannot salvage the operation by recursing, so raise
        try:
            addend = np.uint64(periods) * np.uint64(np.abs(stride))
        except FloatingPointError as err:
            raise OutOfBoundsDatetime(msg) from err

    if np.abs(addend) <= i64max:
        # relatively easy case without casting concerns
        return _generate_range_overflow_safe_signed(endpoint, periods, stride, side)

    elif (endpoint > 0 and side == "start" and stride > 0) or (
        endpoint < 0 < stride and side == "end"
    ):
        # no chance of not-overflowing
        raise OutOfBoundsDatetime(msg)

    elif side == "end" and endpoint - stride <= i64max < endpoint:
        # generate_regular_range adds `stride` to the end value before
        #  calling us with side="end", which can push `endpoint` past the
        #  int64 bounds.  Undo that step and ask for one period fewer.
        return _generate_range_overflow_safe(
            endpoint - stride, periods - 1, stride, side
        )

    # split into smaller pieces: solve for the first half of the periods,
    #  then continue from that midpoint with the remaining periods
    mid_periods = periods // 2
    remaining = periods - mid_periods
    assert 0 < remaining < periods, (remaining, periods, endpoint, stride)

    midpoint = int(_generate_range_overflow_safe(endpoint, mid_periods, stride, side))
    return _generate_range_overflow_safe(midpoint, remaining, stride, side)
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def _generate_range_overflow_safe_signed(
    endpoint: int, periods: int, stride: int, side: str
) -> int:
    """
    A special case for _generate_range_overflow_safe where `periods * stride`
    can be calculated without overflowing int64 bounds.

    Parameters
    ----------
    endpoint : int
        integer value of the known endpoint of the desired range
    periods : int
        number of periods in the desired range
    stride : int
        integer step between periods; negated internally when
        ``side == "end"``
    side : {'start', 'end'}
        which end of the range `endpoint` refers to

    Returns
    -------
    int
        ``endpoint + periods * stride`` (with the sign of ``stride``
        flipped for ``side == "end"``).

    Raises
    ------
    OutOfBoundsDatetime
        If the sum cannot be represented and the special case handled
        below does not apply.
    """
    assert side in ["start", "end"]
    if side == "end":
        # walking backwards from the known end point
        stride *= -1

    with np.errstate(over="raise"):
        # _generate_range_overflow_safe only calls this after checking that
        #  the magnitude of periods * stride fits in int64
        addend = np.int64(periods) * np.int64(stride)
        try:
            # easy case with no overflows
            result = np.int64(endpoint) + addend
            if result == iNaT:
                # Putting this into a DatetimeArray/TimedeltaArray
                #  would incorrectly be interpreted as NaT
                raise OverflowError
            return int(result)
        except (FloatingPointError, OverflowError):
            # with endpoint negative and addend positive we risk
            #  FloatingPointError; with reversed signed we risk OverflowError
            pass

        # if stride and endpoint had opposite signs, then endpoint + addend
        #  should never overflow.  so they must have the same signs
        assert (stride > 0 and endpoint >= 0) or (stride < 0 and endpoint <= 0)

        if stride > 0:
            # watch out for very special case in which we just slightly
            #  exceed implementation bounds, but when passing the result to
            #  np.arange will get a result slightly within the bounds

            # redo the sum as uint64, which has room above the int64 maximum
            uresult = np.uint64(endpoint) + np.uint64(addend)
            i64max = np.uint64(i8max)
            assert uresult > i64max
            if uresult <= i64max + np.uint64(stride):
                # overshoots the int64 maximum by at most one stride
                return int(uresult)

    raise OutOfBoundsDatetime(
        f"Cannot generate range with {side}={endpoint} and periods={periods}"
    )
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/base.py
ADDED
|
@@ -0,0 +1,2588 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
An interface for extending pandas with custom arrays.
|
| 3 |
+
|
| 4 |
+
.. warning::
|
| 5 |
+
|
| 6 |
+
This is an experimental API and subject to breaking changes
|
| 7 |
+
without warning.
|
| 8 |
+
"""
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import operator
|
| 12 |
+
from typing import (
|
| 13 |
+
TYPE_CHECKING,
|
| 14 |
+
Any,
|
| 15 |
+
Callable,
|
| 16 |
+
ClassVar,
|
| 17 |
+
Literal,
|
| 18 |
+
cast,
|
| 19 |
+
overload,
|
| 20 |
+
)
|
| 21 |
+
import warnings
|
| 22 |
+
|
| 23 |
+
import numpy as np
|
| 24 |
+
|
| 25 |
+
from pandas._libs import (
|
| 26 |
+
algos as libalgos,
|
| 27 |
+
lib,
|
| 28 |
+
)
|
| 29 |
+
from pandas.compat import set_function_name
|
| 30 |
+
from pandas.compat.numpy import function as nv
|
| 31 |
+
from pandas.errors import AbstractMethodError
|
| 32 |
+
from pandas.util._decorators import (
|
| 33 |
+
Appender,
|
| 34 |
+
Substitution,
|
| 35 |
+
cache_readonly,
|
| 36 |
+
)
|
| 37 |
+
from pandas.util._exceptions import find_stack_level
|
| 38 |
+
from pandas.util._validators import (
|
| 39 |
+
validate_bool_kwarg,
|
| 40 |
+
validate_fillna_kwargs,
|
| 41 |
+
validate_insert_loc,
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
from pandas.core.dtypes.cast import maybe_cast_pointwise_result
|
| 45 |
+
from pandas.core.dtypes.common import (
|
| 46 |
+
is_list_like,
|
| 47 |
+
is_scalar,
|
| 48 |
+
pandas_dtype,
|
| 49 |
+
)
|
| 50 |
+
from pandas.core.dtypes.dtypes import ExtensionDtype
|
| 51 |
+
from pandas.core.dtypes.generic import (
|
| 52 |
+
ABCDataFrame,
|
| 53 |
+
ABCIndex,
|
| 54 |
+
ABCSeries,
|
| 55 |
+
)
|
| 56 |
+
from pandas.core.dtypes.missing import isna
|
| 57 |
+
|
| 58 |
+
from pandas.core import (
|
| 59 |
+
arraylike,
|
| 60 |
+
missing,
|
| 61 |
+
roperator,
|
| 62 |
+
)
|
| 63 |
+
from pandas.core.algorithms import (
|
| 64 |
+
duplicated,
|
| 65 |
+
factorize_array,
|
| 66 |
+
isin,
|
| 67 |
+
map_array,
|
| 68 |
+
mode,
|
| 69 |
+
rank,
|
| 70 |
+
unique,
|
| 71 |
+
)
|
| 72 |
+
from pandas.core.array_algos.quantile import quantile_with_mask
|
| 73 |
+
from pandas.core.missing import _fill_limit_area_1d
|
| 74 |
+
from pandas.core.sorting import (
|
| 75 |
+
nargminmax,
|
| 76 |
+
nargsort,
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
+
if TYPE_CHECKING:
|
| 80 |
+
from collections.abc import (
|
| 81 |
+
Iterator,
|
| 82 |
+
Sequence,
|
| 83 |
+
)
|
| 84 |
+
|
| 85 |
+
from pandas._typing import (
|
| 86 |
+
ArrayLike,
|
| 87 |
+
AstypeArg,
|
| 88 |
+
AxisInt,
|
| 89 |
+
Dtype,
|
| 90 |
+
DtypeObj,
|
| 91 |
+
FillnaOptions,
|
| 92 |
+
InterpolateOptions,
|
| 93 |
+
NumpySorter,
|
| 94 |
+
NumpyValueArrayLike,
|
| 95 |
+
PositionalIndexer,
|
| 96 |
+
ScalarIndexer,
|
| 97 |
+
Self,
|
| 98 |
+
SequenceIndexer,
|
| 99 |
+
Shape,
|
| 100 |
+
SortKind,
|
| 101 |
+
TakeIndexer,
|
| 102 |
+
npt,
|
| 103 |
+
)
|
| 104 |
+
|
| 105 |
+
from pandas import Index
|
| 106 |
+
|
| 107 |
+
# Module-level mapping from a name to a docstring template. It starts out
# empty here; entries are presumably registered further down in this module
# so that several methods can share one template -- TODO confirm.
_extension_array_shared_docs: dict[str, str] = {}
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
class ExtensionArray:
|
| 111 |
+
"""
|
| 112 |
+
Abstract base class for custom 1-D array types.
|
| 113 |
+
|
| 114 |
+
pandas will recognize instances of this class as proper arrays
|
| 115 |
+
with a custom type and will not attempt to coerce them to objects. They
|
| 116 |
+
may be stored directly inside a :class:`DataFrame` or :class:`Series`.
|
| 117 |
+
|
| 118 |
+
Attributes
|
| 119 |
+
----------
|
| 120 |
+
dtype
|
| 121 |
+
nbytes
|
| 122 |
+
ndim
|
| 123 |
+
shape
|
| 124 |
+
|
| 125 |
+
Methods
|
| 126 |
+
-------
|
| 127 |
+
argsort
|
| 128 |
+
astype
|
| 129 |
+
copy
|
| 130 |
+
dropna
|
| 131 |
+
duplicated
|
| 132 |
+
factorize
|
| 133 |
+
fillna
|
| 134 |
+
equals
|
| 135 |
+
insert
|
| 136 |
+
interpolate
|
| 137 |
+
isin
|
| 138 |
+
isna
|
| 139 |
+
ravel
|
| 140 |
+
repeat
|
| 141 |
+
searchsorted
|
| 142 |
+
shift
|
| 143 |
+
take
|
| 144 |
+
tolist
|
| 145 |
+
unique
|
| 146 |
+
view
|
| 147 |
+
_accumulate
|
| 148 |
+
_concat_same_type
|
| 149 |
+
_explode
|
| 150 |
+
_formatter
|
| 151 |
+
_from_factorized
|
| 152 |
+
_from_sequence
|
| 153 |
+
_from_sequence_of_strings
|
| 154 |
+
_hash_pandas_object
|
| 155 |
+
_pad_or_backfill
|
| 156 |
+
_reduce
|
| 157 |
+
_values_for_argsort
|
| 158 |
+
_values_for_factorize
|
| 159 |
+
|
| 160 |
+
Notes
|
| 161 |
+
-----
|
| 162 |
+
The interface includes the following abstract methods that must be
|
| 163 |
+
implemented by subclasses:
|
| 164 |
+
|
| 165 |
+
* _from_sequence
|
| 166 |
+
* _from_factorized
|
| 167 |
+
* __getitem__
|
| 168 |
+
* __len__
|
| 169 |
+
* __eq__
|
| 170 |
+
* dtype
|
| 171 |
+
* nbytes
|
| 172 |
+
* isna
|
| 173 |
+
* take
|
| 174 |
+
* copy
|
| 175 |
+
* _concat_same_type
|
| 176 |
+
* interpolate
|
| 177 |
+
|
| 178 |
+
A default repr displaying the type, (truncated) data, length,
|
| 179 |
+
and dtype is provided. It can be customized or replaced by
|
| 180 |
+
overriding:
|
| 181 |
+
|
| 182 |
+
* __repr__ : A default repr for the ExtensionArray.
|
| 183 |
+
* _formatter : Print scalars inside a Series or DataFrame.
|
| 184 |
+
|
| 185 |
+
Some methods require casting the ExtensionArray to an ndarray of Python
|
| 186 |
+
objects with ``self.astype(object)``, which may be expensive. When
|
| 187 |
+
performance is a concern, we highly recommend overriding the following
|
| 188 |
+
methods:
|
| 189 |
+
|
| 190 |
+
* fillna
|
| 191 |
+
* _pad_or_backfill
|
| 192 |
+
* dropna
|
| 193 |
+
* unique
|
| 194 |
+
* factorize / _values_for_factorize
|
| 195 |
+
* argsort, argmax, argmin / _values_for_argsort
|
| 196 |
+
* searchsorted
|
| 197 |
+
* map
|
| 198 |
+
|
| 199 |
+
The remaining methods implemented on this class should be performant,
|
| 200 |
+
as they only compose abstract methods. Still, a more efficient
|
| 201 |
+
implementation may be available, and these methods can be overridden.
|
| 202 |
+
|
| 203 |
+
One can implement methods to handle array accumulations or reductions.
|
| 204 |
+
|
| 205 |
+
* _accumulate
|
| 206 |
+
* _reduce
|
| 207 |
+
|
| 208 |
+
One can implement methods to handle parsing from strings that will be used
|
| 209 |
+
in methods such as ``pandas.io.parsers.read_csv``.
|
| 210 |
+
|
| 211 |
+
* _from_sequence_of_strings
|
| 212 |
+
|
| 213 |
+
This class does not inherit from 'abc.ABCMeta' for performance reasons.
|
| 214 |
+
Methods and properties required by the interface raise
|
| 215 |
+
``pandas.errors.AbstractMethodError`` and no ``register`` method is
|
| 216 |
+
provided for registering virtual subclasses.
|
| 217 |
+
|
| 218 |
+
ExtensionArrays are limited to 1 dimension.
|
| 219 |
+
|
| 220 |
+
They may be backed by none, one, or many NumPy arrays. For example,
|
| 221 |
+
``pandas.Categorical`` is an extension array backed by two arrays,
|
| 222 |
+
one for codes and one for categories. An array of IPv6 addresses may
|
| 223 |
+
be backed by a NumPy structured array with two fields, one for the
|
| 224 |
+
lower 64 bits and one for the upper 64 bits. Or they may be backed
|
| 225 |
+
by some other storage type, like Python lists. Pandas makes no
|
| 226 |
+
assumptions on how the data are stored, just that it can be converted
|
| 227 |
+
to a NumPy array.
|
| 228 |
+
The ExtensionArray interface does not impose any rules on how this data
|
| 229 |
+
is stored. However, currently, the backing data cannot be stored in
|
| 230 |
+
attributes called ``.values`` or ``._values`` to ensure full compatibility
|
| 231 |
+
with pandas internals. But other names such as ``.data``, ``._data``,
|
| 232 |
+
``._items``, ... can be freely used.
|
| 233 |
+
|
| 234 |
+
If implementing NumPy's ``__array_ufunc__`` interface, pandas expects
|
| 235 |
+
that
|
| 236 |
+
|
| 237 |
+
1. You defer by returning ``NotImplemented`` when any Series are present
|
| 238 |
+
in `inputs`. Pandas will extract the arrays and call the ufunc again.
|
| 239 |
+
2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class.
|
| 240 |
+
Pandas inspects this to determine whether the ufunc is valid for the
|
| 241 |
+
types present.
|
| 242 |
+
|
| 243 |
+
See :ref:`extending.extension.ufunc` for more.
|
| 244 |
+
|
| 245 |
+
By default, ExtensionArrays are not hashable. Immutable subclasses may
|
| 246 |
+
override this behavior.
|
| 247 |
+
|
| 248 |
+
Examples
|
| 249 |
+
--------
|
| 250 |
+
Please see the following:
|
| 251 |
+
|
| 252 |
+
https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/list/array.py
|
| 253 |
+
"""
|
| 254 |
+
|
| 255 |
+
# '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray.
|
| 256 |
+
# Don't override this.
|
| 257 |
+
_typ = "extension"
|
| 258 |
+
|
| 259 |
+
# similar to __array_priority__, positions ExtensionArray after Index,
|
| 260 |
+
# Series, and DataFrame. EA subclasses may override to choose which EA
|
| 261 |
+
# subclass takes priority. If overriding, the value should always be
|
| 262 |
+
# strictly less than 2000 to be below Index.__pandas_priority__.
|
| 263 |
+
__pandas_priority__ = 1000
|
| 264 |
+
|
| 265 |
+
# ------------------------------------------------------------------------
|
| 266 |
+
# Constructors
|
| 267 |
+
# ------------------------------------------------------------------------
|
| 268 |
+
|
| 269 |
+
@classmethod
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
    """
    Build an ExtensionArray of this type out of a sequence of scalars.

    This is an abstract constructor: the base implementation only raises,
    and every concrete subclass has to provide its own version.

    Parameters
    ----------
    scalars : Sequence
        The elements of the new array. Each one is either already an
        instance of the array's scalar type, ``cls.dtype.type``, or gets
        converted into that type by this method.
    dtype : dtype, optional
        The dtype to construct for. It should be a Dtype that is
        compatible with the ExtensionArray.
    copy : bool, default False
        When True, the underlying data is copied.

    Returns
    -------
    ExtensionArray

    Raises
    ------
    AbstractMethodError
        Always, when the base implementation is reached.

    Examples
    --------
    >>> pd.arrays.IntegerArray._from_sequence([4, 5])
    <IntegerArray>
    [4, 5]
    Length: 2, dtype: Int64
    """
    # Abstract hook: concrete array types override this.
    raise AbstractMethodError(cls)
|
| 297 |
+
|
| 298 |
+
@classmethod
def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
    """
    Strict counterpart of ``_from_sequence``.

    Only sequences of scalars that should be specifically inferred to the
    given dtype are meant to be accepted.

    Parameters
    ----------
    scalars : sequence
    dtype : ExtensionDtype

    Raises
    ------
    TypeError or ValueError

    Notes
    -----
    This is called in a try/except block when casting the result of a
    pointwise operation.

    The default implementation delegates to ``cls._from_sequence`` with
    ``copy=False``. Any exception is re-raised unchanged; when it is
    neither a ``ValueError`` nor a ``TypeError`` a warning is emitted
    first, suggesting that ``_from_scalars`` be overridden.
    """
    try:
        converted = cls._from_sequence(scalars, dtype=dtype, copy=False)
    except Exception as err:
        # ValueError / TypeError are the sanctioned failure modes and pass
        # through silently; anything else is flagged before propagating.
        if not isinstance(err, (ValueError, TypeError)):
            warnings.warn(
                "_from_scalars should only raise ValueError or TypeError. "
                "Consider overriding _from_scalars where appropriate.",
                stacklevel=find_stack_level(),
            )
        raise
    return converted
|
| 329 |
+
|
| 330 |
+
@classmethod
def _from_sequence_of_strings(
    cls, strings, *, dtype: Dtype | None = None, copy: bool = False
):
    """
    Build an ExtensionArray of this type out of a sequence of strings.

    This is an abstract constructor: the base implementation only raises.

    Parameters
    ----------
    strings : Sequence
        The strings to convert. Each resulting element will be an instance
        of the scalar type for this array, ``cls.dtype.type``.
    dtype : dtype, optional
        The dtype to construct for. It should be a Dtype that is
        compatible with the ExtensionArray.
    copy : bool, default False
        When True, the underlying data is copied.

    Returns
    -------
    ExtensionArray

    Raises
    ------
    AbstractMethodError
        Always, when the base implementation is reached.

    Examples
    --------
    >>> pd.arrays.IntegerArray._from_sequence_of_strings(["1", "2", "3"])
    <IntegerArray>
    [1, 2, 3]
    Length: 3, dtype: Int64
    """
    # Abstract hook: subclasses that support parsing from strings override it.
    raise AbstractMethodError(cls)
|
| 360 |
+
|
| 361 |
+
@classmethod
def _from_factorized(cls, values, original):
    """
    Rebuild an ExtensionArray once factorization has been performed.

    This is an abstract constructor: the base implementation only raises.

    Parameters
    ----------
    values : ndarray
        An integer ndarray with the factorized values.
    original : ExtensionArray
        The ExtensionArray on which factorize was originally called.

    Raises
    ------
    AbstractMethodError
        Always, when the base implementation is reached.

    See Also
    --------
    factorize : Top-level factorize method that dispatches here.
    ExtensionArray.factorize : Encode the extension array as an enumerated type.

    Examples
    --------
    >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1),
    ...                                      pd.Interval(1, 5), pd.Interval(1, 5)])
    >>> codes, uniques = pd.factorize(interv_arr)
    >>> pd.arrays.IntervalArray._from_factorized(uniques, interv_arr)
    <IntervalArray>
    [(0, 1], (1, 5]]
    Length: 2, dtype: interval[int64, right]
    """
    # Abstract hook: concrete array types override this.
    raise AbstractMethodError(cls)
|
| 389 |
+
|
| 390 |
+
# ------------------------------------------------------------------------
|
| 391 |
+
# Must be a Sequence
|
| 392 |
+
# ------------------------------------------------------------------------
|
| 393 |
+
@overload
def __getitem__(self, item: ScalarIndexer) -> Any:
    ...

@overload
def __getitem__(self, item: SequenceIndexer) -> Self:
    ...

def __getitem__(self, item: PositionalIndexer) -> Self | Any:
    """
    Return the element or sub-array of ``self`` selected by ``item``.

    This is an abstract method: the base implementation only raises.

    Parameters
    ----------
    item : int, slice, or ndarray
        * int: The position in 'self' to get.

        * slice: A slice object, where 'start', 'stop', and 'step' are
          integers or None

        * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'

        * list[int]: A list of int

    Returns
    -------
    item : scalar or ExtensionArray

    Raises
    ------
    AbstractMethodError
        Always, when the base implementation is reached.

    Notes
    -----
    Contract for implementers:

    * scalar ``item``: return a scalar value suitable for the array's
      type; this should be an instance of ``self.dtype.type``.
    * slice ``item``: return an instance of ``ExtensionArray``, even if
      the slice is length 0 or 1.
    * boolean mask: return an instance of ``ExtensionArray``, filtered to
      the values where ``item`` is True.
    """
    # Abstract hook: concrete array types override this.
    raise AbstractMethodError(self)
|
| 433 |
+
|
| 434 |
+
def __setitem__(self, key, value) -> None:
    """
    Assign one or several values in place.

    Implementing this method is optional; the pandas extension array
    interface does not require it. The base implementation always raises.

    Parameters
    ----------
    key : int, ndarray, or slice
        When called from, e.g. ``Series.__setitem__``, ``key`` will be
        one of

        * scalar int
        * ndarray of integers.
        * boolean ndarray
        * slice object

    value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
        value or values to be set of ``key``.

    Returns
    -------
    None

    Raises
    ------
    NotImplementedError
        Always, when the base implementation is reached.
    """
    # Guidance for ExtensionArray authors who do decide to implement
    # __setitem__ -- the following semantics should be honoured:
    #
    # * Multiple values: setting several values at once should work;
    #   'key' is then a sequence of integers and 'value' a sequence of
    #   the same length.
    #
    # * Broadcasting: with a sequence 'key' and a scalar 'value', every
    #   position listed in 'key' should be set to 'value'.
    #
    # * Coercion: most users expect basic coercion. For instance a string
    #   such as '2018-01-01' is coerced to a datetime when assigned into a
    #   datetime64ns array. As a rule of thumb, whatever __init__ coerces,
    #   __setitem__ should coerce as well.
    #
    # Also note that Series/DataFrame.where internally use __setitem__ on
    # a copy of the data.
    message = f"{type(self)} does not implement __setitem__."
    raise NotImplementedError(message)
|
| 478 |
+
|
| 479 |
+
def __len__(self) -> int:
    """
    Number of elements held by this array.

    This is an abstract method: the base implementation only raises.

    Returns
    -------
    length : int

    Raises
    ------
    AbstractMethodError
        Always, when the base implementation is reached.
    """
    # Abstract hook: concrete array types override this.
    raise AbstractMethodError(self)
|
| 488 |
+
|
| 489 |
+
def __iter__(self) -> Iterator[Any]:
    """
    Yield the elements of the array one at a time, in positional order.
    """
    # Defining __iter__ is what lets pandas treat extension arrays as
    # list-like. This default simply performs one ``__getitem__`` call per
    # position, which can be slower than a specialised implementation.
    total = len(self)
    position = 0
    while position < total:
        yield self[position]
        position += 1
|
| 498 |
+
|
| 499 |
+
def __contains__(self, item: object) -> bool | np.bool_:
    """
    Implement ``item in self``.

    A missing-value scalar is handled without any element-wise comparison;
    every other item is compared against the array with ``==`` and the
    result is reduced with ``any``.
    """
    # GH37867
    # Comparing anything with pd.NA yields pd.NA, so a plain comparison such
    # as "a" in [pd.NA] would raise a TypeError. Missing-value items are
    # therefore answered from the array's NA bookkeeping instead.
    if not (is_scalar(item) and isna(item)):
        # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no
        # attribute "any"
        return (item == self).any()  # type: ignore[union-attr]

    if not self._can_hold_na:
        return False

    # Only the dtype's own NA marker, or an NA-like instance of the dtype's
    # scalar type, counts as a possible member.
    is_own_na = item is self.dtype.na_value or isinstance(item, self.dtype.type)
    if is_own_na:
        return self._hasna
    return False
|
| 517 |
+
|
| 518 |
+
# error: Signature of "__eq__" incompatible with supertype "object"
|
| 519 |
+
def __eq__(self, other: object) -> ArrayLike:  # type: ignore[override]
    """
    Implement ``self == other`` as an element-wise equality test.

    This is an abstract method: the base implementation only raises.
    """
    # Guidance for implementers: the result should be a boolean numpy
    # ndarray or a boolean ExtensionArray.
    # If `other` is a Series, Index, or DataFrame, return NotImplemented so
    # that those objects take charge of unpacking the arrays first and then
    # dispatching the operation to the underlying arrays.
    raise AbstractMethodError(self)
|
| 530 |
+
|
| 531 |
+
# error: Signature of "__ne__" incompatible with supertype "object"
|
| 532 |
+
def __ne__(self, other: object) -> ArrayLike:  # type: ignore[override]
    """
    Implement ``self != other`` as an element-wise inequality test.

    The result is the inversion (``~``) of ``self == other``.
    """
    equal_mask = self == other
    # error: Unsupported operand type for ~ ("ExtensionArray")
    return ~equal_mask  # type: ignore[operator]
|
| 538 |
+
|
| 539 |
+
def to_numpy(
    self,
    dtype: npt.DTypeLike | None = None,
    copy: bool = False,
    na_value: object = lib.no_default,
) -> np.ndarray:
    """
    Convert to a NumPy ndarray.

    This is similar to :meth:`numpy.asarray`, but may provide additional
    control over how the conversion is done.

    Parameters
    ----------
    dtype : str or numpy.dtype, optional
        The dtype to pass to :meth:`numpy.asarray`.
    copy : bool, default False
        Whether to ensure that the returned value is not a view on
        another array. Note that ``copy=False`` does not *ensure* that
        ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
        a copy is made, even if not strictly necessary.
    na_value : Any, optional
        The value to use for missing values. The default value depends
        on `dtype` and the type of the array. Supplying a value always
        triggers a copy before the missing positions are overwritten.

    Returns
    -------
    numpy.ndarray
    """
    wants_fill = na_value is not lib.no_default

    converted = np.asarray(self, dtype=dtype)
    if copy or wants_fill:
        # Filling writes into the result, so it must not alias other data.
        converted = converted.copy()
    if wants_fill:
        missing = self.isna()
        converted[missing] = na_value
    return converted
|
| 574 |
+
|
| 575 |
+
# ------------------------------------------------------------------------
|
| 576 |
+
# Required attributes
|
| 577 |
+
# ------------------------------------------------------------------------
|
| 578 |
+
|
| 579 |
+
@property
def dtype(self) -> ExtensionDtype:
    """
    The ExtensionDtype instance describing this array.

    This is an abstract property: the base implementation only raises.

    Examples
    --------
    >>> pd.array([1, 2, 3]).dtype
    Int64Dtype()
    """
    # Abstract hook: concrete array types override this.
    raise AbstractMethodError(self)
|
| 590 |
+
|
| 591 |
+
@property
def shape(self) -> Shape:
    """
    Tuple holding the array dimensions.

    It is always the one-element tuple ``(len(self),)``.

    Examples
    --------
    >>> arr = pd.array([1, 2, 3])
    >>> arr.shape
    (3,)
    """
    length = len(self)
    return (length,)
|
| 603 |
+
|
| 604 |
+
@property
def size(self) -> int:
    """
    Total number of elements in the array.

    Computed as the product of the entries of ``self.shape``.
    """
    dims = self.shape
    # error: Incompatible return value type (got "signedinteger[_64Bit]",
    # expected "int")  [return-value]
    return np.prod(dims)  # type: ignore[return-value]
|
| 612 |
+
|
| 613 |
+
@property
def ndim(self) -> int:
    """
    Number of dimensions, which is always 1.

    Extension Arrays are only allowed to be 1-dimensional.

    Examples
    --------
    >>> arr = pd.array([1, 2, 3])
    >>> arr.ndim
    1
    """
    return 1
|
| 625 |
+
|
| 626 |
+
@property
def nbytes(self) -> int:
    """
    Number of bytes required to keep this object in memory.

    This is an abstract property: the base implementation only raises.

    Examples
    --------
    >>> pd.array([1, 2, 3]).nbytes
    27
    """
    # Implementers: when the exact figure is expensive to compute, an
    # approximate lower bound on the number of bytes is acceptable.
    raise AbstractMethodError(self)
|
| 639 |
+
|
| 640 |
+
# ------------------------------------------------------------------------
|
| 641 |
+
# Additional Methods
|
| 642 |
+
# ------------------------------------------------------------------------
|
| 643 |
+
|
| 644 |
+
@overload
|
| 645 |
+
def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
|
| 646 |
+
...
|
| 647 |
+
|
| 648 |
+
@overload
|
| 649 |
+
def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
|
| 650 |
+
...
|
| 651 |
+
|
| 652 |
+
@overload
|
| 653 |
+
def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
|
| 654 |
+
...
|
| 655 |
+
|
| 656 |
+
def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
|
| 657 |
+
"""
|
| 658 |
+
Cast to a NumPy array or ExtensionArray with 'dtype'.
|
| 659 |
+
|
| 660 |
+
Parameters
|
| 661 |
+
----------
|
| 662 |
+
dtype : str or dtype
|
| 663 |
+
Typecode or data-type to which the array is cast.
|
| 664 |
+
copy : bool, default True
|
| 665 |
+
Whether to copy the data, even if not necessary. If False,
|
| 666 |
+
a copy is made only if the old dtype does not match the
|
| 667 |
+
new dtype.
|
| 668 |
+
|
| 669 |
+
Returns
|
| 670 |
+
-------
|
| 671 |
+
np.ndarray or pandas.api.extensions.ExtensionArray
|
| 672 |
+
An ``ExtensionArray`` if ``dtype`` is ``ExtensionDtype``,
|
| 673 |
+
otherwise a Numpy ndarray with ``dtype`` for its dtype.
|
| 674 |
+
|
| 675 |
+
Examples
|
| 676 |
+
--------
|
| 677 |
+
>>> arr = pd.array([1, 2, 3])
|
| 678 |
+
>>> arr
|
| 679 |
+
<IntegerArray>
|
| 680 |
+
[1, 2, 3]
|
| 681 |
+
Length: 3, dtype: Int64
|
| 682 |
+
|
| 683 |
+
Casting to another ``ExtensionDtype`` returns an ``ExtensionArray``:
|
| 684 |
+
|
| 685 |
+
>>> arr1 = arr.astype('Float64')
|
| 686 |
+
>>> arr1
|
| 687 |
+
<FloatingArray>
|
| 688 |
+
[1.0, 2.0, 3.0]
|
| 689 |
+
Length: 3, dtype: Float64
|
| 690 |
+
>>> arr1.dtype
|
| 691 |
+
Float64Dtype()
|
| 692 |
+
|
| 693 |
+
Otherwise, we will get a Numpy ndarray:
|
| 694 |
+
|
| 695 |
+
>>> arr2 = arr.astype('float64')
|
| 696 |
+
>>> arr2
|
| 697 |
+
array([1., 2., 3.])
|
| 698 |
+
>>> arr2.dtype
|
| 699 |
+
dtype('float64')
|
| 700 |
+
"""
|
| 701 |
+
dtype = pandas_dtype(dtype)
|
| 702 |
+
if dtype == self.dtype:
|
| 703 |
+
if not copy:
|
| 704 |
+
return self
|
| 705 |
+
else:
|
| 706 |
+
return self.copy()
|
| 707 |
+
|
| 708 |
+
if isinstance(dtype, ExtensionDtype):
|
| 709 |
+
cls = dtype.construct_array_type()
|
| 710 |
+
return cls._from_sequence(self, dtype=dtype, copy=copy)
|
| 711 |
+
|
| 712 |
+
elif lib.is_np_dtype(dtype, "M"):
|
| 713 |
+
from pandas.core.arrays import DatetimeArray
|
| 714 |
+
|
| 715 |
+
return DatetimeArray._from_sequence(self, dtype=dtype, copy=copy)
|
| 716 |
+
|
| 717 |
+
elif lib.is_np_dtype(dtype, "m"):
|
| 718 |
+
from pandas.core.arrays import TimedeltaArray
|
| 719 |
+
|
| 720 |
+
return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy)
|
| 721 |
+
|
| 722 |
+
if not copy:
|
| 723 |
+
return np.asarray(self, dtype=dtype)
|
| 724 |
+
else:
|
| 725 |
+
return np.array(self, dtype=dtype, copy=copy)
|
| 726 |
+
|
| 727 |
+
def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll:
    """
    A 1-D array flagging, per element, whether the value is missing.

    This is an abstract method: the base implementation only raises.

    Returns
    -------
    numpy.ndarray or pandas.api.extensions.ExtensionArray
        In most cases, this should return a NumPy ndarray. For
        exceptional cases like ``SparseArray``, where returning
        an ndarray would be expensive, an ExtensionArray may be
        returned.

    Raises
    ------
    AbstractMethodError
        Always, when the base implementation is reached.

    Notes
    -----
    If returning an ExtensionArray, then

    * ``na_values._is_boolean`` should be True
    * `na_values` should implement :func:`ExtensionArray._reduce`
    * ``na_values.any`` and ``na_values.all`` should be implemented

    Examples
    --------
    >>> arr = pd.array([1, 2, np.nan, np.nan])
    >>> arr.isna()
    array([False, False,  True,  True])
    """
    # Abstract hook: concrete array types override this.
    raise AbstractMethodError(self)
|
| 754 |
+
|
| 755 |
+
@property
def _hasna(self) -> bool:
    # GH#22680
    """
    Whether the array contains at least one missing value.

    Equivalent to `self.isna().any()`, coerced to a plain ``bool``.

    Some ExtensionArray subclasses may be able to optimize this check.
    """
    missing = self.isna()
    return bool(missing.any())
|
| 764 |
+
|
| 765 |
+
def _values_for_argsort(self) -> np.ndarray:
    """
    Return the values that sorting should operate on.

    The default converts ``self`` with ``np.array``.

    Returns
    -------
    ndarray
        The transformed values should maintain the ordering between values
        within the array.

    See Also
    --------
    ExtensionArray.argsort : Return the indices that would sort this array.

    Notes
    -----
    The caller is responsible for *not* modifying these values in-place, so
    it is safe for implementers to give views on ``self``.

    Functions that use this (e.g. ``ExtensionArray.argsort``) should ignore
    entries with missing values in the original array (according to
    ``self.isna()``). This means that the corresponding entries in the returned
    array don't need to be modified to sort correctly.

    Examples
    --------
    In most cases, this is the underlying Numpy array of the ``ExtensionArray``:

    >>> arr = pd.array([1, 2, 3])
    >>> arr._values_for_argsort()
    array([1, 2, 3])
    """
    # Note: consumed by `ExtensionArray.argsort/argmin/argmax`.
    sortable = np.array(self)
    return sortable
|
| 799 |
+
|
| 800 |
+
def argsort(
    self,
    *,
    ascending: bool = True,
    kind: SortKind = "quicksort",
    na_position: str = "last",
    **kwargs,
) -> np.ndarray:
    """
    Return the indices that would sort this array.

    Parameters
    ----------
    ascending : bool, default True
        Whether the indices should result in an ascending
        or descending sort.
    kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
        Sorting algorithm.
    na_position : {'first', 'last'}, default 'last'
        If ``'first'``, put ``NaN`` values at the beginning.
        If ``'last'``, put ``NaN`` values at the end.
    **kwargs
        Extra keywords accepted for compatibility with
        :func:`numpy.argsort`. In this implementation they are only handed
        to the argument validator together with ``ascending``.

    Returns
    -------
    np.ndarray[np.intp]
        Array of indices that sort ``self``. Missing values are placed as
        requested by ``na_position`` (at the end by default).

    See Also
    --------
    numpy.argsort : Sorting implementation used internally.

    Examples
    --------
    >>> arr = pd.array([3, 1, 2, 5, 4])
    >>> arr.argsort()
    array([1, 2, 0, 4, 3])
    """
    # Guidance for implementers -- argsort can be customised in two places:
    # 1. _values_for_argsort : build the values that get sorted
    # 2. argsort itself : take full control over sorting. When doing so,
    #    overriding argmax/argmin as well is recommended.
    ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs)

    sort_keys = self._values_for_argsort()
    na_mask = np.asarray(self.isna())
    return nargsort(
        sort_keys,
        kind=kind,
        ascending=ascending,
        na_position=na_position,
        mask=na_mask,
    )
|
| 855 |
+
|
| 856 |
+
def argmin(self, skipna: bool = True) -> int:
    """
    Return the index of minimum value.

    In case of multiple occurrences of the minimum value, the index
    corresponding to the first occurrence is returned.

    Parameters
    ----------
    skipna : bool, default True
        Must be a boolean. ``skipna=False`` is only supported when the
        array holds no missing values.

    Returns
    -------
    int

    Raises
    ------
    NotImplementedError
        If ``skipna`` is False and the array contains missing values.

    See Also
    --------
    ExtensionArray.argmax : Return the index of the maximum value.

    Examples
    --------
    >>> arr = pd.array([3, 1, 2, 5, 4])
    >>> arr.argmin()
    1
    """
    # Implementer note: You have two places to override the behavior of
    # argmin.
    # 1. _values_for_argsort : construct the values used in nargminmax
    # 2. argmin itself : total control over sorting.
    validate_bool_kwarg(skipna, "skipna")
    if not skipna and self._hasna:
        # Same exception type as before; the message now says what went
        # wrong instead of surfacing an empty NotImplementedError.
        raise NotImplementedError(
            f"{type(self).__name__}.argmin does not support skipna=False "
            "when missing values are present"
        )
    return nargminmax(self, "argmin")
|
| 889 |
+
|
| 890 |
+
def argmax(self, skipna: bool = True) -> int:
    """
    Return the index of maximum value.

    In case of multiple occurrences of the maximum value, the index
    corresponding to the first occurrence is returned.

    Parameters
    ----------
    skipna : bool, default True
        Must be a boolean. ``skipna=False`` is only supported when the
        array holds no missing values.

    Returns
    -------
    int

    Raises
    ------
    NotImplementedError
        If ``skipna`` is False and the array contains missing values.

    See Also
    --------
    ExtensionArray.argmin : Return the index of the minimum value.

    Examples
    --------
    >>> arr = pd.array([3, 1, 2, 5, 4])
    >>> arr.argmax()
    3
    """
    # Implementer note: You have two places to override the behavior of
    # argmax.
    # 1. _values_for_argsort : construct the values used in nargminmax
    # 2. argmax itself : total control over sorting.
    validate_bool_kwarg(skipna, "skipna")
    if not skipna and self._hasna:
        # Same exception type as before; the message now says what went
        # wrong instead of surfacing an empty NotImplementedError.
        raise NotImplementedError(
            f"{type(self).__name__}.argmax does not support skipna=False "
            "when missing values are present"
        )
    return nargminmax(self, "argmax")
|
| 923 |
+
|
| 924 |
+
def interpolate(
    self,
    *,
    method: InterpolateOptions,
    axis: int,
    index: Index,
    limit,
    limit_direction,
    limit_area,
    copy: bool,
    **kwargs,
) -> Self:
    """
    See DataFrame.interpolate.__doc__.

    The base implementation does not interpolate; it raises for every
    call, so array types that support interpolation must override it.

    Raises
    ------
    NotImplementedError
        Always, when the base implementation is reached.

    Examples
    --------
    >>> arr = pd.arrays.NumpyExtensionArray(np.array([0, 1, np.nan, 3]))
    >>> arr.interpolate(method="linear",
    ...                 limit=3,
    ...                 limit_direction="forward",
    ...                 index=pd.Index([1, 2, 3, 4]),
    ...                 fill_value=1,
    ...                 copy=False,
    ...                 axis=0,
    ...                 limit_area="inside"
    ...                 )
    <NumpyExtensionArray>
    [0.0, 1.0, 2.0, 3.0]
    Length: 4, dtype: float64
    """
    # NB: we return type(self) even if copy=False
    message = f"{type(self).__name__} does not implement interpolate"
    raise NotImplementedError(message)
|
| 959 |
+
|
| 960 |
+
def _pad_or_backfill(
|
| 961 |
+
self,
|
| 962 |
+
*,
|
| 963 |
+
method: FillnaOptions,
|
| 964 |
+
limit: int | None = None,
|
| 965 |
+
limit_area: Literal["inside", "outside"] | None = None,
|
| 966 |
+
copy: bool = True,
|
| 967 |
+
) -> Self:
|
| 968 |
+
"""
|
| 969 |
+
Pad or backfill values, used by Series/DataFrame ffill and bfill.
|
| 970 |
+
|
| 971 |
+
Parameters
|
| 972 |
+
----------
|
| 973 |
+
method : {'backfill', 'bfill', 'pad', 'ffill'}
|
| 974 |
+
Method to use for filling holes in reindexed Series:
|
| 975 |
+
|
| 976 |
+
* pad / ffill: propagate last valid observation forward to next valid.
|
| 977 |
+
* backfill / bfill: use NEXT valid observation to fill gap.
|
| 978 |
+
|
| 979 |
+
limit : int, default None
|
| 980 |
+
This is the maximum number of consecutive
|
| 981 |
+
NaN values to forward/backward fill. In other words, if there is
|
| 982 |
+
a gap with more than this number of consecutive NaNs, it will only
|
| 983 |
+
be partially filled. If method is not specified, this is the
|
| 984 |
+
maximum number of entries along the entire axis where NaNs will be
|
| 985 |
+
filled.
|
| 986 |
+
|
| 987 |
+
copy : bool, default True
|
| 988 |
+
Whether to make a copy of the data before filling. If False, then
|
| 989 |
+
the original should be modified and no new memory should be allocated.
|
| 990 |
+
For ExtensionArray subclasses that cannot do this, it is at the
|
| 991 |
+
author's discretion whether to ignore "copy=False" or to raise.
|
| 992 |
+
The base class implementation ignores the keyword if any NAs are
|
| 993 |
+
present.
|
| 994 |
+
|
| 995 |
+
Returns
|
| 996 |
+
-------
|
| 997 |
+
Same type as self
|
| 998 |
+
|
| 999 |
+
Examples
|
| 1000 |
+
--------
|
| 1001 |
+
>>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan])
|
| 1002 |
+
>>> arr._pad_or_backfill(method="backfill", limit=1)
|
| 1003 |
+
<IntegerArray>
|
| 1004 |
+
[<NA>, 2, 2, 3, <NA>, <NA>]
|
| 1005 |
+
Length: 6, dtype: Int64
|
| 1006 |
+
"""
|
| 1007 |
+
|
| 1008 |
+
# If a 3rd-party EA has implemented this functionality in fillna,
|
| 1009 |
+
# we warn that they need to implement _pad_or_backfill instead.
|
| 1010 |
+
if (
|
| 1011 |
+
type(self).fillna is not ExtensionArray.fillna
|
| 1012 |
+
and type(self)._pad_or_backfill is ExtensionArray._pad_or_backfill
|
| 1013 |
+
):
|
| 1014 |
+
# Check for _pad_or_backfill here allows us to call
|
| 1015 |
+
# super()._pad_or_backfill without getting this warning
|
| 1016 |
+
warnings.warn(
|
| 1017 |
+
"ExtensionArray.fillna 'method' keyword is deprecated. "
|
| 1018 |
+
"In a future version. arr._pad_or_backfill will be called "
|
| 1019 |
+
"instead. 3rd-party ExtensionArray authors need to implement "
|
| 1020 |
+
"_pad_or_backfill.",
|
| 1021 |
+
DeprecationWarning,
|
| 1022 |
+
stacklevel=find_stack_level(),
|
| 1023 |
+
)
|
| 1024 |
+
if limit_area is not None:
|
| 1025 |
+
raise NotImplementedError(
|
| 1026 |
+
f"{type(self).__name__} does not implement limit_area "
|
| 1027 |
+
"(added in pandas 2.2). 3rd-party ExtnsionArray authors "
|
| 1028 |
+
"need to add this argument to _pad_or_backfill."
|
| 1029 |
+
)
|
| 1030 |
+
return self.fillna(method=method, limit=limit)
|
| 1031 |
+
|
| 1032 |
+
mask = self.isna()
|
| 1033 |
+
|
| 1034 |
+
if mask.any():
|
| 1035 |
+
# NB: the base class does not respect the "copy" keyword
|
| 1036 |
+
meth = missing.clean_fill_method(method)
|
| 1037 |
+
|
| 1038 |
+
npmask = np.asarray(mask)
|
| 1039 |
+
if limit_area is not None and not npmask.all():
|
| 1040 |
+
_fill_limit_area_1d(npmask, limit_area)
|
| 1041 |
+
if meth == "pad":
|
| 1042 |
+
indexer = libalgos.get_fill_indexer(npmask, limit=limit)
|
| 1043 |
+
return self.take(indexer, allow_fill=True)
|
| 1044 |
+
else:
|
| 1045 |
+
# i.e. meth == "backfill"
|
| 1046 |
+
indexer = libalgos.get_fill_indexer(npmask[::-1], limit=limit)[::-1]
|
| 1047 |
+
return self[::-1].take(indexer, allow_fill=True)
|
| 1048 |
+
|
| 1049 |
+
else:
|
| 1050 |
+
if not copy:
|
| 1051 |
+
return self
|
| 1052 |
+
new_values = self.copy()
|
| 1053 |
+
return new_values
|
| 1054 |
+
|
| 1055 |
+
def fillna(
    self,
    value: object | ArrayLike | None = None,
    method: FillnaOptions | None = None,
    limit: int | None = None,
    copy: bool = True,
) -> Self:
    """
    Fill NA/NaN values with a value or by propagating valid observations.

    Parameters
    ----------
    value : scalar, array-like
        A scalar is used to fill every missing value. An array-like
        "value" is expected to have the same length as 'self'.
    method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
        Method to use for filling holes in reindexed Series:

        * pad / ffill: propagate last valid observation forward to next valid.
        * backfill / bfill: use NEXT valid observation to fill gap.

        .. deprecated:: 2.1.0

    limit : int, default None
        If method is specified, this is the maximum number of consecutive
        NaN values to forward/backward fill. In other words, if there is
        a gap with more than this number of consecutive NaNs, it will only
        be partially filled. If method is not specified, this is the
        maximum number of entries along the entire axis where NaNs will be
        filled.

        .. deprecated:: 2.1.0

    copy : bool, default True
        Whether to make a copy of the data before filling. If False, then
        the original should be modified and no new memory should be allocated.
        For ExtensionArray subclasses that cannot do this, it is at the
        author's discretion whether to ignore "copy=False" or to raise.
        The base class implementation ignores the keyword in pad/backfill
        cases.

    Returns
    -------
    ExtensionArray
        With NA/NaN filled.

    Examples
    --------
    >>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan])
    >>> arr.fillna(0)
    <IntegerArray>
    [0, 0, 2, 3, 0, 0]
    Length: 6, dtype: Int64
    """
    if method is not None:
        warnings.warn(
            f"The 'method' keyword in {type(self).__name__}.fillna is "
            "deprecated and will be removed in a future version.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    value, method = validate_fillna_kwargs(value, method)

    na_mask = self.isna()
    # error: Argument 2 to "check_value_size" has incompatible type
    # "ExtensionArray"; expected "ndarray"
    value = missing.check_value_size(
        value, na_mask, len(self)  # type: ignore[arg-type]
    )

    has_na = bool(na_mask.any())

    if has_na and method is not None:
        # Pad/backfill path; "copy" is not consulted here.
        fill_method = missing.clean_fill_method(method)
        np_na_mask = np.asarray(na_mask)
        if fill_method == "pad":
            pad_indexer = libalgos.get_fill_indexer(np_na_mask, limit=limit)
            return self.take(pad_indexer, allow_fill=True)
        # i.e. fill_method == "backfill"
        reversed_indexer = libalgos.get_fill_indexer(np_na_mask[::-1], limit=limit)
        backfill_indexer = reversed_indexer[::-1]
        return self[::-1].take(backfill_indexer, allow_fill=True)

    # Value-fill path (or nothing to fill): start from a copy or a full slice.
    filled = self.copy() if copy else self[:]
    if has_na:
        filled[na_mask] = value
    return filled
|
| 1152 |
+
|
| 1153 |
+
def dropna(self) -> Self:
    """
    Return a new array with all NA entries removed.

    Returns
    -------
    Same type as self
        The elements of ``self`` for which ``isna()`` is False.

    Examples
    --------
    >>> pd.array([1, 2, np.nan]).dropna()
    <IntegerArray>
    [1, 2]
    Length: 2, dtype: Int64
    """
    # error: Unsupported operand type for ~ ("ExtensionArray")
    not_na = ~self.isna()  # type: ignore[operator]
    return self[not_na]
|
| 1169 |
+
|
| 1170 |
+
def duplicated(
    self, keep: Literal["first", "last", False] = "first"
) -> npt.NDArray[np.bool_]:
    """
    Flag the elements that repeat a value found elsewhere in the array.

    Parameters
    ----------
    keep : {'first', 'last', False}, default 'first'
        - ``first`` : Mark duplicates as ``True`` except for the first occurrence.
        - ``last`` : Mark duplicates as ``True`` except for the last occurrence.
        - False : Mark all duplicates as ``True``.

    Returns
    -------
    ndarray[bool]

    Examples
    --------
    >>> pd.array([1, 1, 2, 3, 3], dtype="Int64").duplicated()
    array([False,  True, False, False,  True])
    """
    # The NA mask is handed to the algorithm as a plain boolean array
    # (copy=False asks astype not to copy when it does not have to).
    na_mask = self.isna().astype(np.bool_, copy=False)
    return duplicated(values=self, keep=keep, mask=na_mask)
|
| 1194 |
+
|
| 1195 |
+
def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray:
    """
    Move the values by ``periods`` positions.

    Positions vacated by the shift are filled with ``fill_value``, which
    defaults to ``self.dtype.na_value``.

    Parameters
    ----------
    periods : int, default 1
        The number of periods to shift. Negative values are allowed
        for shifting backwards.

    fill_value : object, optional
        The scalar value to use for newly introduced missing values.
        The default is ``self.dtype.na_value``.

    Returns
    -------
    ExtensionArray
        Shifted.

    Notes
    -----
    If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is
    returned.

    If ``periods > len(self)``, then an array of size
    len(self) is returned, with all values filled with
    ``self.dtype.na_value``.

    For 2-dimensional ExtensionArrays, we are always shifting along axis=0.

    Examples
    --------
    >>> arr = pd.array([1, 2, 3])
    >>> arr.shift(2)
    <IntegerArray>
    [<NA>, <NA>, 1]
    Length: 3, dtype: Int64
    """
    # Note: this implementation assumes that `self.dtype.na_value` can be
    # stored in an instance of your ExtensionArray with `self.dtype`.
    if periods == 0 or not len(self):
        return self.copy()

    if isna(fill_value):
        fill_value = self.dtype.na_value

    # Never build more filler than the array can hold.
    n_fill = min(abs(periods), len(self))
    filler = self._from_sequence([fill_value] * n_fill, dtype=self.dtype)

    if periods > 0:
        # Shifting forward: filler first, then the surviving leading values.
        pieces = [filler, self[:-periods]]
    else:
        # Shifting backward: surviving trailing values, then the filler.
        pieces = [self[abs(periods) :], filler]
    return self._concat_same_type(pieces)
|
| 1254 |
+
|
| 1255 |
+
def unique(self) -> Self:
    """
    Return the distinct values of the array as an ExtensionArray.

    The base implementation casts to object dtype, computes the unique
    values and rebuilds an array of the original dtype from them.

    Returns
    -------
    pandas.api.extensions.ExtensionArray

    Examples
    --------
    >>> arr = pd.array([1, 2, 3, 1, 2, 3])
    >>> arr.unique()
    <IntegerArray>
    [1, 2, 3]
    Length: 3, dtype: Int64
    """
    as_objects = self.astype(object)
    distinct = unique(as_objects)
    return self._from_sequence(distinct, dtype=self.dtype)
|
| 1273 |
+
|
| 1274 |
+
def searchsorted(
    self,
    value: NumpyValueArrayLike | ExtensionArray,
    side: Literal["left", "right"] = "left",
    sorter: NumpySorter | None = None,
) -> npt.NDArray[np.intp] | np.intp:
    """
    Locate the insertion points that keep a sorted array sorted.

    Find the indices into a sorted array `self` (a) such that, if the
    corresponding elements in `value` were inserted before the indices,
    the order of `self` would be preserved.

    Assuming that `self` is sorted:

    ======  ================================
    `side`  returned index `i` satisfies
    ======  ================================
    left    ``self[i-1] < value <= self[i]``
    right   ``self[i-1] <= value < self[i]``
    ======  ================================

    Parameters
    ----------
    value : array-like, list or scalar
        Value(s) to insert into `self`.
    side : {'left', 'right'}, optional
        If 'left', the index of the first suitable location found is given.
        If 'right', return the last such index.  If there is no suitable
        index, return either 0 or N (where N is the length of `self`).
    sorter : 1-D array-like, optional
        Optional array of integer indices that sort array a into ascending
        order. They are typically the result of argsort.

    Returns
    -------
    array of ints or int
        If value is array-like, array of insertion points.
        If value is scalar, a single integer.

    See Also
    --------
    numpy.searchsorted : Similar method from NumPy.

    Examples
    --------
    >>> arr = pd.array([1, 2, 3, 5])
    >>> arr.searchsorted([4])
    array([3])
    """
    # Note: the base tests provided by pandas only test the basics.
    # We do not test
    # 1. Values outside the range of the `data_for_sorting` fixture
    # 2. Values between the values in the `data_for_sorting` fixture
    # 3. Missing values.
    haystack = self.astype(object)
    # ExtensionArray needles are searched in object form as well; anything
    # else is passed through untouched.
    needle = value.astype(object) if isinstance(value, ExtensionArray) else value
    return haystack.searchsorted(needle, side=side, sorter=sorter)
|
| 1333 |
+
|
| 1334 |
+
def equals(self, other: object) -> bool:
    """
    Report whether another array is equivalent to this one.

    Two arrays are equivalent when they have the same type, dtype and
    length and all values compare equal. Missing values in the same
    location are considered equal (in contrast with normal equality).

    Parameters
    ----------
    other : ExtensionArray
        Array to compare to this Array.

    Returns
    -------
    boolean
        Whether the arrays are equivalent.

    Examples
    --------
    >>> arr1 = pd.array([1, 2, np.nan])
    >>> arr2 = pd.array([1, 2, np.nan])
    >>> arr1.equals(arr2)
    True
    """
    # Exact type match is required; a subclass instance is not equivalent.
    if type(self) != type(other):
        return False
    other = cast(ExtensionArray, other)
    if self.dtype != other.dtype or len(self) != len(other):
        return False

    matches = self == other
    if isinstance(matches, ExtensionArray):
        # boolean array with NA -> fill with False
        matches = matches.fillna(False)

    # Positions that are NA on both sides count as equal.
    # error: Unsupported left operand type for & ("ExtensionArray")
    both_na = self.isna() & other.isna()  # type: ignore[operator]
    return bool((matches | both_na).all())
|
| 1374 |
+
|
| 1375 |
+
def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
    """
    Test each element of the array for membership in ``values``.

    Roughly equivalent to `np.array([x in values for x in self])`

    Parameters
    ----------
    values : np.ndarray or ExtensionArray

    Returns
    -------
    np.ndarray[bool]

    Examples
    --------
    >>> arr = pd.array([1, 2, 3])
    >>> arr.isin([1])
    <BooleanArray>
    [True, False, False]
    Length: 3, dtype: boolean
    """
    # The base implementation compares on the numpy view of the data.
    as_ndarray = np.asarray(self)
    return isin(as_ndarray, values)
|
| 1398 |
+
|
| 1399 |
+
def _values_for_factorize(self) -> tuple[np.ndarray, Any]:
    """
    Provide the array and NA marker used for factorization.

    Returns
    -------
    values : ndarray
        An array suitable for factorization. This should maintain order
        and be a supported dtype (Float64, Int64, UInt64, String, Object).
        By default, the extension array is cast to object dtype.
    na_value : object
        The value in `values` to consider missing. This will be treated
        as NA in the factorization routines, so it will be coded as
        `-1` and not included in `uniques`. By default,
        ``np.nan`` is used.

    Notes
    -----
    The values returned by this method are also used in
    :func:`pandas.util.hash_pandas_object`. If needed, this can be
    overridden in the ``self._hash_pandas_object()`` method.

    Examples
    --------
    >>> pd.array([1, 2, 3])._values_for_factorize()
    (array([1, 2, 3], dtype=object), nan)
    """
    object_values = self.astype(object)
    na_marker = np.nan
    return object_values, na_marker
|
| 1427 |
+
|
| 1428 |
+
def factorize(
    self,
    use_na_sentinel: bool = True,
) -> tuple[np.ndarray, ExtensionArray]:
    """
    Encode the extension array as integer codes plus its unique values.

    Parameters
    ----------
    use_na_sentinel : bool, default True
        If True, the sentinel -1 will be used for NaN values. If False,
        NaN values will be encoded as non-negative integers and will not drop the
        NaN from the uniques of the values.

        .. versionadded:: 1.5.0

    Returns
    -------
    codes : ndarray
        An integer NumPy array that's an indexer into the original
        ExtensionArray.
    uniques : ExtensionArray
        An ExtensionArray containing the unique values of `self`.

        .. note::

           uniques will *not* contain an entry for the NA value of
           the ExtensionArray if there are any missing values present
           in `self`.

    See Also
    --------
    factorize : Top-level factorize method that dispatches here.

    Notes
    -----
    :meth:`pandas.factorize` offers a `sort` keyword as well.

    Examples
    --------
    >>> idx1 = pd.PeriodIndex(["2014-01", "2014-01", "2014-02", "2014-02",
    ...                       "2014-03", "2014-03"], freq="M")
    >>> arr, idx = idx1.factorize()
    >>> arr
    array([0, 0, 1, 1, 2, 2])
    >>> idx
    PeriodIndex(['2014-01', '2014-02', '2014-03'], dtype='period[M]')
    """
    # Implementer note: There are two ways to override the behavior of
    # pandas.factorize
    # 1. _values_for_factorize and _from_factorize.
    #    Specify the values passed to pandas' internal factorization
    #    routines, and how to convert from those values back to the
    #    original ExtensionArray.
    # 2. ExtensionArray.factorize.
    #    Complete control over factorization.
    raw_values, na_marker = self._values_for_factorize()

    codes, raw_uniques = factorize_array(
        raw_values, use_na_sentinel=use_na_sentinel, na_value=na_marker
    )

    # Convert the raw unique values back into an array like ``self``.
    return codes, self._from_factorized(raw_uniques, self)
|
| 1492 |
+
|
| 1493 |
+
_extension_array_shared_docs[
|
| 1494 |
+
"repeat"
|
| 1495 |
+
] = """
|
| 1496 |
+
Repeat elements of a %(klass)s.
|
| 1497 |
+
|
| 1498 |
+
Returns a new %(klass)s where each element of the current %(klass)s
|
| 1499 |
+
is repeated consecutively a given number of times.
|
| 1500 |
+
|
| 1501 |
+
Parameters
|
| 1502 |
+
----------
|
| 1503 |
+
repeats : int or array of ints
|
| 1504 |
+
The number of repetitions for each element. This should be a
|
| 1505 |
+
non-negative integer. Repeating 0 times will return an empty
|
| 1506 |
+
%(klass)s.
|
| 1507 |
+
axis : None
|
| 1508 |
+
Must be ``None``. Has no effect but is accepted for compatibility
|
| 1509 |
+
with numpy.
|
| 1510 |
+
|
| 1511 |
+
Returns
|
| 1512 |
+
-------
|
| 1513 |
+
%(klass)s
|
| 1514 |
+
Newly created %(klass)s with repeated elements.
|
| 1515 |
+
|
| 1516 |
+
See Also
|
| 1517 |
+
--------
|
| 1518 |
+
Series.repeat : Equivalent function for Series.
|
| 1519 |
+
Index.repeat : Equivalent function for Index.
|
| 1520 |
+
numpy.repeat : Similar method for :class:`numpy.ndarray`.
|
| 1521 |
+
ExtensionArray.take : Take arbitrary positions.
|
| 1522 |
+
|
| 1523 |
+
Examples
|
| 1524 |
+
--------
|
| 1525 |
+
>>> cat = pd.Categorical(['a', 'b', 'c'])
|
| 1526 |
+
>>> cat
|
| 1527 |
+
['a', 'b', 'c']
|
| 1528 |
+
Categories (3, object): ['a', 'b', 'c']
|
| 1529 |
+
>>> cat.repeat(2)
|
| 1530 |
+
['a', 'a', 'b', 'b', 'c', 'c']
|
| 1531 |
+
Categories (3, object): ['a', 'b', 'c']
|
| 1532 |
+
>>> cat.repeat([1, 2, 3])
|
| 1533 |
+
['a', 'b', 'b', 'c', 'c', 'c']
|
| 1534 |
+
Categories (3, object): ['a', 'b', 'c']
|
| 1535 |
+
"""
|
| 1536 |
+
|
| 1537 |
+
@Substitution(klass="ExtensionArray")
@Appender(_extension_array_shared_docs["repeat"])
def repeat(self, repeats: int | Sequence[int], axis: AxisInt | None = None) -> Self:
    # No inline docstring on purpose: the decorators above attach the shared
    # "repeat" documentation template.
    # ``axis`` is checked by the numpy-compat validator before any work is done.
    nv.validate_repeat((), {"axis": axis})
    # Repeat the positions rather than the values, then gather with ``take``.
    positions = np.arange(len(self))
    repeated_positions = positions.repeat(repeats)
    return self.take(repeated_positions)
|
| 1543 |
+
|
| 1544 |
+
# ------------------------------------------------------------------------
|
| 1545 |
+
# Indexing methods
|
| 1546 |
+
# ------------------------------------------------------------------------
|
| 1547 |
+
|
| 1548 |
+
def take(
    self,
    indices: TakeIndexer,
    *,
    allow_fill: bool = False,
    fill_value: Any = None,
) -> Self:
    """
    Select elements of the array by position.

    This is an abstract method: the base class raises and every
    ExtensionArray subclass is expected to implement it.

    Parameters
    ----------
    indices : sequence of int or one-dimensional np.ndarray of int
        Indices to be taken.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any
          other negative values raise a ``ValueError``.

    fill_value : any, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type, ``self.dtype.na_value``, is used.

        For many ExtensionArrays, there will be two representations of
        `fill_value`: a user-facing "boxed" scalar, and a low-level
        physical NA value. `fill_value` should be the user-facing version,
        and the implementation should handle translating that to the
        physical version for processing the take if necessary.

    Returns
    -------
    ExtensionArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True.

    See Also
    --------
    numpy.take : Take elements from an array along an axis.
    api.extensions.take : Take elements from an array.

    Notes
    -----
    ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
    ``iloc``, when `indices` is a sequence of values. Additionally,
    it's called by :meth:`Series.reindex`, or any other method
    that causes realignment, with a `fill_value`.

    Examples
    --------
    Here's an example implementation, which relies on casting the
    extension array to object dtype. This uses the helper method
    :func:`pandas.api.extensions.take`.

    .. code-block:: python

       def take(self, indices, allow_fill=False, fill_value=None):
           from pandas.core.algorithms import take

           # If the ExtensionArray is backed by an ndarray, then
           # just pass that here instead of coercing to object.
           data = self.astype(object)

           if allow_fill and fill_value is None:
               fill_value = self.dtype.na_value

           # fill value should always be translated from the scalar
           # type for the array, to the physical storage type for
           # the data, before passing to take.

           result = take(data, indices, fill_value=fill_value,
                         allow_fill=allow_fill)
           return self._from_sequence(result, dtype=self.dtype)
    """
    # Implementer note: `fill_value` is a user-facing value, i.e. an
    # instance of self.dtype.type; `fill_value=None` means "use
    # `self.dtype.na_value`". When the physical storage type of your
    # ExtensionArray differs from the user-facing type, your implementation
    # must cast to the storage type before calling
    # pandas.api.extensions.take
    raise AbstractMethodError(self)
|
| 1642 |
+
|
| 1643 |
+
def copy(self) -> Self:
    """
    Return an independent copy of the array.

    This is an abstract method: the base class raises and subclasses
    provide the implementation.

    Returns
    -------
    ExtensionArray

    Examples
    --------
    >>> arr = pd.array([1, 2, 3])
    >>> arr2 = arr.copy()
    >>> arr[0] = 2
    >>> arr2
    <IntegerArray>
    [1, 2, 3]
    Length: 3, dtype: Int64
    """
    # Abstract in the base class; concrete arrays must override.
    raise AbstractMethodError(self)
|
| 1662 |
+
|
| 1663 |
+
def view(self, dtype: Dtype | None = None) -> ArrayLike:
    """
    Return a new object that shares this array's data.

    Parameters
    ----------
    dtype : str, np.dtype, or ExtensionDtype, optional
        Default None. The base class only supports ``None``.

    Returns
    -------
    ExtensionArray or np.ndarray
        A view on the :class:`ExtensionArray`'s data.

    Raises
    ------
    NotImplementedError
        If ``dtype`` is not None.

    Examples
    --------
    This gives view on the underlying data of an ``ExtensionArray`` and is not a
    copy. Modifications on either the view or the original ``ExtensionArray``
    will be reflected on the underlying data:

    >>> arr = pd.array([1, 2, 3])
    >>> arr2 = arr.view()
    >>> arr[0] = 2
    >>> arr2
    <IntegerArray>
    [2, 2, 3]
    Length: 3, dtype: Int64
    """
    # NB:
    # - This must return a *new* object referencing the same data, not self.
    # - The only case that *must* be implemented is with dtype=None,
    #   giving a view with the same dtype as self.
    if dtype is None:
        return self[:]
    raise NotImplementedError(dtype)
|
| 1698 |
+
|
| 1699 |
+
# ------------------------------------------------------------------------
|
| 1700 |
+
# Printing
|
| 1701 |
+
# ------------------------------------------------------------------------
|
| 1702 |
+
|
| 1703 |
+
def __repr__(self) -> str:
    # Arrays with more than one dimension use the dedicated 2D layout.
    if self.ndim > 1:
        return self._repr_2d()

    from pandas.io.formats.printing import format_object_summary

    # the short repr has no trailing newline, while the truncated
    # repr does. So we include a newline in our template, and strip
    # any trailing newlines from format_object_summary
    summary = format_object_summary(self, self._formatter(), indent_for_name=False)
    body = summary.rstrip(", \n")
    header = f"<{type(self).__name__}>\n"
    footer = self._get_repr_footer()
    return f"{header}{body}\n{footer}"
|
| 1718 |
+
|
| 1719 |
+
def _get_repr_footer(self) -> str:
    # GH#24278
    # Last line of the repr: the shape for arrays with more than one
    # dimension, the length otherwise, followed by the dtype.
    is_multidim = self.ndim > 1
    return (
        f"Shape: {self.shape}, dtype: {self.dtype}"
        if is_multidim
        else f"Length: {len(self)}, dtype: {self.dtype}"
    )
|
| 1724 |
+
|
| 1725 |
+
def _repr_2d(self) -> str:
    from pandas.io.formats.printing import format_object_summary

    # the short repr has no trailing newline, while the truncated
    # repr does. So we include a newline in our template, and strip
    # any trailing newlines from format_object_summary
    row_reprs = []
    for row in self:
        # One summary per element yielded by iterating ``self``.
        row_summary = format_object_summary(
            row, self._formatter(), indent_for_name=False
        )
        row_reprs.append(row_summary.rstrip(", \n"))
    body = ",\n".join(row_reprs)
    header = f"<{type(self).__name__}>"
    footer = self._get_repr_footer()
    return f"{header}\n[\n{body}\n]\n{footer}"
|
| 1741 |
+
|
| 1742 |
+
def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
    """
    Return the function used to format individual scalar values.

    This is used in the default '__repr__'. The returned formatting
    function receives instances of your scalar type.

    Parameters
    ----------
    boxed : bool, default False
        Indicates whether the array is being printed within a Series,
        DataFrame, or Index (True), or just by itself (False). This may
        be useful if you want scalar values to appear differently within
        a Series versus on its own (e.g. quoted or not).

    Returns
    -------
    Callable[[Any], str]
        A callable that gets instances of the scalar type and
        returns a string. By default, :func:`repr` is used
        when ``boxed=False`` and :func:`str` is used when
        ``boxed=True``.

    Examples
    --------
    >>> class MyExtensionArray(pd.arrays.NumpyExtensionArray):
    ...     def _formatter(self, boxed=False):
    ...         return lambda x: '*' + str(x) + '*' if boxed else repr(x) + '*'
    >>> MyExtensionArray(np.array([1, 2, 3, 4]))
    <MyExtensionArray>
    [1*, 2*, 3*, 4*]
    Length: 4, dtype: int64
    """
    return str if boxed else repr
|
| 1779 |
+
|
| 1780 |
+
# ------------------------------------------------------------------------
|
| 1781 |
+
# Reshaping
|
| 1782 |
+
# ------------------------------------------------------------------------
|
| 1783 |
+
|
| 1784 |
+
def transpose(self, *axes: int) -> ExtensionArray:
    """
    Return a transposed view on this array.

    Because ExtensionArrays are always 1D, this is a no-op. It is included
    for compatibility with np.ndarray.

    Parameters
    ----------
    *axes : int
        Accepted for signature compatibility; not used by this
        implementation.

    Returns
    -------
    ExtensionArray
        The result of taking a full slice of ``self``.

    Examples
    --------
    >>> pd.array([1, 2, 3]).transpose()
    <IntegerArray>
    [1, 2, 3]
    Length: 3, dtype: Int64
    """
    everything = slice(None)
    return self[everything]
|
| 1803 |
+
|
| 1804 |
+
@property
def T(self) -> ExtensionArray:
    """
    Shorthand for ``self.transpose()``.
    """
    transposed = self.transpose()
    return transposed
|
| 1807 |
+
|
| 1808 |
+
def ravel(self, order: Literal["C", "F", "A", "K"] | None = "C") -> ExtensionArray:
    """
    Return a flattened view on this array.

    Parameters
    ----------
    order : {None, 'C', 'F', 'A', 'K'}, default 'C'
        Ignored; present only for compatibility with NumPy.

    Returns
    -------
    ExtensionArray
        ``self``, unchanged.

    Notes
    -----
    - Because ExtensionArrays are 1D-only, this is a no-op.
    - The "order" argument is ignored, is for compatibility with NumPy.

    Examples
    --------
    >>> pd.array([1, 2, 3]).ravel()
    <IntegerArray>
    [1, 2, 3]
    Length: 3, dtype: Int64
    """
    # Nothing to flatten: hand back the very same object.
    return self
|
| 1833 |
+
|
| 1834 |
+
@classmethod
|
| 1835 |
+
def _concat_same_type(cls, to_concat: Sequence[Self]) -> Self:
|
| 1836 |
+
"""
|
| 1837 |
+
Concatenate multiple array of this dtype.
|
| 1838 |
+
|
| 1839 |
+
Parameters
|
| 1840 |
+
----------
|
| 1841 |
+
to_concat : sequence of this type
|
| 1842 |
+
|
| 1843 |
+
Returns
|
| 1844 |
+
-------
|
| 1845 |
+
ExtensionArray
|
| 1846 |
+
|
| 1847 |
+
Examples
|
| 1848 |
+
--------
|
| 1849 |
+
>>> arr1 = pd.array([1, 2, 3])
|
| 1850 |
+
>>> arr2 = pd.array([4, 5, 6])
|
| 1851 |
+
>>> pd.arrays.IntegerArray._concat_same_type([arr1, arr2])
|
| 1852 |
+
<IntegerArray>
|
| 1853 |
+
[1, 2, 3, 4, 5, 6]
|
| 1854 |
+
Length: 6, dtype: Int64
|
| 1855 |
+
"""
|
| 1856 |
+
# Implementer note: this method will only be called with a sequence of
|
| 1857 |
+
# ExtensionArrays of this class and with the same dtype as self. This
|
| 1858 |
+
# should allow "easy" concatenation (no upcasting needed), and result
|
| 1859 |
+
# in a new ExtensionArray of the same dtype.
|
| 1860 |
+
# Note: this strict behaviour is only guaranteed starting with pandas 1.1
|
| 1861 |
+
raise AbstractMethodError(cls)
|
| 1862 |
+
|
| 1863 |
+
# The _can_hold_na attribute is set to True so that pandas internals
|
| 1864 |
+
# will use the ExtensionDtype.na_value as the NA value in operations
|
| 1865 |
+
# such as take(), reindex(), shift(), etc. In addition, those results
|
| 1866 |
+
# will then be of the ExtensionArray subclass rather than an array
|
| 1867 |
+
# of objects
|
| 1868 |
+
@cache_readonly
def _can_hold_na(self) -> bool:
    """
    Whether this array can hold NA values, as reported by its dtype.
    """
    dtype = self.dtype
    return dtype._can_hold_na
|
| 1871 |
+
|
| 1872 |
+
def _accumulate(
    self, name: str, *, skipna: bool = True, **kwargs
) -> ExtensionArray:
    """
    Return an ExtensionArray performing an accumulation operation.

    The underlying data type might change. This base implementation does
    not implement any accumulation and always raises.

    Parameters
    ----------
    name : str
        Name of the function, supported values are:
        - cummin
        - cummax
        - cumsum
        - cumprod
    skipna : bool, default True
        If True, skip NA values.
    **kwargs
        Additional keyword arguments passed to the accumulation function.
        Currently, there is no supported kwarg.

    Returns
    -------
    array

    Raises
    ------
    NotImplementedError : subclass does not define accumulations

    Examples
    --------
    >>> arr = pd.array([1, 2, 3])
    >>> arr._accumulate(name='cumsum')
    <IntegerArray>
    [1, 3, 6]
    Length: 3, dtype: Int64
    """
    message = f"cannot perform {name} with type {self.dtype}"
    raise NotImplementedError(message)
|
| 1911 |
+
|
| 1912 |
+
def _reduce(
    self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
):
    """
    Return a scalar result of performing the reduction operation.

    The reduction is looked up as an attribute called ``name`` on ``self``
    and invoked with ``skipna`` and any extra keyword arguments.

    Parameters
    ----------
    name : str
        Name of the function, supported values are:
        { any, all, min, max, sum, mean, median, prod,
        std, var, sem, kurt, skew }.
    skipna : bool, default True
        If True, skip NaN values.
    keepdims : bool, default False
        If False, a scalar is returned.
        If True, the result has dimension with size one along the reduced axis.

        .. versionadded:: 2.1

           This parameter is not required in the _reduce signature to keep backward
           compatibility, but will become required in the future. If the parameter
           is not found in the method signature, a FutureWarning will be emitted.
    **kwargs
        Additional keyword arguments passed to the reduction function.
        Currently, `ddof` is the only supported kwarg.

    Returns
    -------
    scalar
        Or a length-one ``np.ndarray`` wrapping the scalar when
        ``keepdims`` is True.

    Raises
    ------
    TypeError : subclass does not define reductions

    Examples
    --------
    >>> pd.array([1, 2, 3])._reduce("min")
    1
    """
    reducer = getattr(self, name, None)
    if reducer is None:
        owner = type(self).__name__
        raise TypeError(
            f"'{owner}' with dtype {self.dtype} "
            f"does not support reduction '{name}'"
        )

    value = reducer(skipna=skipna, **kwargs)
    return np.array([value]) if keepdims else value
|
| 1963 |
+
|
| 1964 |
+
# https://github.com/python/typeshed/issues/2148#issuecomment-520783318
|
| 1965 |
+
# Incompatible types in assignment (expression has type "None", base class
|
| 1966 |
+
# "object" defined the type as "Callable[[object], int]")
|
| 1967 |
+
__hash__: ClassVar[None] # type: ignore[assignment]
|
| 1968 |
+
|
| 1969 |
+
# ------------------------------------------------------------------------
|
| 1970 |
+
# Non-Optimized Default Methods; in the case of the private methods here,
|
| 1971 |
+
# these are not guaranteed to be stable across pandas versions.
|
| 1972 |
+
|
| 1973 |
+
def _values_for_json(self) -> np.ndarray:
    """
    Specify how to render our entries in to_json.

    The default converts ``self`` with ``np.asarray``.

    Returns
    -------
    np.ndarray

    Notes
    -----
    The dtype on the returned ndarray is not restricted, but for non-native
    types that are not specifically handled in objToJSON.c, to_json is
    liable to raise. In these cases, it may be safer to return an ndarray
    of strings.
    """
    as_ndarray = np.asarray(self)
    return as_ndarray
|
| 1985 |
+
|
| 1986 |
+
def _hash_pandas_object(
    self, *, encoding: str, hash_key: str, categorize: bool
) -> npt.NDArray[np.uint64]:
    """
    Hook for hash_pandas_object.

    Default is to use the values returned by _values_for_factorize.

    Parameters
    ----------
    encoding : str
        Encoding for data & key when strings.
    hash_key : str
        Hash_key for string key to encode.
    categorize : bool
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    np.ndarray[uint64]

    Examples
    --------
    >>> pd.array([1, 2])._hash_pandas_object(encoding='utf-8',
    ...                                      hash_key="1000000000000000",
    ...                                      categorize=False
    ...                                      )
    array([ 6238072747940578789, 15839785061582574730], dtype=uint64)
    """
    from pandas.core.util.hashing import hash_array

    # Only the values are hashed; the second item of the pair is unused here.
    to_hash, _unused = self._values_for_factorize()
    hashing_options = {
        "encoding": encoding,
        "hash_key": hash_key,
        "categorize": categorize,
    }
    return hash_array(to_hash, **hashing_options)
|
| 2022 |
+
|
| 2023 |
+
def _explode(self) -> tuple[Self, npt.NDArray[np.uint64]]:
|
| 2024 |
+
"""
|
| 2025 |
+
Transform each element of list-like to a row.
|
| 2026 |
+
|
| 2027 |
+
For arrays that do not contain list-like elements the default
|
| 2028 |
+
implementation of this method just returns a copy and an array
|
| 2029 |
+
of ones (unchanged index).
|
| 2030 |
+
|
| 2031 |
+
Returns
|
| 2032 |
+
-------
|
| 2033 |
+
ExtensionArray
|
| 2034 |
+
Array with the exploded values.
|
| 2035 |
+
np.ndarray[uint64]
|
| 2036 |
+
The original lengths of each list-like for determining the
|
| 2037 |
+
resulting index.
|
| 2038 |
+
|
| 2039 |
+
See Also
|
| 2040 |
+
--------
|
| 2041 |
+
Series.explode : The method on the ``Series`` object that this
|
| 2042 |
+
extension array method is meant to support.
|
| 2043 |
+
|
| 2044 |
+
Examples
|
| 2045 |
+
--------
|
| 2046 |
+
>>> import pyarrow as pa
|
| 2047 |
+
>>> a = pd.array([[1, 2, 3], [4], [5, 6]],
|
| 2048 |
+
... dtype=pd.ArrowDtype(pa.list_(pa.int64())))
|
| 2049 |
+
>>> a._explode()
|
| 2050 |
+
(<ArrowExtensionArray>
|
| 2051 |
+
[1, 2, 3, 4, 5, 6]
|
| 2052 |
+
Length: 6, dtype: int64[pyarrow], array([3, 1, 2], dtype=int32))
|
| 2053 |
+
"""
|
| 2054 |
+
values = self.copy()
|
| 2055 |
+
counts = np.ones(shape=(len(self),), dtype=np.uint64)
|
| 2056 |
+
return values, counts
|
| 2057 |
+
|
| 2058 |
+
def tolist(self) -> list:
    """
    Return a list of the values.

    These are each a scalar type, which is a Python scalar
    (for str, int, float) or a pandas scalar
    (for Timestamp/Timedelta/Interval/Period)

    Arrays with more than one dimension produce a nested list, built by
    calling ``tolist`` on every element obtained by iterating ``self``.

    Returns
    -------
    list

    Examples
    --------
    >>> arr = pd.array([1, 2, 3])
    >>> arr.tolist()
    [1, 2, 3]
    """
    if self.ndim <= 1:
        return list(self)

    nested = []
    for row in self:
        nested.append(row.tolist())
    return nested
|
| 2079 |
+
|
| 2080 |
+
def delete(self, loc: PositionalIndexer) -> Self:
|
| 2081 |
+
indexer = np.delete(np.arange(len(self)), loc)
|
| 2082 |
+
return self.take(indexer)
|
| 2083 |
+
|
| 2084 |
+
def insert(self, loc: int, item) -> Self:
|
| 2085 |
+
"""
|
| 2086 |
+
Insert an item at the given position.
|
| 2087 |
+
|
| 2088 |
+
Parameters
|
| 2089 |
+
----------
|
| 2090 |
+
loc : int
|
| 2091 |
+
item : scalar-like
|
| 2092 |
+
|
| 2093 |
+
Returns
|
| 2094 |
+
-------
|
| 2095 |
+
same type as self
|
| 2096 |
+
|
| 2097 |
+
Notes
|
| 2098 |
+
-----
|
| 2099 |
+
This method should be both type and dtype-preserving. If the item
|
| 2100 |
+
cannot be held in an array of this type/dtype, either ValueError or
|
| 2101 |
+
TypeError should be raised.
|
| 2102 |
+
|
| 2103 |
+
The default implementation relies on _from_sequence to raise on invalid
|
| 2104 |
+
items.
|
| 2105 |
+
|
| 2106 |
+
Examples
|
| 2107 |
+
--------
|
| 2108 |
+
>>> arr = pd.array([1, 2, 3])
|
| 2109 |
+
>>> arr.insert(2, -1)
|
| 2110 |
+
<IntegerArray>
|
| 2111 |
+
[1, 2, -1, 3]
|
| 2112 |
+
Length: 4, dtype: Int64
|
| 2113 |
+
"""
|
| 2114 |
+
loc = validate_insert_loc(loc, len(self))
|
| 2115 |
+
|
| 2116 |
+
item_arr = type(self)._from_sequence([item], dtype=self.dtype)
|
| 2117 |
+
|
| 2118 |
+
return type(self)._concat_same_type([self[:loc], item_arr, self[loc:]])
|
| 2119 |
+
|
| 2120 |
+
def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
    """
    Analogue to np.putmask(self, mask, value)

    Parameters
    ----------
    mask : np.ndarray[bool]
    value : scalar or listlike
        If listlike, must be arraylike with same length as self.

    Returns
    -------
    None

    Notes
    -----
    Unlike np.putmask, we do not repeat listlike values with mismatched length.
    'value' should either be a scalar or an arraylike with the same length
    as self.
    """
    # A list-like value is aligned positionally with self, so only the
    # entries at the masked positions are written.
    replacement = value[mask] if is_list_like(value) else value
    self[mask] = replacement
|
| 2146 |
+
|
| 2147 |
+
def _where(self, mask: npt.NDArray[np.bool_], value) -> Self:
|
| 2148 |
+
"""
|
| 2149 |
+
Analogue to np.where(mask, self, value)
|
| 2150 |
+
|
| 2151 |
+
Parameters
|
| 2152 |
+
----------
|
| 2153 |
+
mask : np.ndarray[bool]
|
| 2154 |
+
value : scalar or listlike
|
| 2155 |
+
|
| 2156 |
+
Returns
|
| 2157 |
+
-------
|
| 2158 |
+
same type as self
|
| 2159 |
+
"""
|
| 2160 |
+
result = self.copy()
|
| 2161 |
+
|
| 2162 |
+
if is_list_like(value):
|
| 2163 |
+
val = value[~mask]
|
| 2164 |
+
else:
|
| 2165 |
+
val = value
|
| 2166 |
+
|
| 2167 |
+
result[~mask] = val
|
| 2168 |
+
return result
|
| 2169 |
+
|
| 2170 |
+
# TODO(3.0): this can be removed once GH#33302 deprecation is enforced
|
| 2171 |
+
def _fill_mask_inplace(
    self, method: str, limit: int | None, mask: npt.NDArray[np.bool_]
) -> None:
    """
    Replace values in locations specified by 'mask' using pad or backfill.

    The fill runs on an object-dtype conversion of ``self``; the filled
    values are converted back with ``_from_sequence`` and written into
    ``self`` at the masked positions.

    Parameters
    ----------
    method : str
        Passed to ``missing.get_fill_func`` to select the fill function.
    limit : int or None
        Forwarded to the fill function.
    mask : np.ndarray[bool]
        Positions to fill.

    See Also
    --------
    ExtensionArray.fillna
    """
    filler = missing.get_fill_func(method)
    as_objects = self.astype(object)
    # NB: if we don't copy mask here, it may be altered inplace, which
    #  would mess up the `self[mask] = ...` below.
    filler(as_objects, limit=limit, mask=mask.copy())
    refilled = self._from_sequence(as_objects, dtype=self.dtype)
    self[mask] = refilled[mask]
|
| 2188 |
+
|
| 2189 |
+
def _rank(
    self,
    *,
    axis: AxisInt = 0,
    method: str = "average",
    na_option: str = "keep",
    ascending: bool = True,
    pct: bool = False,
):
    """
    See Series.rank.__doc__.

    Ranks the values returned by ``_values_for_argsort``. Only ``axis=0``
    is supported; any other axis raises ``NotImplementedError``.
    """
    if axis != 0:
        raise NotImplementedError

    sortable = self._values_for_argsort()
    rank_options = {
        "axis": axis,
        "method": method,
        "na_option": na_option,
        "ascending": ascending,
        "pct": pct,
    }
    return rank(sortable, **rank_options)
|
| 2212 |
+
|
| 2213 |
+
@classmethod
def _empty(cls, shape: Shape, dtype: ExtensionDtype):
    """
    Create an ExtensionArray with the given shape and dtype.

    The default builds a zero-length array with ``_from_sequence`` and
    expands it to ``shape`` by taking index ``-1`` everywhere with
    ``allow_fill=True``.

    Raises
    ------
    NotImplementedError
        If the result is not an instance of ``cls`` or its dtype differs
        from ``dtype``.

    See Also
    --------
    ExtensionDtype.empty
        ExtensionDtype.empty is the 'official' public version of this API.
    """
    # Implementer note: while ExtensionDtype.empty is the public way to
    # call this method, it is still required to implement this `_empty`
    # method as well (it is called internally in pandas)
    seed = cls._from_sequence([], dtype=dtype)

    fill_everywhere = np.broadcast_to(np.intp(-1), shape)
    filled = seed.take(fill_everywhere, allow_fill=True)

    wrong_type = not isinstance(filled, cls)
    if wrong_type or dtype != filled.dtype:
        raise NotImplementedError(
            f"Default 'empty' implementation is invalid for dtype='{dtype}'"
        )
    return filled
|
| 2235 |
+
|
| 2236 |
+
def _quantile(self, qs: npt.NDArray[np.float64], interpolation: str) -> Self:
|
| 2237 |
+
"""
|
| 2238 |
+
Compute the quantiles of self for each quantile in `qs`.
|
| 2239 |
+
|
| 2240 |
+
Parameters
|
| 2241 |
+
----------
|
| 2242 |
+
qs : np.ndarray[float64]
|
| 2243 |
+
interpolation: str
|
| 2244 |
+
|
| 2245 |
+
Returns
|
| 2246 |
+
-------
|
| 2247 |
+
same type as self
|
| 2248 |
+
"""
|
| 2249 |
+
mask = np.asarray(self.isna())
|
| 2250 |
+
arr = np.asarray(self)
|
| 2251 |
+
fill_value = np.nan
|
| 2252 |
+
|
| 2253 |
+
res_values = quantile_with_mask(arr, mask, fill_value, qs, interpolation)
|
| 2254 |
+
return type(self)._from_sequence(res_values)
|
| 2255 |
+
|
| 2256 |
+
def _mode(self, dropna: bool = True) -> Self:
|
| 2257 |
+
"""
|
| 2258 |
+
Returns the mode(s) of the ExtensionArray.
|
| 2259 |
+
|
| 2260 |
+
Always returns `ExtensionArray` even if only one value.
|
| 2261 |
+
|
| 2262 |
+
Parameters
|
| 2263 |
+
----------
|
| 2264 |
+
dropna : bool, default True
|
| 2265 |
+
Don't consider counts of NA values.
|
| 2266 |
+
|
| 2267 |
+
Returns
|
| 2268 |
+
-------
|
| 2269 |
+
same type as self
|
| 2270 |
+
Sorted, if possible.
|
| 2271 |
+
"""
|
| 2272 |
+
# error: Incompatible return value type (got "Union[ExtensionArray,
|
| 2273 |
+
# ndarray[Any, Any]]", expected "Self")
|
| 2274 |
+
return mode(self, dropna=dropna) # type: ignore[return-value]
|
| 2275 |
+
|
| 2276 |
+
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Handle a NumPy ufunc call that involves this array.

    Resolution order:

    1. If any input is a Series, Index or DataFrame, return
       ``NotImplemented``.
    2. Try dispatching to one of our own dunder operators.
    3. If ``out`` was supplied, use the ``out``-aware dispatcher.
    4. For ``method == "reduce"``, try the reduction dispatcher.
    5. Otherwise fall back to the default ufunc implementation.
    """
    pandas_containers = (ABCSeries, ABCIndex, ABCDataFrame)
    for candidate in inputs:
        if isinstance(candidate, pandas_containers):
            return NotImplemented

    via_dunder = arraylike.maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if via_dunder is not NotImplemented:
        return via_dunder

    if "out" in kwargs:
        return arraylike.dispatch_ufunc_with_out(
            self, ufunc, method, *inputs, **kwargs
        )

    if method == "reduce":
        reduced = arraylike.dispatch_reduction_ufunc(
            self, ufunc, method, *inputs, **kwargs
        )
        if reduced is not NotImplemented:
            return reduced

    return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)
|
| 2301 |
+
|
| 2302 |
+
def map(self, mapper, na_action=None):
|
| 2303 |
+
"""
|
| 2304 |
+
Map values using an input mapping or function.
|
| 2305 |
+
|
| 2306 |
+
Parameters
|
| 2307 |
+
----------
|
| 2308 |
+
mapper : function, dict, or Series
|
| 2309 |
+
Mapping correspondence.
|
| 2310 |
+
na_action : {None, 'ignore'}, default None
|
| 2311 |
+
If 'ignore', propagate NA values, without passing them to the
|
| 2312 |
+
mapping correspondence. If 'ignore' is not supported, a
|
| 2313 |
+
``NotImplementedError`` should be raised.
|
| 2314 |
+
|
| 2315 |
+
Returns
|
| 2316 |
+
-------
|
| 2317 |
+
Union[ndarray, Index, ExtensionArray]
|
| 2318 |
+
The output of the mapping function applied to the array.
|
| 2319 |
+
If the function returns a tuple with more than one element
|
| 2320 |
+
a MultiIndex will be returned.
|
| 2321 |
+
"""
|
| 2322 |
+
return map_array(self, mapper, na_action=na_action)
|
| 2323 |
+
|
| 2324 |
+
# ------------------------------------------------------------------------
|
| 2325 |
+
# GroupBy Methods
|
| 2326 |
+
|
| 2327 |
+
def _groupby_op(
|
| 2328 |
+
self,
|
| 2329 |
+
*,
|
| 2330 |
+
how: str,
|
| 2331 |
+
has_dropped_na: bool,
|
| 2332 |
+
min_count: int,
|
| 2333 |
+
ngroups: int,
|
| 2334 |
+
ids: npt.NDArray[np.intp],
|
| 2335 |
+
**kwargs,
|
| 2336 |
+
) -> ArrayLike:
|
| 2337 |
+
"""
|
| 2338 |
+
Dispatch GroupBy reduction or transformation operation.
|
| 2339 |
+
|
| 2340 |
+
This is an *experimental* API to allow ExtensionArray authors to implement
|
| 2341 |
+
reductions and transformations. The API is subject to change.
|
| 2342 |
+
|
| 2343 |
+
Parameters
|
| 2344 |
+
----------
|
| 2345 |
+
how : {'any', 'all', 'sum', 'prod', 'min', 'max', 'mean', 'median',
|
| 2346 |
+
'median', 'var', 'std', 'sem', 'nth', 'last', 'ohlc',
|
| 2347 |
+
'cumprod', 'cumsum', 'cummin', 'cummax', 'rank'}
|
| 2348 |
+
has_dropped_na : bool
|
| 2349 |
+
min_count : int
|
| 2350 |
+
ngroups : int
|
| 2351 |
+
ids : np.ndarray[np.intp]
|
| 2352 |
+
ids[i] gives the integer label for the group that self[i] belongs to.
|
| 2353 |
+
**kwargs : operation-specific
|
| 2354 |
+
'any', 'all' -> ['skipna']
|
| 2355 |
+
'var', 'std', 'sem' -> ['ddof']
|
| 2356 |
+
'cumprod', 'cumsum', 'cummin', 'cummax' -> ['skipna']
|
| 2357 |
+
'rank' -> ['ties_method', 'ascending', 'na_option', 'pct']
|
| 2358 |
+
|
| 2359 |
+
Returns
|
| 2360 |
+
-------
|
| 2361 |
+
np.ndarray or ExtensionArray
|
| 2362 |
+
"""
|
| 2363 |
+
from pandas.core.arrays.string_ import StringDtype
|
| 2364 |
+
from pandas.core.groupby.ops import WrappedCythonOp
|
| 2365 |
+
|
| 2366 |
+
kind = WrappedCythonOp.get_kind_from_how(how)
|
| 2367 |
+
op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na)
|
| 2368 |
+
|
| 2369 |
+
# GH#43682
|
| 2370 |
+
if isinstance(self.dtype, StringDtype):
|
| 2371 |
+
# StringArray
|
| 2372 |
+
if op.how not in ["any", "all"]:
|
| 2373 |
+
# Fail early to avoid conversion to object
|
| 2374 |
+
op._get_cython_function(op.kind, op.how, np.dtype(object), False)
|
| 2375 |
+
npvalues = self.to_numpy(object, na_value=np.nan)
|
| 2376 |
+
else:
|
| 2377 |
+
raise NotImplementedError(
|
| 2378 |
+
f"function is not implemented for this dtype: {self.dtype}"
|
| 2379 |
+
)
|
| 2380 |
+
|
| 2381 |
+
res_values = op._cython_op_ndim_compat(
|
| 2382 |
+
npvalues,
|
| 2383 |
+
min_count=min_count,
|
| 2384 |
+
ngroups=ngroups,
|
| 2385 |
+
comp_ids=ids,
|
| 2386 |
+
mask=None,
|
| 2387 |
+
**kwargs,
|
| 2388 |
+
)
|
| 2389 |
+
|
| 2390 |
+
if op.how in op.cast_blocklist:
|
| 2391 |
+
# i.e. how in ["rank"], since other cast_blocklist methods don't go
|
| 2392 |
+
# through cython_operation
|
| 2393 |
+
return res_values
|
| 2394 |
+
|
| 2395 |
+
if isinstance(self.dtype, StringDtype):
|
| 2396 |
+
dtype = self.dtype
|
| 2397 |
+
string_array_cls = dtype.construct_array_type()
|
| 2398 |
+
return string_array_cls._from_sequence(res_values, dtype=dtype)
|
| 2399 |
+
|
| 2400 |
+
else:
|
| 2401 |
+
raise NotImplementedError
|
| 2402 |
+
|
| 2403 |
+
|
| 2404 |
+
class ExtensionArraySupportsAnyAll(ExtensionArray):
    """
    ExtensionArray variant that declares ``any`` and ``all`` reductions.

    Both methods are abstract here and raise ``AbstractMethodError``.
    """

    def any(self, *, skipna: bool = True) -> bool:
        """Abstract ``any`` reduction; raises ``AbstractMethodError`` here."""
        raise AbstractMethodError(self)

    def all(self, *, skipna: bool = True) -> bool:
        """Abstract ``all`` reduction; raises ``AbstractMethodError`` here."""
        raise AbstractMethodError(self)
|
| 2410 |
+
|
| 2411 |
+
|
| 2412 |
+
class ExtensionOpsMixin:
    """
    A base class for linking the operators to their dunder names.

    The three ``_create_*_method`` factories are abstract here and raise
    ``AbstractMethodError``. Each ``_add_*_ops`` classmethod calls the
    matching factory once per operator and assigns the result to the
    corresponding dunder attribute on the class.

    .. note::

       You may want to set ``__array_priority__`` if you want your
       implementation to be called when involved in binary operations
       with NumPy arrays.
    """

    @classmethod
    def _create_arithmetic_method(cls, op):
        """Return the method implementing arithmetic ``op`` (abstract)."""
        raise AbstractMethodError(cls)

    @classmethod
    def _add_arithmetic_ops(cls) -> None:
        """Attach the arithmetic dunder methods to ``cls``."""
        arithmetic_ops = (
            ("__add__", operator.add),
            ("__radd__", roperator.radd),
            ("__sub__", operator.sub),
            ("__rsub__", roperator.rsub),
            ("__mul__", operator.mul),
            ("__rmul__", roperator.rmul),
            ("__pow__", operator.pow),
            ("__rpow__", roperator.rpow),
            ("__mod__", operator.mod),
            ("__rmod__", roperator.rmod),
            ("__floordiv__", operator.floordiv),
            ("__rfloordiv__", roperator.rfloordiv),
            ("__truediv__", operator.truediv),
            ("__rtruediv__", roperator.rtruediv),
            ("__divmod__", divmod),
            ("__rdivmod__", roperator.rdivmod),
        )
        for dunder, op in arithmetic_ops:
            setattr(cls, dunder, cls._create_arithmetic_method(op))

    @classmethod
    def _create_comparison_method(cls, op):
        """Return the method implementing comparison ``op`` (abstract)."""
        raise AbstractMethodError(cls)

    @classmethod
    def _add_comparison_ops(cls) -> None:
        """Attach the comparison dunder methods to ``cls``."""
        comparison_ops = (
            ("__eq__", operator.eq),
            ("__ne__", operator.ne),
            ("__lt__", operator.lt),
            ("__gt__", operator.gt),
            ("__le__", operator.le),
            ("__ge__", operator.ge),
        )
        for dunder, op in comparison_ops:
            setattr(cls, dunder, cls._create_comparison_method(op))

    @classmethod
    def _create_logical_method(cls, op):
        """Return the method implementing logical ``op`` (abstract)."""
        raise AbstractMethodError(cls)

    @classmethod
    def _add_logical_ops(cls) -> None:
        """Attach the logical dunder methods to ``cls``."""
        # Names are spelled out: operator.and_/or_ carry a trailing
        # underscore, so the dunder cannot be derived from ``op.__name__``.
        logical_ops = (
            ("__and__", operator.and_),
            ("__rand__", roperator.rand_),
            ("__or__", operator.or_),
            ("__ror__", roperator.ror_),
            ("__xor__", operator.xor),
            ("__rxor__", roperator.rxor),
        )
        for dunder, op in logical_ops:
            setattr(cls, dunder, cls._create_logical_method(op))
|
| 2473 |
+
|
| 2474 |
+
|
| 2475 |
+
class ExtensionScalarOpsMixin(ExtensionOpsMixin):
|
| 2476 |
+
"""
|
| 2477 |
+
A mixin for defining ops on an ExtensionArray.
|
| 2478 |
+
|
| 2479 |
+
It is assumed that the underlying scalar objects have the operators
|
| 2480 |
+
already defined.
|
| 2481 |
+
|
| 2482 |
+
Notes
|
| 2483 |
+
-----
|
| 2484 |
+
If you have defined a subclass MyExtensionArray(ExtensionArray), then
|
| 2485 |
+
use MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin) to
|
| 2486 |
+
get the arithmetic operators. After the definition of MyExtensionArray,
|
| 2487 |
+
insert the lines
|
| 2488 |
+
|
| 2489 |
+
MyExtensionArray._add_arithmetic_ops()
|
| 2490 |
+
MyExtensionArray._add_comparison_ops()
|
| 2491 |
+
|
| 2492 |
+
to link the operators to your class.
|
| 2493 |
+
|
| 2494 |
+
.. note::
|
| 2495 |
+
|
| 2496 |
+
You may want to set ``__array_priority__`` if you want your
|
| 2497 |
+
implementation to be called when involved in binary operations
|
| 2498 |
+
with NumPy arrays.
|
| 2499 |
+
"""
|
| 2500 |
+
|
| 2501 |
+
@classmethod
|
| 2502 |
+
def _create_method(cls, op, coerce_to_dtype: bool = True, result_dtype=None):
|
| 2503 |
+
"""
|
| 2504 |
+
A class method that returns a method that will correspond to an
|
| 2505 |
+
operator for an ExtensionArray subclass, by dispatching to the
|
| 2506 |
+
relevant operator defined on the individual elements of the
|
| 2507 |
+
ExtensionArray.
|
| 2508 |
+
|
| 2509 |
+
Parameters
|
| 2510 |
+
----------
|
| 2511 |
+
op : function
|
| 2512 |
+
An operator that takes arguments op(a, b)
|
| 2513 |
+
coerce_to_dtype : bool, default True
|
| 2514 |
+
boolean indicating whether to attempt to convert
|
| 2515 |
+
the result to the underlying ExtensionArray dtype.
|
| 2516 |
+
If it's not possible to create a new ExtensionArray with the
|
| 2517 |
+
values, an ndarray is returned instead.
|
| 2518 |
+
|
| 2519 |
+
Returns
|
| 2520 |
+
-------
|
| 2521 |
+
Callable[[Any, Any], Union[ndarray, ExtensionArray]]
|
| 2522 |
+
A method that can be bound to a class. When used, the method
|
| 2523 |
+
receives the two arguments, one of which is the instance of
|
| 2524 |
+
this class, and should return an ExtensionArray or an ndarray.
|
| 2525 |
+
|
| 2526 |
+
Returning an ndarray may be necessary when the result of the
|
| 2527 |
+
`op` cannot be stored in the ExtensionArray. The dtype of the
|
| 2528 |
+
ndarray uses NumPy's normal inference rules.
|
| 2529 |
+
|
| 2530 |
+
Examples
|
| 2531 |
+
--------
|
| 2532 |
+
Given an ExtensionArray subclass called MyExtensionArray, use
|
| 2533 |
+
|
| 2534 |
+
__add__ = cls._create_method(operator.add)
|
| 2535 |
+
|
| 2536 |
+
in the class definition of MyExtensionArray to create the operator
|
| 2537 |
+
for addition, that will be based on the operator implementation
|
| 2538 |
+
of the underlying elements of the ExtensionArray
|
| 2539 |
+
"""
|
| 2540 |
+
|
| 2541 |
+
def _binop(self, other):
|
| 2542 |
+
def convert_values(param):
|
| 2543 |
+
if isinstance(param, ExtensionArray) or is_list_like(param):
|
| 2544 |
+
ovalues = param
|
| 2545 |
+
else: # Assume its an object
|
| 2546 |
+
ovalues = [param] * len(self)
|
| 2547 |
+
return ovalues
|
| 2548 |
+
|
| 2549 |
+
if isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)):
|
| 2550 |
+
# rely on pandas to unbox and dispatch to us
|
| 2551 |
+
return NotImplemented
|
| 2552 |
+
|
| 2553 |
+
lvalues = self
|
| 2554 |
+
rvalues = convert_values(other)
|
| 2555 |
+
|
| 2556 |
+
# If the operator is not defined for the underlying objects,
|
| 2557 |
+
# a TypeError should be raised
|
| 2558 |
+
res = [op(a, b) for (a, b) in zip(lvalues, rvalues)]
|
| 2559 |
+
|
| 2560 |
+
def _maybe_convert(arr):
|
| 2561 |
+
if coerce_to_dtype:
|
| 2562 |
+
# https://github.com/pandas-dev/pandas/issues/22850
|
| 2563 |
+
# We catch all regular exceptions here, and fall back
|
| 2564 |
+
# to an ndarray.
|
| 2565 |
+
res = maybe_cast_pointwise_result(arr, self.dtype, same_dtype=False)
|
| 2566 |
+
if not isinstance(res, type(self)):
|
| 2567 |
+
# exception raised in _from_sequence; ensure we have ndarray
|
| 2568 |
+
res = np.asarray(arr)
|
| 2569 |
+
else:
|
| 2570 |
+
res = np.asarray(arr, dtype=result_dtype)
|
| 2571 |
+
return res
|
| 2572 |
+
|
| 2573 |
+
if op.__name__ in {"divmod", "rdivmod"}:
|
| 2574 |
+
a, b = zip(*res)
|
| 2575 |
+
return _maybe_convert(a), _maybe_convert(b)
|
| 2576 |
+
|
| 2577 |
+
return _maybe_convert(res)
|
| 2578 |
+
|
| 2579 |
+
op_name = f"__{op.__name__}__"
|
| 2580 |
+
return set_function_name(_binop, op_name, cls)
|
| 2581 |
+
|
| 2582 |
+
@classmethod
def _create_arithmetic_method(cls, op):
    """
    Build the arithmetic method for ``op``.

    Only ``op`` is forwarded to ``cls._create_method``; every other option
    of that factory is left at its own default.

    Parameters
    ----------
    op : function
        The operator to wrap, passed through unchanged.

    Returns
    -------
    The object returned by ``cls._create_method(op)``.
    """
    arithmetic_method = cls._create_method(op)
    return arithmetic_method
|
| 2585 |
+
|
| 2586 |
+
@classmethod
def _create_comparison_method(cls, op):
    """
    Build the comparison method for ``op``.

    Forwards to ``cls._create_method`` with ``coerce_to_dtype=False`` and
    ``result_dtype=bool``.

    Parameters
    ----------
    op : function
        The comparison operator to wrap, passed through unchanged.

    Returns
    -------
    The object returned by ``cls._create_method`` for these options.
    """
    # Comparisons never try to rebuild the extension dtype and always ask
    # for a plain boolean result.
    comparison_options = {"coerce_to_dtype": False, "result_dtype": bool}
    return cls._create_method(op, **comparison_options)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/boolean.py
ADDED
|
@@ -0,0 +1,407 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import numbers
|
| 4 |
+
from typing import (
|
| 5 |
+
TYPE_CHECKING,
|
| 6 |
+
ClassVar,
|
| 7 |
+
cast,
|
| 8 |
+
)
|
| 9 |
+
|
| 10 |
+
import numpy as np
|
| 11 |
+
|
| 12 |
+
from pandas._libs import (
|
| 13 |
+
lib,
|
| 14 |
+
missing as libmissing,
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
from pandas.core.dtypes.common import is_list_like
|
| 18 |
+
from pandas.core.dtypes.dtypes import register_extension_dtype
|
| 19 |
+
from pandas.core.dtypes.missing import isna
|
| 20 |
+
|
| 21 |
+
from pandas.core import ops
|
| 22 |
+
from pandas.core.array_algos import masked_accumulations
|
| 23 |
+
from pandas.core.arrays.masked import (
|
| 24 |
+
BaseMaskedArray,
|
| 25 |
+
BaseMaskedDtype,
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
if TYPE_CHECKING:
|
| 29 |
+
import pyarrow
|
| 30 |
+
|
| 31 |
+
from pandas._typing import (
|
| 32 |
+
Dtype,
|
| 33 |
+
DtypeObj,
|
| 34 |
+
Self,
|
| 35 |
+
npt,
|
| 36 |
+
type_t,
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
@register_extension_dtype
class BooleanDtype(BaseMaskedDtype):
    """
    Extension dtype for boolean data.

    .. warning::

       BooleanDtype is considered experimental. The implementation and
       parts of the API may change without warning.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Examples
    --------
    >>> pd.BooleanDtype()
    BooleanDtype
    """

    name: ClassVar[str] = "boolean"

    # https://github.com/python/mypy/issues/4125
    # error: Signature of "type" incompatible with supertype "BaseMaskedDtype"
    @property
    def type(self) -> type:  # type: ignore[override]
        """The scalar type reported for this dtype (``np.bool_``)."""
        return np.bool_

    @property
    def kind(self) -> str:
        """Single-character kind code; always ``"b"``."""
        return "b"

    @property
    def numpy_dtype(self) -> np.dtype:
        """The NumPy dtype reported for this dtype (``np.dtype("bool")``)."""
        return np.dtype("bool")

    @classmethod
    def construct_array_type(cls) -> type_t[BooleanArray]:
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return BooleanArray

    def __repr__(self) -> str:
        return "BooleanDtype"

    @property
    def _is_boolean(self) -> bool:
        return True

    @property
    def _is_numeric(self) -> bool:
        return True

    def __from_arrow__(
        self, array: pyarrow.Array | pyarrow.ChunkedArray
    ) -> BooleanArray:
        """
        Construct BooleanArray from pyarrow Array/ChunkedArray.

        Raises
        ------
        TypeError
            If ``array.type`` is neither the pyarrow boolean type nor the
            pyarrow null type.
        """
        import pyarrow

        all_null = pyarrow.types.is_null(array.type)
        if not all_null and array.type != pyarrow.bool_():
            raise TypeError(f"Expected array of boolean type, got {array.type} instead")

        # A plain Array is handled as a single chunk; anything else is
        # treated as a pyarrow.ChunkedArray.
        is_single_array = isinstance(array, pyarrow.Array)
        chunks = [array] if is_single_array else array.chunks
        length = len(array) if is_single_array else array.length()

        if all_null:
            # Every entry is masked, so the data buffer is left uninitialised.
            mask = np.ones(length, dtype=bool)
            data = np.empty(length, dtype=bool)
            return BooleanArray(data, mask)

        def _buffer_to_numpy(chunk, buffer):
            # Reinterpret one buffer of ``chunk`` as a boolean array without a
            # validity bitmap and convert it to NumPy.
            return pyarrow.BooleanArray.from_buffers(
                chunk.type, len(chunk), [None, buffer], offset=chunk.offset
            ).to_numpy(zero_copy_only=False)

        results = []
        for chunk in chunks:
            buffers = chunk.buffers()
            data = _buffer_to_numpy(chunk, buffers[1])
            if chunk.null_count != 0:
                # buffers[0] flags the entries that are present; the mask
                # stored here uses True for missing, hence the inversion.
                mask = ~_buffer_to_numpy(chunk, buffers[0])
            else:
                mask = np.zeros(len(chunk), dtype=bool)
            results.append(BooleanArray(data, mask))

        if results:
            return BooleanArray._concat_same_type(results)
        # No chunks at all: return an empty array.
        return BooleanArray(np.array([], dtype=np.bool_), np.array([], dtype=np.bool_))
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def coerce_to_array(
    values, mask=None, copy: bool = False
) -> tuple[np.ndarray, np.ndarray]:
    """
    Coerce the input values array to numpy arrays with a mask.

    Parameters
    ----------
    values : 1D list-like
    mask : bool 1D array, optional
    copy : bool, default False
        if True, copy the input

    Returns
    -------
    tuple of (values, mask)

    Raises
    ------
    TypeError
        If the values are not boolean-like (for numeric input, anything
        that does not survive a round trip through ``bool``).
    ValueError
        If ``mask`` is given together with a BooleanArray, or if the final
        values and mask shapes differ.
    """
    not_bool_like = "Need to pass bool-like values"

    # A BooleanArray already carries both pieces; hand them back directly.
    if isinstance(values, BooleanArray):
        if mask is not None:
            raise ValueError("cannot pass mask for BooleanArray input")
        data, na_mask = values._data, values._mask
        return (data.copy(), na_mask.copy()) if copy else (data, na_mask)

    inferred_mask = None
    is_ndarray = isinstance(values, np.ndarray)

    if is_ndarray and values.dtype == np.bool_:
        # Already boolean: nothing to validate and no missing values to infer.
        if copy:
            values = values.copy()
    elif is_ndarray and values.dtype.kind in "iufcb":
        inferred_mask = isna(values)
        present = ~inferred_mask

        as_bool = np.zeros(len(values), dtype=bool)
        as_bool[present] = values[present].astype(bool)

        # Casting back must reproduce the input, otherwise some entry was
        # not a boolean-like value.
        round_trip = as_bool[present].astype(values.dtype)
        if not np.all(round_trip == values[present]):
            raise TypeError(not_bool_like)

        values = as_bool
    else:
        as_object = np.asarray(values, dtype=object)

        inferred_dtype = lib.infer_dtype(as_object, skipna=True)
        integer_like = ("floating", "integer", "mixed-integer-float")
        if inferred_dtype not in ("boolean", "empty", *integer_like):
            raise TypeError(not_bool_like)

        # mypy does not narrow the type of the inferred mask to
        # npt.NDArray[np.bool_] within this branch, hence the cast.
        inferred_mask = cast("npt.NDArray[np.bool_]", isna(as_object))
        present = ~inferred_mask
        non_missing = as_object[present]

        as_bool = np.zeros(len(values), dtype=bool)
        as_bool[present] = non_missing.astype(bool)

        # if the values were integer-like, validate it were actually 0/1's
        if inferred_dtype in integer_like and not np.all(
            as_bool[present].astype(float) == non_missing.astype(float)
        ):
            raise TypeError(not_bool_like)

        values = as_bool

    # Combine the caller's mask (if any) with the one inferred from the data.
    if mask is None:
        if inferred_mask is None:
            mask = np.zeros(values.shape, dtype=bool)
        else:
            mask = inferred_mask
    elif isinstance(mask, np.ndarray) and mask.dtype == np.bool_:
        if inferred_mask is not None:
            mask = mask | inferred_mask
        elif copy:
            mask = mask.copy()
    else:
        mask = np.array(mask, dtype=bool)
        if inferred_mask is not None:
            mask = mask | inferred_mask

    if values.shape != mask.shape:
        raise ValueError("values.shape and mask.shape must match")

    return values, mask
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
class BooleanArray(BaseMaskedArray):
    """
    Array of boolean (True/False) data with missing values.

    This is a pandas Extension array for boolean data, under the hood
    represented by 2 numpy arrays: a boolean array with the data and
    a boolean array with the mask (True indicating missing).

    BooleanArray implements Kleene logic (sometimes called three-value
    logic) for logical operations. See :ref:`boolean.kleene` for more.

    To construct an BooleanArray from generic array-like input, use
    :func:`pandas.array` specifying ``dtype="boolean"`` (see examples
    below).

    .. warning::

       BooleanArray is considered experimental. The implementation and
       parts of the API may change without warning.

    Parameters
    ----------
    values : numpy.ndarray
        A 1-d boolean-dtype array with the data.
    mask : numpy.ndarray
        A 1-d boolean-dtype array indicating missing values (True
        indicates missing).
    copy : bool, default False
        Whether to copy the `values` and `mask` arrays.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Returns
    -------
    BooleanArray

    Examples
    --------
    Create an BooleanArray with :func:`pandas.array`:

    >>> pd.array([True, False, None], dtype="boolean")
    <BooleanArray>
    [True, False, <NA>]
    Length: 3, dtype: boolean
    """

    # The value used to fill '_data' to avoid upcasting
    _internal_fill_value = False
    # Fill values used for any/all
    # Incompatible types in assignment (expression has type "bool", base class
    # "BaseMaskedArray" defined the type as "<typing special form>")
    _truthy_value = True  # type: ignore[assignment]
    _falsey_value = False  # type: ignore[assignment]
    # Strings recognised by _from_sequence_of_strings, before any
    # caller-supplied additions.
    _TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"}
    _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"}

    @classmethod
    def _simple_new(cls, values: np.ndarray, mask: npt.NDArray[np.bool_]) -> Self:
        """Build an instance through the parent ``_simple_new`` and set its dtype."""
        instance = super()._simple_new(values, mask)
        instance._dtype = BooleanDtype()
        return instance

    def __init__(
        self, values: np.ndarray, mask: np.ndarray, copy: bool = False
    ) -> None:
        # Only a NumPy array of boolean dtype is accepted as data.
        if not isinstance(values, np.ndarray) or values.dtype != np.bool_:
            raise TypeError(
                "values should be boolean numpy array. Use "
                "the 'pd.array' function instead"
            )
        self._dtype = BooleanDtype()
        super().__init__(values, mask, copy=copy)

    @property
    def dtype(self) -> BooleanDtype:
        return self._dtype

    @classmethod
    def _from_sequence_of_strings(
        cls,
        strings: list[str],
        *,
        dtype: Dtype | None = None,
        copy: bool = False,
        true_values: list[str] | None = None,
        false_values: list[str] | None = None,
    ) -> BooleanArray:
        """
        Parse a sequence of strings into a boolean array.

        Non-missing entries must belong to ``_TRUE_VALUES`` / ``_FALSE_VALUES``
        or to the extra ``true_values`` / ``false_values``; anything else
        raises ``ValueError``. Entries flagged by ``isna`` are left untouched.
        """
        recognised_true = cls._TRUE_VALUES.union(true_values or [])
        recognised_false = cls._FALSE_VALUES.union(false_values or [])

        def _parse(text) -> bool:
            if text in recognised_true:
                return True
            if text in recognised_false:
                return False
            raise ValueError(f"{text} cannot be cast to bool")

        scalars = np.array(strings, dtype=object)
        present = ~isna(scalars)
        scalars[present] = [_parse(item) for item in scalars[present]]
        return cls._from_sequence(scalars, dtype=dtype, copy=copy)

    _HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_)

    @classmethod
    def _coerce_to_array(
        cls, value, *, dtype: DtypeObj, copy: bool = False
    ) -> tuple[np.ndarray, np.ndarray]:
        """Delegate to the module-level ``coerce_to_array``."""
        if dtype:
            assert dtype == "boolean"
        return coerce_to_array(value, copy=copy)

    def _logical_method(self, other, op):
        """
        Apply a Kleene-logic ``or`` / ``and`` / ``xor`` against ``other``.

        Raises
        ------
        NotImplementedError
            If a list-like ``other`` has more than one dimension.
        TypeError
            If a scalar ``other`` is neither ``pandas.NA`` nor a bool.
        ValueError
            If a non-scalar ``other`` has a different length than ``self``.
        """
        op_name = op.__name__
        assert op_name in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}

        # Decided before ``other`` is unboxed below.
        other_is_scalar = lib.is_scalar(other)
        other_mask = None

        if isinstance(other, BooleanArray):
            other, other_mask = other._data, other._mask
        elif is_list_like(other):
            other = np.asarray(other, dtype="bool")
            if other.ndim > 1:
                raise NotImplementedError("can only perform ops with 1-d structures")
            other, other_mask = coerce_to_array(other, copy=False)
        elif isinstance(other, np.bool_):
            other = other.item()

        if other_is_scalar:
            if other is not libmissing.NA and not lib.is_bool(other):
                raise TypeError(
                    "'other' should be pandas.NA or a bool. "
                    f"Got {type(other).__name__} instead."
                )
        elif len(self) != len(other):
            raise ValueError("Lengths must match")

        # Reflected variants share the implementation of the forward op.
        if op_name in {"or_", "ror_"}:
            kleene = ops.kleene_or
        elif op_name in {"and_", "rand_"}:
            kleene = ops.kleene_and
        else:
            # i.e. xor, rxor
            kleene = ops.kleene_xor
        result, result_mask = kleene(self._data, other, self._mask, other_mask)

        # i.e. BooleanArray
        return self._maybe_mask_result(result, result_mask)

    def _accumulate(
        self, name: str, *, skipna: bool = True, **kwargs
    ) -> BaseMaskedArray:
        """
        Run the cumulative operation ``name``.

        ``cummin`` and ``cummax`` are computed on the boolean data directly;
        any other name is delegated to an ``IntegerArray`` built from the
        data cast to ``int``.
        """
        if name not in ("cummin", "cummax"):
            from pandas.core.arrays import IntegerArray

            as_integers = IntegerArray(self._data.astype(int), self._mask)
            return as_integers._accumulate(name, skipna=skipna, **kwargs)

        accumulate = getattr(masked_accumulations, name)
        data, mask = accumulate(self._data, self._mask, skipna=skipna, **kwargs)
        return self._simple_new(data, mask)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/categorical.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/datetimelike.py
ADDED
|
@@ -0,0 +1,2556 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from datetime import (
|
| 4 |
+
datetime,
|
| 5 |
+
timedelta,
|
| 6 |
+
)
|
| 7 |
+
from functools import wraps
|
| 8 |
+
import operator
|
| 9 |
+
from typing import (
|
| 10 |
+
TYPE_CHECKING,
|
| 11 |
+
Any,
|
| 12 |
+
Callable,
|
| 13 |
+
Literal,
|
| 14 |
+
Union,
|
| 15 |
+
cast,
|
| 16 |
+
final,
|
| 17 |
+
overload,
|
| 18 |
+
)
|
| 19 |
+
import warnings
|
| 20 |
+
|
| 21 |
+
import numpy as np
|
| 22 |
+
|
| 23 |
+
from pandas._libs import (
|
| 24 |
+
algos,
|
| 25 |
+
lib,
|
| 26 |
+
)
|
| 27 |
+
from pandas._libs.arrays import NDArrayBacked
|
| 28 |
+
from pandas._libs.tslibs import (
|
| 29 |
+
BaseOffset,
|
| 30 |
+
IncompatibleFrequency,
|
| 31 |
+
NaT,
|
| 32 |
+
NaTType,
|
| 33 |
+
Period,
|
| 34 |
+
Resolution,
|
| 35 |
+
Tick,
|
| 36 |
+
Timedelta,
|
| 37 |
+
Timestamp,
|
| 38 |
+
add_overflowsafe,
|
| 39 |
+
astype_overflowsafe,
|
| 40 |
+
get_unit_from_dtype,
|
| 41 |
+
iNaT,
|
| 42 |
+
ints_to_pydatetime,
|
| 43 |
+
ints_to_pytimedelta,
|
| 44 |
+
periods_per_day,
|
| 45 |
+
to_offset,
|
| 46 |
+
)
|
| 47 |
+
from pandas._libs.tslibs.fields import (
|
| 48 |
+
RoundTo,
|
| 49 |
+
round_nsint64,
|
| 50 |
+
)
|
| 51 |
+
from pandas._libs.tslibs.np_datetime import compare_mismatched_resolutions
|
| 52 |
+
from pandas._libs.tslibs.timedeltas import get_unit_for_round
|
| 53 |
+
from pandas._libs.tslibs.timestamps import integer_op_not_supported
|
| 54 |
+
from pandas._typing import (
|
| 55 |
+
ArrayLike,
|
| 56 |
+
AxisInt,
|
| 57 |
+
DatetimeLikeScalar,
|
| 58 |
+
Dtype,
|
| 59 |
+
DtypeObj,
|
| 60 |
+
F,
|
| 61 |
+
InterpolateOptions,
|
| 62 |
+
NpDtype,
|
| 63 |
+
PositionalIndexer2D,
|
| 64 |
+
PositionalIndexerTuple,
|
| 65 |
+
ScalarIndexer,
|
| 66 |
+
Self,
|
| 67 |
+
SequenceIndexer,
|
| 68 |
+
TimeAmbiguous,
|
| 69 |
+
TimeNonexistent,
|
| 70 |
+
npt,
|
| 71 |
+
)
|
| 72 |
+
from pandas.compat.numpy import function as nv
|
| 73 |
+
from pandas.errors import (
|
| 74 |
+
AbstractMethodError,
|
| 75 |
+
InvalidComparison,
|
| 76 |
+
PerformanceWarning,
|
| 77 |
+
)
|
| 78 |
+
from pandas.util._decorators import (
|
| 79 |
+
Appender,
|
| 80 |
+
Substitution,
|
| 81 |
+
cache_readonly,
|
| 82 |
+
)
|
| 83 |
+
from pandas.util._exceptions import find_stack_level
|
| 84 |
+
|
| 85 |
+
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
| 86 |
+
from pandas.core.dtypes.common import (
|
| 87 |
+
is_all_strings,
|
| 88 |
+
is_integer_dtype,
|
| 89 |
+
is_list_like,
|
| 90 |
+
is_object_dtype,
|
| 91 |
+
is_string_dtype,
|
| 92 |
+
pandas_dtype,
|
| 93 |
+
)
|
| 94 |
+
from pandas.core.dtypes.dtypes import (
|
| 95 |
+
ArrowDtype,
|
| 96 |
+
CategoricalDtype,
|
| 97 |
+
DatetimeTZDtype,
|
| 98 |
+
ExtensionDtype,
|
| 99 |
+
PeriodDtype,
|
| 100 |
+
)
|
| 101 |
+
from pandas.core.dtypes.generic import (
|
| 102 |
+
ABCCategorical,
|
| 103 |
+
ABCMultiIndex,
|
| 104 |
+
)
|
| 105 |
+
from pandas.core.dtypes.missing import (
|
| 106 |
+
is_valid_na_for_dtype,
|
| 107 |
+
isna,
|
| 108 |
+
)
|
| 109 |
+
|
| 110 |
+
from pandas.core import (
|
| 111 |
+
algorithms,
|
| 112 |
+
missing,
|
| 113 |
+
nanops,
|
| 114 |
+
ops,
|
| 115 |
+
)
|
| 116 |
+
from pandas.core.algorithms import (
|
| 117 |
+
isin,
|
| 118 |
+
map_array,
|
| 119 |
+
unique1d,
|
| 120 |
+
)
|
| 121 |
+
from pandas.core.array_algos import datetimelike_accumulations
|
| 122 |
+
from pandas.core.arraylike import OpsMixin
|
| 123 |
+
from pandas.core.arrays._mixins import (
|
| 124 |
+
NDArrayBackedExtensionArray,
|
| 125 |
+
ravel_compat,
|
| 126 |
+
)
|
| 127 |
+
from pandas.core.arrays.arrow.array import ArrowExtensionArray
|
| 128 |
+
from pandas.core.arrays.base import ExtensionArray
|
| 129 |
+
from pandas.core.arrays.integer import IntegerArray
|
| 130 |
+
import pandas.core.common as com
|
| 131 |
+
from pandas.core.construction import (
|
| 132 |
+
array as pd_array,
|
| 133 |
+
ensure_wrapped_if_datetimelike,
|
| 134 |
+
extract_array,
|
| 135 |
+
)
|
| 136 |
+
from pandas.core.indexers import (
|
| 137 |
+
check_array_indexer,
|
| 138 |
+
check_setitem_lengths,
|
| 139 |
+
)
|
| 140 |
+
from pandas.core.ops.common import unpack_zerodim_and_defer
|
| 141 |
+
from pandas.core.ops.invalid import (
|
| 142 |
+
invalid_comparison,
|
| 143 |
+
make_invalid_op,
|
| 144 |
+
)
|
| 145 |
+
|
| 146 |
+
from pandas.tseries import frequencies
|
| 147 |
+
|
| 148 |
+
if TYPE_CHECKING:
|
| 149 |
+
from collections.abc import (
|
| 150 |
+
Iterator,
|
| 151 |
+
Sequence,
|
| 152 |
+
)
|
| 153 |
+
|
| 154 |
+
from pandas import Index
|
| 155 |
+
from pandas.core.arrays import (
|
| 156 |
+
DatetimeArray,
|
| 157 |
+
PeriodArray,
|
| 158 |
+
TimedeltaArray,
|
| 159 |
+
)
|
| 160 |
+
|
| 161 |
+
DTScalarOrNaT = Union[DatetimeLikeScalar, NaTType]
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def _make_unpacked_invalid_op(op_name: str):
|
| 165 |
+
op = make_invalid_op(op_name)
|
| 166 |
+
return unpack_zerodim_and_defer(op_name)(op)
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def _period_dispatch(meth: F) -> F:
|
| 170 |
+
"""
|
| 171 |
+
For PeriodArray methods, dispatch to DatetimeArray and re-wrap the results
|
| 172 |
+
in PeriodArray. We cannot use ._ndarray directly for the affected
|
| 173 |
+
methods because the i8 data has different semantics on NaT values.
|
| 174 |
+
"""
|
| 175 |
+
|
| 176 |
+
@wraps(meth)
|
| 177 |
+
def new_meth(self, *args, **kwargs):
|
| 178 |
+
if not isinstance(self.dtype, PeriodDtype):
|
| 179 |
+
return meth(self, *args, **kwargs)
|
| 180 |
+
|
| 181 |
+
arr = self.view("M8[ns]")
|
| 182 |
+
result = meth(arr, *args, **kwargs)
|
| 183 |
+
if result is NaT:
|
| 184 |
+
return NaT
|
| 185 |
+
elif isinstance(result, Timestamp):
|
| 186 |
+
return self._box_func(result._value)
|
| 187 |
+
|
| 188 |
+
res_i8 = result.view("i8")
|
| 189 |
+
return self._from_backing_data(res_i8)
|
| 190 |
+
|
| 191 |
+
return cast(F, new_meth)
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is
|
| 195 |
+
# incompatible with definition in base class "ExtensionArray"
|
| 196 |
+
class DatetimeLikeArrayMixin( # type: ignore[misc]
|
| 197 |
+
OpsMixin, NDArrayBackedExtensionArray
|
| 198 |
+
):
|
| 199 |
+
"""
|
| 200 |
+
Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray
|
| 201 |
+
|
| 202 |
+
Assumes that __new__/__init__ defines:
|
| 203 |
+
_ndarray
|
| 204 |
+
|
| 205 |
+
and that inheriting subclass implements:
|
| 206 |
+
freq
|
| 207 |
+
"""
|
| 208 |
+
|
| 209 |
+
# _infer_matches -> which infer_dtype strings are close enough to our own
|
| 210 |
+
_infer_matches: tuple[str, ...]
|
| 211 |
+
_is_recognized_dtype: Callable[[DtypeObj], bool]
|
| 212 |
+
_recognized_scalars: tuple[type, ...]
|
| 213 |
+
_ndarray: np.ndarray
|
| 214 |
+
freq: BaseOffset | None
|
| 215 |
+
|
| 216 |
+
@cache_readonly
|
| 217 |
+
def _can_hold_na(self) -> bool:
|
| 218 |
+
return True
|
| 219 |
+
|
| 220 |
+
def __init__(
|
| 221 |
+
self, data, dtype: Dtype | None = None, freq=None, copy: bool = False
|
| 222 |
+
) -> None:
|
| 223 |
+
raise AbstractMethodError(self)
|
| 224 |
+
|
| 225 |
+
@property
|
| 226 |
+
def _scalar_type(self) -> type[DatetimeLikeScalar]:
|
| 227 |
+
"""
|
| 228 |
+
The scalar associated with this datelike
|
| 229 |
+
|
| 230 |
+
* PeriodArray : Period
|
| 231 |
+
* DatetimeArray : Timestamp
|
| 232 |
+
* TimedeltaArray : Timedelta
|
| 233 |
+
"""
|
| 234 |
+
raise AbstractMethodError(self)
|
| 235 |
+
|
| 236 |
+
def _scalar_from_string(self, value: str) -> DTScalarOrNaT:
|
| 237 |
+
"""
|
| 238 |
+
Construct a scalar type from a string.
|
| 239 |
+
|
| 240 |
+
Parameters
|
| 241 |
+
----------
|
| 242 |
+
value : str
|
| 243 |
+
|
| 244 |
+
Returns
|
| 245 |
+
-------
|
| 246 |
+
Period, Timestamp, or Timedelta, or NaT
|
| 247 |
+
Whatever the type of ``self._scalar_type`` is.
|
| 248 |
+
|
| 249 |
+
Notes
|
| 250 |
+
-----
|
| 251 |
+
This should call ``self._check_compatible_with`` before
|
| 252 |
+
unboxing the result.
|
| 253 |
+
"""
|
| 254 |
+
raise AbstractMethodError(self)
|
| 255 |
+
|
| 256 |
+
def _unbox_scalar(
|
| 257 |
+
self, value: DTScalarOrNaT
|
| 258 |
+
) -> np.int64 | np.datetime64 | np.timedelta64:
|
| 259 |
+
"""
|
| 260 |
+
Unbox the integer value of a scalar `value`.
|
| 261 |
+
|
| 262 |
+
Parameters
|
| 263 |
+
----------
|
| 264 |
+
value : Period, Timestamp, Timedelta, or NaT
|
| 265 |
+
Depending on subclass.
|
| 266 |
+
|
| 267 |
+
Returns
|
| 268 |
+
-------
|
| 269 |
+
int
|
| 270 |
+
|
| 271 |
+
Examples
|
| 272 |
+
--------
|
| 273 |
+
>>> arr = pd.array(np.array(['1970-01-01'], 'datetime64[ns]'))
|
| 274 |
+
>>> arr._unbox_scalar(arr[0])
|
| 275 |
+
numpy.datetime64('1970-01-01T00:00:00.000000000')
|
| 276 |
+
"""
|
| 277 |
+
raise AbstractMethodError(self)
|
| 278 |
+
|
| 279 |
+
def _check_compatible_with(self, other: DTScalarOrNaT) -> None:
|
| 280 |
+
"""
|
| 281 |
+
Verify that `self` and `other` are compatible.
|
| 282 |
+
|
| 283 |
+
* DatetimeArray verifies that the timezones (if any) match
|
| 284 |
+
* PeriodArray verifies that the freq matches
|
| 285 |
+
* Timedelta has no verification
|
| 286 |
+
|
| 287 |
+
In each case, NaT is considered compatible.
|
| 288 |
+
|
| 289 |
+
Parameters
|
| 290 |
+
----------
|
| 291 |
+
other
|
| 292 |
+
|
| 293 |
+
Raises
|
| 294 |
+
------
|
| 295 |
+
Exception
|
| 296 |
+
"""
|
| 297 |
+
raise AbstractMethodError(self)
|
| 298 |
+
|
| 299 |
+
# ------------------------------------------------------------------
|
| 300 |
+
|
| 301 |
+
def _box_func(self, x):
|
| 302 |
+
"""
|
| 303 |
+
box function to get object from internal representation
|
| 304 |
+
"""
|
| 305 |
+
raise AbstractMethodError(self)
|
| 306 |
+
|
| 307 |
+
def _box_values(self, values) -> np.ndarray:
|
| 308 |
+
"""
|
| 309 |
+
apply box func to passed values
|
| 310 |
+
"""
|
| 311 |
+
return lib.map_infer(values, self._box_func, convert=False)
|
| 312 |
+
|
| 313 |
+
def __iter__(self) -> Iterator:
|
| 314 |
+
if self.ndim > 1:
|
| 315 |
+
return (self[n] for n in range(len(self)))
|
| 316 |
+
else:
|
| 317 |
+
return (self._box_func(v) for v in self.asi8)
|
| 318 |
+
|
| 319 |
+
@property
def asi8(self) -> npt.NDArray[np.int64]:
    """
    Integer representation of the values.

    Returns
    -------
    ndarray
        An ndarray with int64 dtype, obtained as an ``"i8"`` view of
        ``self._ndarray``.
    """
    # do not cache or you'll create a memory leak
    backing = self._ndarray
    return backing.view("i8")
|
| 331 |
+
|
| 332 |
+
# ----------------------------------------------------------------
|
| 333 |
+
# Rendering Methods
|
| 334 |
+
|
| 335 |
+
def _format_native_types(
|
| 336 |
+
self, *, na_rep: str | float = "NaT", date_format=None
|
| 337 |
+
) -> npt.NDArray[np.object_]:
|
| 338 |
+
"""
|
| 339 |
+
Helper method for astype when converting to strings.
|
| 340 |
+
|
| 341 |
+
Returns
|
| 342 |
+
-------
|
| 343 |
+
ndarray[str]
|
| 344 |
+
"""
|
| 345 |
+
raise AbstractMethodError(self)
|
| 346 |
+
|
| 347 |
+
def _formatter(self, boxed: bool = False):
|
| 348 |
+
# TODO: Remove Datetime & DatetimeTZ formatters.
|
| 349 |
+
return "'{}'".format
|
| 350 |
+
|
| 351 |
+
# ----------------------------------------------------------------
|
| 352 |
+
# Array-Like / EA-Interface Methods
|
| 353 |
+
|
| 354 |
+
def __array__(
|
| 355 |
+
self, dtype: NpDtype | None = None, copy: bool | None = None
|
| 356 |
+
) -> np.ndarray:
|
| 357 |
+
# used for Timedelta/DatetimeArray, overwritten by PeriodArray
|
| 358 |
+
if is_object_dtype(dtype):
|
| 359 |
+
return np.array(list(self), dtype=object)
|
| 360 |
+
return self._ndarray
|
| 361 |
+
|
| 362 |
+
@overload
def __getitem__(self, item: ScalarIndexer) -> DTScalarOrNaT:
    ...


@overload
def __getitem__(
    self,
    item: SequenceIndexer | PositionalIndexerTuple,
) -> Self:
    ...


def __getitem__(self, key: PositionalIndexer2D) -> Self | DTScalarOrNaT:
    """
    This getitem defers to the underlying array, which by-definition can
    only handle list-likes, slices, and integer scalars.

    Scalar results are returned as-is; array results get their ``_freq``
    set from ``self._get_getitem_freq(key)``.
    """
    # Use cast as we know we will get back a DatetimeLikeArray or DTScalar,
    # but skip evaluating the Union at runtime for performance
    # (see https://github.com/pandas-dev/pandas/pull/44624)
    fetched = cast("Union[Self, DTScalarOrNaT]", super().__getitem__(key))
    if not lib.is_scalar(fetched):
        # At this point we know the result is an array.
        fetched = cast(Self, fetched)
        fetched._freq = self._get_getitem_freq(key)
    return fetched
|
| 389 |
+
|
| 390 |
+
def _get_getitem_freq(self, key) -> BaseOffset | None:
|
| 391 |
+
"""
|
| 392 |
+
Find the `freq` attribute to assign to the result of a __getitem__ lookup.
|
| 393 |
+
"""
|
| 394 |
+
is_period = isinstance(self.dtype, PeriodDtype)
|
| 395 |
+
if is_period:
|
| 396 |
+
freq = self.freq
|
| 397 |
+
elif self.ndim != 1:
|
| 398 |
+
freq = None
|
| 399 |
+
else:
|
| 400 |
+
key = check_array_indexer(self, key) # maybe ndarray[bool] -> slice
|
| 401 |
+
freq = None
|
| 402 |
+
if isinstance(key, slice):
|
| 403 |
+
if self.freq is not None and key.step is not None:
|
| 404 |
+
freq = key.step * self.freq
|
| 405 |
+
else:
|
| 406 |
+
freq = self.freq
|
| 407 |
+
elif key is Ellipsis:
|
| 408 |
+
# GH#21282 indexing with Ellipsis is similar to a full slice,
|
| 409 |
+
# should preserve `freq` attribute
|
| 410 |
+
freq = self.freq
|
| 411 |
+
elif com.is_bool_indexer(key):
|
| 412 |
+
new_key = lib.maybe_booleans_to_slice(key.view(np.uint8))
|
| 413 |
+
if isinstance(new_key, slice):
|
| 414 |
+
return self._get_getitem_freq(new_key)
|
| 415 |
+
return freq
|
| 416 |
+
|
| 417 |
+
# error: Argument 1 of "__setitem__" is incompatible with supertype
|
| 418 |
+
# "ExtensionArray"; supertype defines the argument type as "Union[int,
|
| 419 |
+
# ndarray]"
|
| 420 |
+
def __setitem__(
    self,
    key: int | Sequence[int] | Sequence[bool] | slice,
    value: NaTType | Any | Sequence[Any],
) -> None:
    """
    Set ``value`` at ``key`` via the parent implementation, then give the
    array a chance to clear its freq unless the assignment was a no-op.
    """
    # I'm fudging the types a bit here. "Any" above really depends
    # on type(self). For PeriodArray, it's Period (or stuff coercible
    # to a period in from_sequence). For DatetimeArray, it's Timestamp...
    # I don't know if mypy can do that, possibly with Generics.
    # https://mypy.readthedocs.io/en/latest/generics.html

    is_noop = check_setitem_lengths(key, value, self)

    # Calling super() before the no_op short-circuit means that we raise
    # on invalid 'value' even if this is a no-op, e.g. wrong-dtype empty array.
    super().__setitem__(key, value)

    if not is_noop:
        self._maybe_clear_freq()
|
| 441 |
+
|
| 442 |
+
def _maybe_clear_freq(self) -> None:
|
| 443 |
+
# inplace operations like __setitem__ may invalidate the freq of
|
| 444 |
+
# DatetimeArray and TimedeltaArray
|
| 445 |
+
pass
|
| 446 |
+
|
| 447 |
+
def astype(self, dtype, copy: bool = True):
    """
    Cast to ``dtype``.

    Handles object, extension, string and int64 targets; rejects other
    integer widths, floats, and datetime/timedelta dtypes different from
    our own with ``TypeError``; anything else goes through ``np.asarray``.

    Parameters
    ----------
    dtype : str or dtype
        Normalized with ``pandas_dtype``.
    copy : bool, default True
        Forwarded to the parent for extension dtypes; for the int64 target
        it controls whether the i8 view is copied.

    Raises
    ------
    TypeError
        For the unsupported conversions described above.
    """
    # Some notes on cases we don't have to handle here in the base class:
    #   1. PeriodArray.astype handles period -> period
    #   2. DatetimeArray.astype handles conversion between tz.
    #   3. DatetimeArray.astype handles datetime -> period
    dtype = pandas_dtype(dtype)

    if dtype == object:
        own_kind = self.dtype.kind
        if own_kind == "M":
            self = cast("DatetimeArray", self)
            # *much* faster than self._box_values
            #  for e.g. test_get_loc_tuple_monotonic_above_size_cutoff
            return ints_to_pydatetime(
                self.asi8,
                tz=self.tz,
                box="timestamp",
                reso=self._creso,
            )
        if own_kind == "m":
            return ints_to_pytimedelta(self._ndarray, box=True)

        # Remaining case: box element-wise on the flattened i8 data and
        # restore the original shape.
        return self._box_values(self.asi8.ravel()).reshape(self.shape)

    if isinstance(dtype, ExtensionDtype):
        return super().astype(dtype, copy=copy)

    if is_string_dtype(dtype):
        return self._format_native_types()

    if dtype.kind in "iu":
        # we deliberately ignore int32 vs. int64 here.
        # See https://github.com/pandas-dev/pandas/issues/24381 for more.
        i8values = self.asi8
        if dtype != np.int64:
            raise TypeError(
                f"Converting from {self.dtype} to {dtype} is not supported. "
                "Do obj.astype('int64').astype(dtype) instead"
            )
        return i8values.copy() if copy else i8values

    if (dtype.kind in "mM" and self.dtype != dtype) or dtype.kind == "f":
        # disallow conversion between datetime/timedelta,
        # and conversions for any datetimelike to float
        raise TypeError(f"Cannot cast {type(self).__name__} to dtype {dtype}")

    return np.asarray(self, dtype=dtype)
|
| 497 |
+
|
| 498 |
+
@overload
def view(self) -> Self:
    ...


@overload
def view(self, dtype: Literal["M8[ns]"]) -> DatetimeArray:
    ...


@overload
def view(self, dtype: Literal["m8[ns]"]) -> TimedeltaArray:
    ...


@overload
def view(self, dtype: Dtype | None = ...) -> ArrayLike:
    ...


# pylint: disable-next=useless-parent-delegation
def view(self, dtype: Dtype | None = None) -> ArrayLike:
    """
    Delegate to the parent class ``view``.

    The method exists only to carry the typing overloads above.
    """
    # we need to explicitly call super() method as long as the `@overload`s
    #  are present in this file.
    return super().view(dtype)
|
| 519 |
+
|
| 520 |
+
# ------------------------------------------------------------------
|
| 521 |
+
# Validation Methods
|
| 522 |
+
# TODO: try to de-duplicate these, ensure identical behavior
|
| 523 |
+
|
| 524 |
+
def _validate_comparison_value(self, other):
|
| 525 |
+
if isinstance(other, str):
|
| 526 |
+
try:
|
| 527 |
+
# GH#18435 strings get a pass from tzawareness compat
|
| 528 |
+
other = self._scalar_from_string(other)
|
| 529 |
+
except (ValueError, IncompatibleFrequency):
|
| 530 |
+
# failed to parse as Timestamp/Timedelta/Period
|
| 531 |
+
raise InvalidComparison(other)
|
| 532 |
+
|
| 533 |
+
if isinstance(other, self._recognized_scalars) or other is NaT:
|
| 534 |
+
other = self._scalar_type(other)
|
| 535 |
+
try:
|
| 536 |
+
self._check_compatible_with(other)
|
| 537 |
+
except (TypeError, IncompatibleFrequency) as err:
|
| 538 |
+
# e.g. tzawareness mismatch
|
| 539 |
+
raise InvalidComparison(other) from err
|
| 540 |
+
|
| 541 |
+
elif not is_list_like(other):
|
| 542 |
+
raise InvalidComparison(other)
|
| 543 |
+
|
| 544 |
+
elif len(other) != len(self):
|
| 545 |
+
raise ValueError("Lengths must match")
|
| 546 |
+
|
| 547 |
+
else:
|
| 548 |
+
try:
|
| 549 |
+
other = self._validate_listlike(other, allow_object=True)
|
| 550 |
+
self._check_compatible_with(other)
|
| 551 |
+
except (TypeError, IncompatibleFrequency) as err:
|
| 552 |
+
if is_object_dtype(getattr(other, "dtype", None)):
|
| 553 |
+
# We will have to operate element-wise
|
| 554 |
+
pass
|
| 555 |
+
else:
|
| 556 |
+
raise InvalidComparison(other) from err
|
| 557 |
+
|
| 558 |
+
return other
|
| 559 |
+
|
| 560 |
+
def _validate_scalar(
|
| 561 |
+
self,
|
| 562 |
+
value,
|
| 563 |
+
*,
|
| 564 |
+
allow_listlike: bool = False,
|
| 565 |
+
unbox: bool = True,
|
| 566 |
+
):
|
| 567 |
+
"""
|
| 568 |
+
Validate that the input value can be cast to our scalar_type.
|
| 569 |
+
|
| 570 |
+
Parameters
|
| 571 |
+
----------
|
| 572 |
+
value : object
|
| 573 |
+
allow_listlike: bool, default False
|
| 574 |
+
When raising an exception, whether the message should say
|
| 575 |
+
listlike inputs are allowed.
|
| 576 |
+
unbox : bool, default True
|
| 577 |
+
Whether to unbox the result before returning. Note: unbox=False
|
| 578 |
+
skips the setitem compatibility check.
|
| 579 |
+
|
| 580 |
+
Returns
|
| 581 |
+
-------
|
| 582 |
+
self._scalar_type or NaT
|
| 583 |
+
"""
|
| 584 |
+
if isinstance(value, self._scalar_type):
|
| 585 |
+
pass
|
| 586 |
+
|
| 587 |
+
elif isinstance(value, str):
|
| 588 |
+
# NB: Careful about tzawareness
|
| 589 |
+
try:
|
| 590 |
+
value = self._scalar_from_string(value)
|
| 591 |
+
except ValueError as err:
|
| 592 |
+
msg = self._validation_error_message(value, allow_listlike)
|
| 593 |
+
raise TypeError(msg) from err
|
| 594 |
+
|
| 595 |
+
elif is_valid_na_for_dtype(value, self.dtype):
|
| 596 |
+
# GH#18295
|
| 597 |
+
value = NaT
|
| 598 |
+
|
| 599 |
+
elif isna(value):
|
| 600 |
+
# if we are dt64tz and value is dt64("NaT"), dont cast to NaT,
|
| 601 |
+
# or else we'll fail to raise in _unbox_scalar
|
| 602 |
+
msg = self._validation_error_message(value, allow_listlike)
|
| 603 |
+
raise TypeError(msg)
|
| 604 |
+
|
| 605 |
+
elif isinstance(value, self._recognized_scalars):
|
| 606 |
+
# error: Argument 1 to "Timestamp" has incompatible type "object"; expected
|
| 607 |
+
# "integer[Any] | float | str | date | datetime | datetime64"
|
| 608 |
+
value = self._scalar_type(value) # type: ignore[arg-type]
|
| 609 |
+
|
| 610 |
+
else:
|
| 611 |
+
msg = self._validation_error_message(value, allow_listlike)
|
| 612 |
+
raise TypeError(msg)
|
| 613 |
+
|
| 614 |
+
if not unbox:
|
| 615 |
+
# NB: In general NDArrayBackedExtensionArray will unbox here;
|
| 616 |
+
# this option exists to prevent a performance hit in
|
| 617 |
+
# TimedeltaIndex.get_loc
|
| 618 |
+
return value
|
| 619 |
+
return self._unbox_scalar(value)
|
| 620 |
+
|
| 621 |
+
def _validation_error_message(self, value, allow_listlike: bool = False) -> str:
|
| 622 |
+
"""
|
| 623 |
+
Construct an exception message on validation error.
|
| 624 |
+
|
| 625 |
+
Some methods allow only scalar inputs, while others allow either scalar
|
| 626 |
+
or listlike.
|
| 627 |
+
|
| 628 |
+
Parameters
|
| 629 |
+
----------
|
| 630 |
+
allow_listlike: bool, default False
|
| 631 |
+
|
| 632 |
+
Returns
|
| 633 |
+
-------
|
| 634 |
+
str
|
| 635 |
+
"""
|
| 636 |
+
if hasattr(value, "dtype") and getattr(value, "ndim", 0) > 0:
|
| 637 |
+
msg_got = f"{value.dtype} array"
|
| 638 |
+
else:
|
| 639 |
+
msg_got = f"'{type(value).__name__}'"
|
| 640 |
+
if allow_listlike:
|
| 641 |
+
msg = (
|
| 642 |
+
f"value should be a '{self._scalar_type.__name__}', 'NaT', "
|
| 643 |
+
f"or array of those. Got {msg_got} instead."
|
| 644 |
+
)
|
| 645 |
+
else:
|
| 646 |
+
msg = (
|
| 647 |
+
f"value should be a '{self._scalar_type.__name__}' or 'NaT'. "
|
| 648 |
+
f"Got {msg_got} instead."
|
| 649 |
+
)
|
| 650 |
+
return msg
|
| 651 |
+
|
| 652 |
+
def _validate_listlike(self, value, allow_object: bool = False):
|
| 653 |
+
if isinstance(value, type(self)):
|
| 654 |
+
if self.dtype.kind in "mM" and not allow_object:
|
| 655 |
+
# error: "DatetimeLikeArrayMixin" has no attribute "as_unit"
|
| 656 |
+
value = value.as_unit(self.unit, round_ok=False) # type: ignore[attr-defined]
|
| 657 |
+
return value
|
| 658 |
+
|
| 659 |
+
if isinstance(value, list) and len(value) == 0:
|
| 660 |
+
# We treat empty list as our own dtype.
|
| 661 |
+
return type(self)._from_sequence([], dtype=self.dtype)
|
| 662 |
+
|
| 663 |
+
if hasattr(value, "dtype") and value.dtype == object:
|
| 664 |
+
# `array` below won't do inference if value is an Index or Series.
|
| 665 |
+
# so do so here. in the Index case, inferred_type may be cached.
|
| 666 |
+
if lib.infer_dtype(value) in self._infer_matches:
|
| 667 |
+
try:
|
| 668 |
+
value = type(self)._from_sequence(value)
|
| 669 |
+
except (ValueError, TypeError):
|
| 670 |
+
if allow_object:
|
| 671 |
+
return value
|
| 672 |
+
msg = self._validation_error_message(value, True)
|
| 673 |
+
raise TypeError(msg)
|
| 674 |
+
|
| 675 |
+
# Do type inference if necessary up front (after unpacking
|
| 676 |
+
# NumpyExtensionArray)
|
| 677 |
+
# e.g. we passed PeriodIndex.values and got an ndarray of Periods
|
| 678 |
+
value = extract_array(value, extract_numpy=True)
|
| 679 |
+
value = pd_array(value)
|
| 680 |
+
value = extract_array(value, extract_numpy=True)
|
| 681 |
+
|
| 682 |
+
if is_all_strings(value):
|
| 683 |
+
# We got a StringArray
|
| 684 |
+
try:
|
| 685 |
+
# TODO: Could use from_sequence_of_strings if implemented
|
| 686 |
+
# Note: passing dtype is necessary for PeriodArray tests
|
| 687 |
+
value = type(self)._from_sequence(value, dtype=self.dtype)
|
| 688 |
+
except ValueError:
|
| 689 |
+
pass
|
| 690 |
+
|
| 691 |
+
if isinstance(value.dtype, CategoricalDtype):
|
| 692 |
+
# e.g. we have a Categorical holding self.dtype
|
| 693 |
+
if value.categories.dtype == self.dtype:
|
| 694 |
+
# TODO: do we need equal dtype or just comparable?
|
| 695 |
+
value = value._internal_get_values()
|
| 696 |
+
value = extract_array(value, extract_numpy=True)
|
| 697 |
+
|
| 698 |
+
if allow_object and is_object_dtype(value.dtype):
|
| 699 |
+
pass
|
| 700 |
+
|
| 701 |
+
elif not type(self)._is_recognized_dtype(value.dtype):
|
| 702 |
+
msg = self._validation_error_message(value, True)
|
| 703 |
+
raise TypeError(msg)
|
| 704 |
+
|
| 705 |
+
if self.dtype.kind in "mM" and not allow_object:
|
| 706 |
+
# error: "DatetimeLikeArrayMixin" has no attribute "as_unit"
|
| 707 |
+
value = value.as_unit(self.unit, round_ok=False) # type: ignore[attr-defined]
|
| 708 |
+
return value
|
| 709 |
+
|
| 710 |
+
def _validate_setitem_value(self, value):
|
| 711 |
+
if is_list_like(value):
|
| 712 |
+
value = self._validate_listlike(value)
|
| 713 |
+
else:
|
| 714 |
+
return self._validate_scalar(value, allow_listlike=True)
|
| 715 |
+
|
| 716 |
+
return self._unbox(value)
|
| 717 |
+
|
| 718 |
+
@final
|
| 719 |
+
def _unbox(self, other) -> np.int64 | np.datetime64 | np.timedelta64 | np.ndarray:
|
| 720 |
+
"""
|
| 721 |
+
Unbox either a scalar with _unbox_scalar or an instance of our own type.
|
| 722 |
+
"""
|
| 723 |
+
if lib.is_scalar(other):
|
| 724 |
+
other = self._unbox_scalar(other)
|
| 725 |
+
else:
|
| 726 |
+
# same type as self
|
| 727 |
+
self._check_compatible_with(other)
|
| 728 |
+
other = other._ndarray
|
| 729 |
+
return other
|
| 730 |
+
|
| 731 |
+
# ------------------------------------------------------------------
|
| 732 |
+
# Additional array methods
|
| 733 |
+
# These are not part of the EA API, but we implement them because
|
| 734 |
+
# pandas assumes they're there.
|
| 735 |
+
|
| 736 |
+
@ravel_compat
|
| 737 |
+
def map(self, mapper, na_action=None):
|
| 738 |
+
from pandas import Index
|
| 739 |
+
|
| 740 |
+
result = map_array(self, mapper, na_action=na_action)
|
| 741 |
+
result = Index(result)
|
| 742 |
+
|
| 743 |
+
if isinstance(result, ABCMultiIndex):
|
| 744 |
+
return result.to_numpy()
|
| 745 |
+
else:
|
| 746 |
+
return result.array
|
| 747 |
+
|
| 748 |
+
def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
    """
    Compute boolean array of whether each value is found in the
    passed set of values.

    Values of float, integer, unsigned or complex dtype never match.
    Values that are not already of our own type are coerced where
    possible (with a FutureWarning, GH#53111) or compared as objects.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray

    Returns
    -------
    ndarray[bool]
    """
    if values.dtype.kind in "fiuc":
        # TODO: de-duplicate with equals, validate_comparison_value
        return np.zeros(self.shape, dtype=bool)

    values = ensure_wrapped_if_datetimelike(values)

    if not isinstance(values, type(self)):
        if values.dtype == object:
            values = lib.maybe_convert_objects(
                values,  # type: ignore[arg-type]
                convert_non_numeric=True,
                dtype_if_all_nat=self.dtype,
            )
            if values.dtype != object:
                # Conversion found a concrete dtype; start over with it.
                return self.isin(values)

            inferable = (
                "timedelta",
                "timedelta64",
                "datetime",
                "datetime64",
                "date",
                "period",
            )
            inferred = lib.infer_dtype(values, skipna=False)
            if inferred != "string" and inferred not in inferable:
                if "mixed" in inferred:
                    return isin(self.astype(object), values)
                return np.zeros(self.shape, dtype=bool)

        try:
            values = type(self)._from_sequence(values)
        except ValueError:
            return isin(self.astype(object), values)

        warnings.warn(
            # GH#53111
            f"The behavior of 'isin' with dtype={self.dtype} and "
            "castable values (e.g. strings) is deprecated. In a "
            "future version, these will not be considered matching "
            "by isin. Explicitly cast to the appropriate dtype before "
            "calling isin instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    if self.dtype.kind in "mM":
        self = cast("DatetimeArray | TimedeltaArray", self)
        # error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]"
        # has no attribute "as_unit"
        values = values.as_unit(self.unit)  # type: ignore[union-attr]

    try:
        # error: Argument 1 to "_check_compatible_with" of "DatetimeLikeArrayMixin"
        # has incompatible type "ExtensionArray | ndarray[Any, Any]"; expected
        # "Period | Timestamp | Timedelta | NaTType"
        self._check_compatible_with(values)  # type: ignore[arg-type]
    except (TypeError, ValueError):
        # Includes tzawareness mismatch and IncompatibleFrequencyError
        return np.zeros(self.shape, dtype=bool)

    # error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]"
    # has no attribute "asi8"
    return isin(self.asi8, values.asi8)  # type: ignore[union-attr]
|
| 829 |
+
|
| 830 |
+
# ------------------------------------------------------------------
|
| 831 |
+
# Null Handling
|
| 832 |
+
|
| 833 |
+
def isna(self) -> npt.NDArray[np.bool_]:
    """Return the boolean missing-value mask held in ``self._isnan``."""
    mask = self._isnan
    return mask
|
| 835 |
+
|
| 836 |
+
@property # NB: override with cache_readonly in immutable subclasses
|
| 837 |
+
def _isnan(self) -> npt.NDArray[np.bool_]:
|
| 838 |
+
"""
|
| 839 |
+
return if each value is nan
|
| 840 |
+
"""
|
| 841 |
+
return self.asi8 == iNaT
|
| 842 |
+
|
| 843 |
+
@property # NB: override with cache_readonly in immutable subclasses
|
| 844 |
+
def _hasna(self) -> bool:
|
| 845 |
+
"""
|
| 846 |
+
return if I have any nans; enables various perf speedups
|
| 847 |
+
"""
|
| 848 |
+
return bool(self._isnan.any())
|
| 849 |
+
|
| 850 |
+
def _maybe_mask_results(
|
| 851 |
+
self, result: np.ndarray, fill_value=iNaT, convert=None
|
| 852 |
+
) -> np.ndarray:
|
| 853 |
+
"""
|
| 854 |
+
Parameters
|
| 855 |
+
----------
|
| 856 |
+
result : np.ndarray
|
| 857 |
+
fill_value : object, default iNaT
|
| 858 |
+
convert : str, dtype or None
|
| 859 |
+
|
| 860 |
+
Returns
|
| 861 |
+
-------
|
| 862 |
+
result : ndarray with values replace by the fill_value
|
| 863 |
+
|
| 864 |
+
mask the result if needed, convert to the provided dtype if its not
|
| 865 |
+
None
|
| 866 |
+
|
| 867 |
+
This is an internal routine.
|
| 868 |
+
"""
|
| 869 |
+
if self._hasna:
|
| 870 |
+
if convert:
|
| 871 |
+
result = result.astype(convert)
|
| 872 |
+
if fill_value is None:
|
| 873 |
+
fill_value = np.nan
|
| 874 |
+
np.putmask(result, self._isnan, fill_value)
|
| 875 |
+
return result
|
| 876 |
+
|
| 877 |
+
# ------------------------------------------------------------------
|
| 878 |
+
# Frequency Properties/Methods
|
| 879 |
+
|
| 880 |
+
@property
def freqstr(self) -> str | None:
    """
    String form of the frequency, or None when no frequency is set.

    Examples
    --------
    For DatetimeIndex:

    >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00"], freq="D")
    >>> idx.freqstr
    'D'

    The frequency can be inferred if there are more than 2 points:

    >>> idx = pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"],
    ...                        freq="infer")
    >>> idx.freqstr
    '2D'

    For PeriodIndex:

    >>> idx = pd.PeriodIndex(["2023-1", "2023-2", "2023-3"], freq="M")
    >>> idx.freqstr
    'M'
    """
    freq = self.freq
    return None if freq is None else freq.freqstr
|
| 909 |
+
|
| 910 |
+
@property  # NB: override with cache_readonly in immutable subclasses
def inferred_freq(self) -> str | None:
    """
    Frequency string produced by ``infer_freq``, if one can be detected.

    Returns None for non-1D data, or when ``infer_freq`` raises
    ValueError because no frequency can be determined.

    Examples
    --------
    For DatetimeIndex:

    >>> idx = pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"])
    >>> idx.inferred_freq
    '2D'

    For TimedeltaIndex:

    >>> tdelta_idx = pd.to_timedelta(["0 days", "10 days", "20 days"])
    >>> tdelta_idx
    TimedeltaIndex(['0 days', '10 days', '20 days'],
                   dtype='timedelta64[ns]', freq=None)
    >>> tdelta_idx.inferred_freq
    '10D'
    """
    inferred = None
    if self.ndim == 1:
        try:
            inferred = frequencies.infer_freq(self)
        except ValueError:
            inferred = None
    return inferred
|
| 940 |
+
|
| 941 |
+
@property # NB: override with cache_readonly in immutable subclasses
|
| 942 |
+
def _resolution_obj(self) -> Resolution | None:
|
| 943 |
+
freqstr = self.freqstr
|
| 944 |
+
if freqstr is None:
|
| 945 |
+
return None
|
| 946 |
+
try:
|
| 947 |
+
return Resolution.get_reso_from_freqstr(freqstr)
|
| 948 |
+
except KeyError:
|
| 949 |
+
return None
|
| 950 |
+
|
| 951 |
+
@property  # NB: override with cache_readonly in immutable subclasses
def resolution(self) -> str:
    """
    Returns day, hour, minute, second, millisecond or microsecond
    """
    reso = self._resolution_obj
    # error: Item "None" of "Optional[Any]" has no attribute "attrname"
    return reso.attrname  # type: ignore[union-attr]
|
| 958 |
+
|
| 959 |
+
# monotonicity/uniqueness properties are called via frequencies.infer_freq,
|
| 960 |
+
# see GH#23789
|
| 961 |
+
|
| 962 |
+
@property
|
| 963 |
+
def _is_monotonic_increasing(self) -> bool:
|
| 964 |
+
return algos.is_monotonic(self.asi8, timelike=True)[0]
|
| 965 |
+
|
| 966 |
+
@property
|
| 967 |
+
def _is_monotonic_decreasing(self) -> bool:
|
| 968 |
+
return algos.is_monotonic(self.asi8, timelike=True)[1]
|
| 969 |
+
|
| 970 |
+
@property
|
| 971 |
+
def _is_unique(self) -> bool:
|
| 972 |
+
return len(unique1d(self.asi8.ravel("K"))) == self.size
|
| 973 |
+
|
| 974 |
+
# ------------------------------------------------------------------
|
| 975 |
+
# Arithmetic Methods
|
| 976 |
+
|
| 977 |
+
def _cmp_method(self, other, op):
    """
    Apply the comparison operator ``op`` elementwise between self and other.

    Returns a boolean ndarray, or whatever ``invalid_comparison`` produces
    (or raises) when ``other`` fails validation.  Positions that are
    missing on either side compare as False, except for ``operator.ne``
    where they compare as True.
    """
    if self.ndim > 1 and getattr(other, "shape", None) == self.shape:
        # TODO: handle 2D-like listlikes
        # Same-shape 2D case: compare the flattened operands, then restore
        # the original shape.
        return op(self.ravel(), other.ravel()).reshape(self.shape)

    try:
        other = self._validate_comparison_value(other)
    except InvalidComparison:
        return invalid_comparison(self, other, op)

    dtype = getattr(other, "dtype", None)
    if is_object_dtype(dtype):
        # We have to use comp_method_OBJECT_ARRAY instead of numpy
        #  comparison otherwise it would raise when comparing to None
        result = ops.comp_method_OBJECT_ARRAY(
            op, np.asarray(self.astype(object)), other
        )
        return result
    if other is NaT:
        # Comparing against the NaT scalar: everything is "not equal",
        # every other comparison is False.
        if op is operator.ne:
            result = np.ones(self.shape, dtype=bool)
        else:
            result = np.zeros(self.shape, dtype=bool)
        return result

    if not isinstance(self.dtype, PeriodDtype):
        # Non-period dtypes carry a resolution (_creso) that must match
        # before the raw i8 values can be compared directly.
        self = cast(TimelikeOps, self)
        if self._creso != other._creso:
            if not isinstance(other, type(self)):
                # i.e. Timedelta/Timestamp, cast to ndarray and let
                #  compare_mismatched_resolutions handle broadcasting
                try:
                    # GH#52080 see if we can losslessly cast to shared unit
                    other = other.as_unit(self.unit, round_ok=False)
                except ValueError:
                    other_arr = np.array(other.asm8)
                    return compare_mismatched_resolutions(
                        self._ndarray, other_arr, op
                    )
            else:
                other_arr = other._ndarray
                return compare_mismatched_resolutions(self._ndarray, other_arr, op)

    other_vals = self._unbox(other)
    # GH#37462 comparison on i8 values is almost 2x faster than M8/m8
    result = op(self._ndarray.view("i8"), other_vals.view("i8"))

    # Overwrite positions where either operand is missing.
    o_mask = isna(other)
    mask = self._isnan | o_mask
    if mask.any():
        nat_result = op is operator.ne
        np.putmask(result, mask, nat_result)

    return result
|
| 1031 |
+
|
| 1032 |
+
# pow is invalid for all three subclasses; TimedeltaArray will override
#  the multiplication and division ops
# Each name below is bound to the callable returned by
#  _make_unpacked_invalid_op for that operator name.
__pow__ = _make_unpacked_invalid_op("__pow__")
__rpow__ = _make_unpacked_invalid_op("__rpow__")
__mul__ = _make_unpacked_invalid_op("__mul__")
__rmul__ = _make_unpacked_invalid_op("__rmul__")
__truediv__ = _make_unpacked_invalid_op("__truediv__")
__rtruediv__ = _make_unpacked_invalid_op("__rtruediv__")
__floordiv__ = _make_unpacked_invalid_op("__floordiv__")
__rfloordiv__ = _make_unpacked_invalid_op("__rfloordiv__")
__mod__ = _make_unpacked_invalid_op("__mod__")
__rmod__ = _make_unpacked_invalid_op("__rmod__")
__divmod__ = _make_unpacked_invalid_op("__divmod__")
__rdivmod__ = _make_unpacked_invalid_op("__rdivmod__")
|
| 1046 |
+
|
| 1047 |
+
@final
|
| 1048 |
+
def _get_i8_values_and_mask(
|
| 1049 |
+
self, other
|
| 1050 |
+
) -> tuple[int | npt.NDArray[np.int64], None | npt.NDArray[np.bool_]]:
|
| 1051 |
+
"""
|
| 1052 |
+
Get the int64 values and b_mask to pass to add_overflowsafe.
|
| 1053 |
+
"""
|
| 1054 |
+
if isinstance(other, Period):
|
| 1055 |
+
i8values = other.ordinal
|
| 1056 |
+
mask = None
|
| 1057 |
+
elif isinstance(other, (Timestamp, Timedelta)):
|
| 1058 |
+
i8values = other._value
|
| 1059 |
+
mask = None
|
| 1060 |
+
else:
|
| 1061 |
+
# PeriodArray, DatetimeArray, TimedeltaArray
|
| 1062 |
+
mask = other._isnan
|
| 1063 |
+
i8values = other.asi8
|
| 1064 |
+
return i8values, mask
|
| 1065 |
+
|
| 1066 |
+
@final
|
| 1067 |
+
def _get_arithmetic_result_freq(self, other) -> BaseOffset | None:
|
| 1068 |
+
"""
|
| 1069 |
+
Check if we can preserve self.freq in addition or subtraction.
|
| 1070 |
+
"""
|
| 1071 |
+
# Adding or subtracting a Timedelta/Timestamp scalar is freq-preserving
|
| 1072 |
+
# whenever self.freq is a Tick
|
| 1073 |
+
if isinstance(self.dtype, PeriodDtype):
|
| 1074 |
+
return self.freq
|
| 1075 |
+
elif not lib.is_scalar(other):
|
| 1076 |
+
return None
|
| 1077 |
+
elif isinstance(self.freq, Tick):
|
| 1078 |
+
# In these cases
|
| 1079 |
+
return self.freq
|
| 1080 |
+
return None
|
| 1081 |
+
|
| 1082 |
+
@final
|
| 1083 |
+
def _add_datetimelike_scalar(self, other) -> DatetimeArray:
|
| 1084 |
+
if not lib.is_np_dtype(self.dtype, "m"):
|
| 1085 |
+
raise TypeError(
|
| 1086 |
+
f"cannot add {type(self).__name__} and {type(other).__name__}"
|
| 1087 |
+
)
|
| 1088 |
+
|
| 1089 |
+
self = cast("TimedeltaArray", self)
|
| 1090 |
+
|
| 1091 |
+
from pandas.core.arrays import DatetimeArray
|
| 1092 |
+
from pandas.core.arrays.datetimes import tz_to_dtype
|
| 1093 |
+
|
| 1094 |
+
assert other is not NaT
|
| 1095 |
+
if isna(other):
|
| 1096 |
+
# i.e. np.datetime64("NaT")
|
| 1097 |
+
# In this case we specifically interpret NaT as a datetime, not
|
| 1098 |
+
# the timedelta interpretation we would get by returning self + NaT
|
| 1099 |
+
result = self._ndarray + NaT.to_datetime64().astype(f"M8[{self.unit}]")
|
| 1100 |
+
# Preserve our resolution
|
| 1101 |
+
return DatetimeArray._simple_new(result, dtype=result.dtype)
|
| 1102 |
+
|
| 1103 |
+
other = Timestamp(other)
|
| 1104 |
+
self, other = self._ensure_matching_resos(other)
|
| 1105 |
+
self = cast("TimedeltaArray", self)
|
| 1106 |
+
|
| 1107 |
+
other_i8, o_mask = self._get_i8_values_and_mask(other)
|
| 1108 |
+
result = add_overflowsafe(self.asi8, np.asarray(other_i8, dtype="i8"))
|
| 1109 |
+
res_values = result.view(f"M8[{self.unit}]")
|
| 1110 |
+
|
| 1111 |
+
dtype = tz_to_dtype(tz=other.tz, unit=self.unit)
|
| 1112 |
+
res_values = result.view(f"M8[{self.unit}]")
|
| 1113 |
+
new_freq = self._get_arithmetic_result_freq(other)
|
| 1114 |
+
return DatetimeArray._simple_new(res_values, dtype=dtype, freq=new_freq)
|
| 1115 |
+
|
| 1116 |
+
@final
|
| 1117 |
+
def _add_datetime_arraylike(self, other: DatetimeArray) -> DatetimeArray:
|
| 1118 |
+
if not lib.is_np_dtype(self.dtype, "m"):
|
| 1119 |
+
raise TypeError(
|
| 1120 |
+
f"cannot add {type(self).__name__} and {type(other).__name__}"
|
| 1121 |
+
)
|
| 1122 |
+
|
| 1123 |
+
# defer to DatetimeArray.__add__
|
| 1124 |
+
return other + self
|
| 1125 |
+
|
| 1126 |
+
@final
|
| 1127 |
+
def _sub_datetimelike_scalar(
|
| 1128 |
+
self, other: datetime | np.datetime64
|
| 1129 |
+
) -> TimedeltaArray:
|
| 1130 |
+
if self.dtype.kind != "M":
|
| 1131 |
+
raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}")
|
| 1132 |
+
|
| 1133 |
+
self = cast("DatetimeArray", self)
|
| 1134 |
+
# subtract a datetime from myself, yielding a ndarray[timedelta64[ns]]
|
| 1135 |
+
|
| 1136 |
+
if isna(other):
|
| 1137 |
+
# i.e. np.datetime64("NaT")
|
| 1138 |
+
return self - NaT
|
| 1139 |
+
|
| 1140 |
+
ts = Timestamp(other)
|
| 1141 |
+
|
| 1142 |
+
self, ts = self._ensure_matching_resos(ts)
|
| 1143 |
+
return self._sub_datetimelike(ts)
|
| 1144 |
+
|
| 1145 |
+
@final
|
| 1146 |
+
def _sub_datetime_arraylike(self, other: DatetimeArray) -> TimedeltaArray:
|
| 1147 |
+
if self.dtype.kind != "M":
|
| 1148 |
+
raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}")
|
| 1149 |
+
|
| 1150 |
+
if len(self) != len(other):
|
| 1151 |
+
raise ValueError("cannot add indices of unequal length")
|
| 1152 |
+
|
| 1153 |
+
self = cast("DatetimeArray", self)
|
| 1154 |
+
|
| 1155 |
+
self, other = self._ensure_matching_resos(other)
|
| 1156 |
+
return self._sub_datetimelike(other)
|
| 1157 |
+
|
| 1158 |
+
@final
|
| 1159 |
+
def _sub_datetimelike(self, other: Timestamp | DatetimeArray) -> TimedeltaArray:
|
| 1160 |
+
self = cast("DatetimeArray", self)
|
| 1161 |
+
|
| 1162 |
+
from pandas.core.arrays import TimedeltaArray
|
| 1163 |
+
|
| 1164 |
+
try:
|
| 1165 |
+
self._assert_tzawareness_compat(other)
|
| 1166 |
+
except TypeError as err:
|
| 1167 |
+
new_message = str(err).replace("compare", "subtract")
|
| 1168 |
+
raise type(err)(new_message) from err
|
| 1169 |
+
|
| 1170 |
+
other_i8, o_mask = self._get_i8_values_and_mask(other)
|
| 1171 |
+
res_values = add_overflowsafe(self.asi8, np.asarray(-other_i8, dtype="i8"))
|
| 1172 |
+
res_m8 = res_values.view(f"timedelta64[{self.unit}]")
|
| 1173 |
+
|
| 1174 |
+
new_freq = self._get_arithmetic_result_freq(other)
|
| 1175 |
+
new_freq = cast("Tick | None", new_freq)
|
| 1176 |
+
return TimedeltaArray._simple_new(res_m8, dtype=res_m8.dtype, freq=new_freq)
|
| 1177 |
+
|
| 1178 |
+
@final
|
| 1179 |
+
def _add_period(self, other: Period) -> PeriodArray:
|
| 1180 |
+
if not lib.is_np_dtype(self.dtype, "m"):
|
| 1181 |
+
raise TypeError(f"cannot add Period to a {type(self).__name__}")
|
| 1182 |
+
|
| 1183 |
+
# We will wrap in a PeriodArray and defer to the reversed operation
|
| 1184 |
+
from pandas.core.arrays.period import PeriodArray
|
| 1185 |
+
|
| 1186 |
+
i8vals = np.broadcast_to(other.ordinal, self.shape)
|
| 1187 |
+
dtype = PeriodDtype(other.freq)
|
| 1188 |
+
parr = PeriodArray(i8vals, dtype=dtype)
|
| 1189 |
+
return parr + self
|
| 1190 |
+
|
| 1191 |
+
def _add_offset(self, offset):
|
| 1192 |
+
raise AbstractMethodError(self)
|
| 1193 |
+
|
| 1194 |
+
def _add_timedeltalike_scalar(self, other):
|
| 1195 |
+
"""
|
| 1196 |
+
Add a delta of a timedeltalike
|
| 1197 |
+
|
| 1198 |
+
Returns
|
| 1199 |
+
-------
|
| 1200 |
+
Same type as self
|
| 1201 |
+
"""
|
| 1202 |
+
if isna(other):
|
| 1203 |
+
# i.e np.timedelta64("NaT")
|
| 1204 |
+
new_values = np.empty(self.shape, dtype="i8").view(self._ndarray.dtype)
|
| 1205 |
+
new_values.fill(iNaT)
|
| 1206 |
+
return type(self)._simple_new(new_values, dtype=self.dtype)
|
| 1207 |
+
|
| 1208 |
+
# PeriodArray overrides, so we only get here with DTA/TDA
|
| 1209 |
+
self = cast("DatetimeArray | TimedeltaArray", self)
|
| 1210 |
+
other = Timedelta(other)
|
| 1211 |
+
self, other = self._ensure_matching_resos(other)
|
| 1212 |
+
return self._add_timedeltalike(other)
|
| 1213 |
+
|
| 1214 |
+
def _add_timedelta_arraylike(self, other: TimedeltaArray):
|
| 1215 |
+
"""
|
| 1216 |
+
Add a delta of a TimedeltaIndex
|
| 1217 |
+
|
| 1218 |
+
Returns
|
| 1219 |
+
-------
|
| 1220 |
+
Same type as self
|
| 1221 |
+
"""
|
| 1222 |
+
# overridden by PeriodArray
|
| 1223 |
+
|
| 1224 |
+
if len(self) != len(other):
|
| 1225 |
+
raise ValueError("cannot add indices of unequal length")
|
| 1226 |
+
|
| 1227 |
+
self = cast("DatetimeArray | TimedeltaArray", self)
|
| 1228 |
+
|
| 1229 |
+
self, other = self._ensure_matching_resos(other)
|
| 1230 |
+
return self._add_timedeltalike(other)
|
| 1231 |
+
|
| 1232 |
+
@final
|
| 1233 |
+
def _add_timedeltalike(self, other: Timedelta | TimedeltaArray):
|
| 1234 |
+
self = cast("DatetimeArray | TimedeltaArray", self)
|
| 1235 |
+
|
| 1236 |
+
other_i8, o_mask = self._get_i8_values_and_mask(other)
|
| 1237 |
+
new_values = add_overflowsafe(self.asi8, np.asarray(other_i8, dtype="i8"))
|
| 1238 |
+
res_values = new_values.view(self._ndarray.dtype)
|
| 1239 |
+
|
| 1240 |
+
new_freq = self._get_arithmetic_result_freq(other)
|
| 1241 |
+
|
| 1242 |
+
# error: Argument "dtype" to "_simple_new" of "DatetimeArray" has
|
| 1243 |
+
# incompatible type "Union[dtype[datetime64], DatetimeTZDtype,
|
| 1244 |
+
# dtype[timedelta64]]"; expected "Union[dtype[datetime64], DatetimeTZDtype]"
|
| 1245 |
+
return type(self)._simple_new(
|
| 1246 |
+
res_values, dtype=self.dtype, freq=new_freq # type: ignore[arg-type]
|
| 1247 |
+
)
|
| 1248 |
+
|
| 1249 |
+
@final
|
| 1250 |
+
def _add_nat(self):
|
| 1251 |
+
"""
|
| 1252 |
+
Add pd.NaT to self
|
| 1253 |
+
"""
|
| 1254 |
+
if isinstance(self.dtype, PeriodDtype):
|
| 1255 |
+
raise TypeError(
|
| 1256 |
+
f"Cannot add {type(self).__name__} and {type(NaT).__name__}"
|
| 1257 |
+
)
|
| 1258 |
+
self = cast("TimedeltaArray | DatetimeArray", self)
|
| 1259 |
+
|
| 1260 |
+
# GH#19124 pd.NaT is treated like a timedelta for both timedelta
|
| 1261 |
+
# and datetime dtypes
|
| 1262 |
+
result = np.empty(self.shape, dtype=np.int64)
|
| 1263 |
+
result.fill(iNaT)
|
| 1264 |
+
result = result.view(self._ndarray.dtype) # preserve reso
|
| 1265 |
+
# error: Argument "dtype" to "_simple_new" of "DatetimeArray" has
|
| 1266 |
+
# incompatible type "Union[dtype[timedelta64], dtype[datetime64],
|
| 1267 |
+
# DatetimeTZDtype]"; expected "Union[dtype[datetime64], DatetimeTZDtype]"
|
| 1268 |
+
return type(self)._simple_new(
|
| 1269 |
+
result, dtype=self.dtype, freq=None # type: ignore[arg-type]
|
| 1270 |
+
)
|
| 1271 |
+
|
| 1272 |
+
@final
|
| 1273 |
+
def _sub_nat(self):
|
| 1274 |
+
"""
|
| 1275 |
+
Subtract pd.NaT from self
|
| 1276 |
+
"""
|
| 1277 |
+
# GH#19124 Timedelta - datetime is not in general well-defined.
|
| 1278 |
+
# We make an exception for pd.NaT, which in this case quacks
|
| 1279 |
+
# like a timedelta.
|
| 1280 |
+
# For datetime64 dtypes by convention we treat NaT as a datetime, so
|
| 1281 |
+
# this subtraction returns a timedelta64 dtype.
|
| 1282 |
+
# For period dtype, timedelta64 is a close-enough return dtype.
|
| 1283 |
+
result = np.empty(self.shape, dtype=np.int64)
|
| 1284 |
+
result.fill(iNaT)
|
| 1285 |
+
if self.dtype.kind in "mM":
|
| 1286 |
+
# We can retain unit in dtype
|
| 1287 |
+
self = cast("DatetimeArray| TimedeltaArray", self)
|
| 1288 |
+
return result.view(f"timedelta64[{self.unit}]")
|
| 1289 |
+
else:
|
| 1290 |
+
return result.view("timedelta64[ns]")
|
| 1291 |
+
|
| 1292 |
+
@final
|
| 1293 |
+
def _sub_periodlike(self, other: Period | PeriodArray) -> npt.NDArray[np.object_]:
|
| 1294 |
+
# If the operation is well-defined, we return an object-dtype ndarray
|
| 1295 |
+
# of DateOffsets. Null entries are filled with pd.NaT
|
| 1296 |
+
if not isinstance(self.dtype, PeriodDtype):
|
| 1297 |
+
raise TypeError(
|
| 1298 |
+
f"cannot subtract {type(other).__name__} from {type(self).__name__}"
|
| 1299 |
+
)
|
| 1300 |
+
|
| 1301 |
+
self = cast("PeriodArray", self)
|
| 1302 |
+
self._check_compatible_with(other)
|
| 1303 |
+
|
| 1304 |
+
other_i8, o_mask = self._get_i8_values_and_mask(other)
|
| 1305 |
+
new_i8_data = add_overflowsafe(self.asi8, np.asarray(-other_i8, dtype="i8"))
|
| 1306 |
+
new_data = np.array([self.freq.base * x for x in new_i8_data])
|
| 1307 |
+
|
| 1308 |
+
if o_mask is None:
|
| 1309 |
+
# i.e. Period scalar
|
| 1310 |
+
mask = self._isnan
|
| 1311 |
+
else:
|
| 1312 |
+
# i.e. PeriodArray
|
| 1313 |
+
mask = self._isnan | o_mask
|
| 1314 |
+
new_data[mask] = NaT
|
| 1315 |
+
return new_data
|
| 1316 |
+
|
| 1317 |
+
@final
|
| 1318 |
+
def _addsub_object_array(self, other: npt.NDArray[np.object_], op):
|
| 1319 |
+
"""
|
| 1320 |
+
Add or subtract array-like of DateOffset objects
|
| 1321 |
+
|
| 1322 |
+
Parameters
|
| 1323 |
+
----------
|
| 1324 |
+
other : np.ndarray[object]
|
| 1325 |
+
op : {operator.add, operator.sub}
|
| 1326 |
+
|
| 1327 |
+
Returns
|
| 1328 |
+
-------
|
| 1329 |
+
np.ndarray[object]
|
| 1330 |
+
Except in fastpath case with length 1 where we operate on the
|
| 1331 |
+
contained scalar.
|
| 1332 |
+
"""
|
| 1333 |
+
assert op in [operator.add, operator.sub]
|
| 1334 |
+
if len(other) == 1 and self.ndim == 1:
|
| 1335 |
+
# Note: without this special case, we could annotate return type
|
| 1336 |
+
# as ndarray[object]
|
| 1337 |
+
# If both 1D then broadcasting is unambiguous
|
| 1338 |
+
return op(self, other[0])
|
| 1339 |
+
|
| 1340 |
+
warnings.warn(
|
| 1341 |
+
"Adding/subtracting object-dtype array to "
|
| 1342 |
+
f"{type(self).__name__} not vectorized.",
|
| 1343 |
+
PerformanceWarning,
|
| 1344 |
+
stacklevel=find_stack_level(),
|
| 1345 |
+
)
|
| 1346 |
+
|
| 1347 |
+
# Caller is responsible for broadcasting if necessary
|
| 1348 |
+
assert self.shape == other.shape, (self.shape, other.shape)
|
| 1349 |
+
|
| 1350 |
+
res_values = op(self.astype("O"), np.asarray(other))
|
| 1351 |
+
return res_values
|
| 1352 |
+
|
| 1353 |
+
def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> Self:
|
| 1354 |
+
if name not in {"cummin", "cummax"}:
|
| 1355 |
+
raise TypeError(f"Accumulation {name} not supported for {type(self)}")
|
| 1356 |
+
|
| 1357 |
+
op = getattr(datetimelike_accumulations, name)
|
| 1358 |
+
result = op(self.copy(), skipna=skipna, **kwargs)
|
| 1359 |
+
|
| 1360 |
+
return type(self)._simple_new(result, dtype=self.dtype)
|
| 1361 |
+
|
| 1362 |
+
@unpack_zerodim_and_defer("__add__")
def __add__(self, other):
    """
    Dispatch ``self + other`` to the helper matching the type of ``other``.

    Scalar cases are tested first, then array-likes keyed on the dtype
    ``other`` had before wrapping.  Returns NotImplemented for operands
    no branch handles.
    """
    # Capture the dtype before wrapping so the array-like branches below
    # key on what the caller passed in.
    other_dtype = getattr(other, "dtype", None)
    other = ensure_wrapped_if_datetimelike(other)

    # scalar others
    if other is NaT:
        result = self._add_nat()
    elif isinstance(other, (Tick, timedelta, np.timedelta64)):
        result = self._add_timedeltalike_scalar(other)
    elif isinstance(other, BaseOffset):
        # specifically _not_ a Tick
        result = self._add_offset(other)
    elif isinstance(other, (datetime, np.datetime64)):
        result = self._add_datetimelike_scalar(other)
    elif isinstance(other, Period) and lib.is_np_dtype(self.dtype, "m"):
        result = self._add_period(other)
    elif lib.is_integer(other):
        # This check must come after the check for np.timedelta64
        # as is_integer returns True for these
        if not isinstance(self.dtype, PeriodDtype):
            raise integer_op_not_supported(self)
        obj = cast("PeriodArray", self)
        # The integer is scaled by the period dtype's multiple (_n).
        result = obj._addsub_int_array_or_scalar(other * obj.dtype._n, operator.add)

    # array-like others
    elif lib.is_np_dtype(other_dtype, "m"):
        # TimedeltaIndex, ndarray[timedelta64]
        result = self._add_timedelta_arraylike(other)
    elif is_object_dtype(other_dtype):
        # e.g. Array/Index of DateOffset objects
        result = self._addsub_object_array(other, operator.add)
    elif lib.is_np_dtype(other_dtype, "M") or isinstance(
        other_dtype, DatetimeTZDtype
    ):
        # DatetimeIndex, ndarray[datetime64]
        return self._add_datetime_arraylike(other)
    elif is_integer_dtype(other_dtype):
        if not isinstance(self.dtype, PeriodDtype):
            raise integer_op_not_supported(self)
        obj = cast("PeriodArray", self)
        result = obj._addsub_int_array_or_scalar(other * obj.dtype._n, operator.add)
    else:
        # Includes Categorical, other ExtensionArrays
        # For PeriodDtype, if self is a TimedeltaArray and other is a
        #  PeriodArray with a timedelta-like (i.e. Tick) freq, this
        #  operation is valid.  Defer to the PeriodArray implementation.
        #  In remaining cases, this will end up raising TypeError.
        return NotImplemented

    # A raw timedelta64 ndarray result is wrapped back into a TimedeltaArray.
    if isinstance(result, np.ndarray) and lib.is_np_dtype(result.dtype, "m"):
        from pandas.core.arrays import TimedeltaArray

        return TimedeltaArray._from_sequence(result)
    return result
|
| 1417 |
+
|
| 1418 |
+
def __radd__(self, other):
|
| 1419 |
+
# alias for __add__
|
| 1420 |
+
return self.__add__(other)
|
| 1421 |
+
|
| 1422 |
+
@unpack_zerodim_and_defer("__sub__")
def __sub__(self, other):
    """
    Dispatch ``self - other`` to the helper matching the type of ``other``.

    Timedelta-like and offset operands are negated and routed through
    the addition helpers.  Returns NotImplemented for operands no branch
    handles.
    """
    # Capture the dtype before wrapping so the array-like branches below
    # key on what the caller passed in.
    other_dtype = getattr(other, "dtype", None)
    other = ensure_wrapped_if_datetimelike(other)

    # scalar others
    if other is NaT:
        result = self._sub_nat()
    elif isinstance(other, (Tick, timedelta, np.timedelta64)):
        result = self._add_timedeltalike_scalar(-other)
    elif isinstance(other, BaseOffset):
        # specifically _not_ a Tick
        result = self._add_offset(-other)
    elif isinstance(other, (datetime, np.datetime64)):
        result = self._sub_datetimelike_scalar(other)
    elif lib.is_integer(other):
        # This check must come after the check for np.timedelta64
        # as is_integer returns True for these
        if not isinstance(self.dtype, PeriodDtype):
            raise integer_op_not_supported(self)
        obj = cast("PeriodArray", self)
        # The integer is scaled by the period dtype's multiple (_n).
        result = obj._addsub_int_array_or_scalar(other * obj.dtype._n, operator.sub)

    elif isinstance(other, Period):
        result = self._sub_periodlike(other)

    # array-like others
    elif lib.is_np_dtype(other_dtype, "m"):
        # TimedeltaIndex, ndarray[timedelta64]
        result = self._add_timedelta_arraylike(-other)
    elif is_object_dtype(other_dtype):
        # e.g. Array/Index of DateOffset objects
        result = self._addsub_object_array(other, operator.sub)
    elif lib.is_np_dtype(other_dtype, "M") or isinstance(
        other_dtype, DatetimeTZDtype
    ):
        # DatetimeIndex, ndarray[datetime64]
        result = self._sub_datetime_arraylike(other)
    elif isinstance(other_dtype, PeriodDtype):
        # PeriodIndex
        result = self._sub_periodlike(other)
    elif is_integer_dtype(other_dtype):
        if not isinstance(self.dtype, PeriodDtype):
            raise integer_op_not_supported(self)
        obj = cast("PeriodArray", self)
        result = obj._addsub_int_array_or_scalar(other * obj.dtype._n, operator.sub)
    else:
        # Includes ExtensionArrays, float_dtype
        return NotImplemented

    # A raw timedelta64 ndarray result is wrapped back into a TimedeltaArray.
    if isinstance(result, np.ndarray) and lib.is_np_dtype(result.dtype, "m"):
        from pandas.core.arrays import TimedeltaArray

        return TimedeltaArray._from_sequence(result)
    return result
|
| 1477 |
+
|
| 1478 |
+
def __rsub__(self, other):
    """
    Evaluate the reflected subtraction ``other - self``.

    Raises TypeError for combinations that are not well-defined; the
    fallback at the end negates ``self - other``.
    """
    other_dtype = getattr(other, "dtype", None)
    other_is_dt64 = lib.is_np_dtype(other_dtype, "M") or isinstance(
        other_dtype, DatetimeTZDtype
    )

    if other_is_dt64 and lib.is_np_dtype(self.dtype, "m"):
        # ndarray[datetime64] cannot be subtracted from self, so
        # we need to wrap in DatetimeArray/Index and flip the operation
        if lib.is_scalar(other):
            # i.e. np.datetime64 object
            return Timestamp(other) - self
        if not isinstance(other, DatetimeLikeArrayMixin):
            # Avoid down-casting DatetimeIndex
            from pandas.core.arrays import DatetimeArray

            other = DatetimeArray._from_sequence(other)
        return other - self
    elif self.dtype.kind == "M" and hasattr(other, "dtype") and not other_is_dt64:
        # GH#19959 datetime - datetime is well-defined as timedelta,
        # but any other type - datetime is not well-defined.
        raise TypeError(
            f"cannot subtract {type(self).__name__} from {type(other).__name__}"
        )
    elif isinstance(self.dtype, PeriodDtype) and lib.is_np_dtype(other_dtype, "m"):
        # TODO: Can we simplify/generalize these cases at all?
        raise TypeError(f"cannot subtract {type(self).__name__} from {other.dtype}")
    elif lib.is_np_dtype(self.dtype, "m"):
        # timedelta64 self: other - self is computed as (-self) + other.
        self = cast("TimedeltaArray", self)
        return (-self) + other

    # We get here with e.g. datetime objects
    return -(self - other)
|
| 1511 |
+
|
| 1512 |
+
def __iadd__(self, other) -> Self:
|
| 1513 |
+
result = self + other
|
| 1514 |
+
self[:] = result[:]
|
| 1515 |
+
|
| 1516 |
+
if not isinstance(self.dtype, PeriodDtype):
|
| 1517 |
+
# restore freq, which is invalidated by setitem
|
| 1518 |
+
self._freq = result.freq
|
| 1519 |
+
return self
|
| 1520 |
+
|
| 1521 |
+
def __isub__(self, other) -> Self:
|
| 1522 |
+
result = self - other
|
| 1523 |
+
self[:] = result[:]
|
| 1524 |
+
|
| 1525 |
+
if not isinstance(self.dtype, PeriodDtype):
|
| 1526 |
+
# restore freq, which is invalidated by setitem
|
| 1527 |
+
self._freq = result.freq
|
| 1528 |
+
return self
|
| 1529 |
+
|
| 1530 |
+
# --------------------------------------------------------------
|
| 1531 |
+
# Reductions
|
| 1532 |
+
|
| 1533 |
+
@_period_dispatch
def _quantile(
    self,
    qs: npt.NDArray[np.float64],
    interpolation: str,
) -> Self:
    """
    Compute quantiles by forwarding to the parent class implementation.

    The method is wrapped by ``_period_dispatch``.
    """
    parent = super()
    return parent._quantile(qs=qs, interpolation=interpolation)
|
| 1540 |
+
|
| 1541 |
+
@_period_dispatch
def min(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs):
    """
    Return the minimum value of the Array or minimum along
    an axis.

    The extra keyword arguments and ``axis`` are validated before the
    reduction runs on the backing ndarray.

    See Also
    --------
    numpy.ndarray.min
    Index.min : Return the minimum value in an Index.
    Series.min : Return the minimum value in a Series.
    """
    nv.validate_min((), kwargs)
    nv.validate_minmax_axis(axis, self.ndim)

    smallest = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna)
    return self._wrap_reduction_result(axis, smallest)
|
| 1558 |
+
|
| 1559 |
+
@_period_dispatch
def max(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs):
    """
    Return the maximum value of the Array or maximum along
    an axis.

    The extra keyword arguments and ``axis`` are validated before the
    reduction runs on the backing ndarray.

    See Also
    --------
    numpy.ndarray.max
    Index.max : Return the maximum value in an Index.
    Series.max : Return the maximum value in a Series.
    """
    nv.validate_max((), kwargs)
    nv.validate_minmax_axis(axis, self.ndim)

    largest = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna)
    return self._wrap_reduction_result(axis, largest)
|
| 1576 |
+
|
| 1577 |
+
def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0):
    """
    Return the mean value of the Array.

    Parameters
    ----------
    skipna : bool, default True
        Whether to ignore any NaT elements.
    axis : int, optional, default 0

    Returns
    -------
    scalar
        Timestamp or Timedelta.

    Raises
    ------
    TypeError
        If the array has a period dtype.

    See Also
    --------
    numpy.ndarray.mean : Returns the average of array elements along a given axis.
    Series.mean : Return the mean value in a Series.

    Notes
    -----
    mean is only defined for Datetime and Timedelta dtypes, not for Period.

    Examples
    --------
    For :class:`pandas.DatetimeIndex`:

    >>> idx = pd.date_range('2001-01-01 00:00', periods=3)
    >>> idx
    DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03'],
                  dtype='datetime64[ns]', freq='D')
    >>> idx.mean()
    Timestamp('2001-01-02 00:00:00')

    For :class:`pandas.TimedeltaIndex`:

    >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='D')
    >>> tdelta_idx
    TimedeltaIndex(['1 days', '2 days', '3 days'],
                    dtype='timedelta64[ns]', freq=None)
    >>> tdelta_idx.mean()
    Timedelta('2 days 00:00:00')
    """
    if isinstance(self.dtype, PeriodDtype):
        # See discussion in GH#24757
        raise TypeError(
            f"mean is not implemented for {type(self).__name__} since the "
            "meaning is ambiguous.  An alternative is "
            "obj.to_timestamp(how='start').mean()"
        )

    na_mask = self.isna()
    averaged = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna, mask=na_mask)
    return self._wrap_reduction_result(axis, averaged)
|
| 1633 |
+
|
| 1634 |
+
@_period_dispatch
|
| 1635 |
+
def median(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs):
|
| 1636 |
+
nv.validate_median((), kwargs)
|
| 1637 |
+
|
| 1638 |
+
if axis is not None and abs(axis) >= self.ndim:
|
| 1639 |
+
raise ValueError("abs(axis) must be less than ndim")
|
| 1640 |
+
|
| 1641 |
+
result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
|
| 1642 |
+
return self._wrap_reduction_result(axis, result)
|
| 1643 |
+
|
| 1644 |
+
def _mode(self, dropna: bool = True):
|
| 1645 |
+
mask = None
|
| 1646 |
+
if dropna:
|
| 1647 |
+
mask = self.isna()
|
| 1648 |
+
|
| 1649 |
+
i8modes = algorithms.mode(self.view("i8"), mask=mask)
|
| 1650 |
+
npmodes = i8modes.view(self._ndarray.dtype)
|
| 1651 |
+
npmodes = cast(np.ndarray, npmodes)
|
| 1652 |
+
return self._from_backing_data(npmodes)
|
| 1653 |
+
|
| 1654 |
+
# ------------------------------------------------------------------
|
| 1655 |
+
# GroupBy Methods
|
| 1656 |
+
|
| 1657 |
+
def _groupby_op(
|
| 1658 |
+
self,
|
| 1659 |
+
*,
|
| 1660 |
+
how: str,
|
| 1661 |
+
has_dropped_na: bool,
|
| 1662 |
+
min_count: int,
|
| 1663 |
+
ngroups: int,
|
| 1664 |
+
ids: npt.NDArray[np.intp],
|
| 1665 |
+
**kwargs,
|
| 1666 |
+
):
|
| 1667 |
+
dtype = self.dtype
|
| 1668 |
+
if dtype.kind == "M":
|
| 1669 |
+
# Adding/multiplying datetimes is not valid
|
| 1670 |
+
if how in ["sum", "prod", "cumsum", "cumprod", "var", "skew"]:
|
| 1671 |
+
raise TypeError(f"datetime64 type does not support {how} operations")
|
| 1672 |
+
if how in ["any", "all"]:
|
| 1673 |
+
# GH#34479
|
| 1674 |
+
warnings.warn(
|
| 1675 |
+
f"'{how}' with datetime64 dtypes is deprecated and will raise in a "
|
| 1676 |
+
f"future version. Use (obj != pd.Timestamp(0)).{how}() instead.",
|
| 1677 |
+
FutureWarning,
|
| 1678 |
+
stacklevel=find_stack_level(),
|
| 1679 |
+
)
|
| 1680 |
+
|
| 1681 |
+
elif isinstance(dtype, PeriodDtype):
|
| 1682 |
+
# Adding/multiplying Periods is not valid
|
| 1683 |
+
if how in ["sum", "prod", "cumsum", "cumprod", "var", "skew"]:
|
| 1684 |
+
raise TypeError(f"Period type does not support {how} operations")
|
| 1685 |
+
if how in ["any", "all"]:
|
| 1686 |
+
# GH#34479
|
| 1687 |
+
warnings.warn(
|
| 1688 |
+
f"'{how}' with PeriodDtype is deprecated and will raise in a "
|
| 1689 |
+
f"future version. Use (obj != pd.Period(0, freq)).{how}() instead.",
|
| 1690 |
+
FutureWarning,
|
| 1691 |
+
stacklevel=find_stack_level(),
|
| 1692 |
+
)
|
| 1693 |
+
else:
|
| 1694 |
+
# timedeltas we can add but not multiply
|
| 1695 |
+
if how in ["prod", "cumprod", "skew", "var"]:
|
| 1696 |
+
raise TypeError(f"timedelta64 type does not support {how} operations")
|
| 1697 |
+
|
| 1698 |
+
# All of the functions implemented here are ordinal, so we can
|
| 1699 |
+
# operate on the tz-naive equivalents
|
| 1700 |
+
npvalues = self._ndarray.view("M8[ns]")
|
| 1701 |
+
|
| 1702 |
+
from pandas.core.groupby.ops import WrappedCythonOp
|
| 1703 |
+
|
| 1704 |
+
kind = WrappedCythonOp.get_kind_from_how(how)
|
| 1705 |
+
op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na)
|
| 1706 |
+
|
| 1707 |
+
res_values = op._cython_op_ndim_compat(
|
| 1708 |
+
npvalues,
|
| 1709 |
+
min_count=min_count,
|
| 1710 |
+
ngroups=ngroups,
|
| 1711 |
+
comp_ids=ids,
|
| 1712 |
+
mask=None,
|
| 1713 |
+
**kwargs,
|
| 1714 |
+
)
|
| 1715 |
+
|
| 1716 |
+
if op.how in op.cast_blocklist:
|
| 1717 |
+
# i.e. how in ["rank"], since other cast_blocklist methods don't go
|
| 1718 |
+
# through cython_operation
|
| 1719 |
+
return res_values
|
| 1720 |
+
|
| 1721 |
+
# We did a view to M8[ns] above, now we go the other direction
|
| 1722 |
+
assert res_values.dtype == "M8[ns]"
|
| 1723 |
+
if how in ["std", "sem"]:
|
| 1724 |
+
from pandas.core.arrays import TimedeltaArray
|
| 1725 |
+
|
| 1726 |
+
if isinstance(self.dtype, PeriodDtype):
|
| 1727 |
+
raise TypeError("'std' and 'sem' are not valid for PeriodDtype")
|
| 1728 |
+
self = cast("DatetimeArray | TimedeltaArray", self)
|
| 1729 |
+
new_dtype = f"m8[{self.unit}]"
|
| 1730 |
+
res_values = res_values.view(new_dtype)
|
| 1731 |
+
return TimedeltaArray._simple_new(res_values, dtype=res_values.dtype)
|
| 1732 |
+
|
| 1733 |
+
res_values = res_values.view(self._ndarray.dtype)
|
| 1734 |
+
return self._from_backing_data(res_values)
|
| 1735 |
+
|
| 1736 |
+
|
| 1737 |
+
class DatelikeOps(DatetimeLikeArrayMixin):
|
| 1738 |
+
"""
|
| 1739 |
+
Common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex.
|
| 1740 |
+
"""
|
| 1741 |
+
|
| 1742 |
+
@Substitution(
|
| 1743 |
+
URL="https://docs.python.org/3/library/datetime.html"
|
| 1744 |
+
"#strftime-and-strptime-behavior"
|
| 1745 |
+
)
|
| 1746 |
+
def strftime(self, date_format: str) -> npt.NDArray[np.object_]:
|
| 1747 |
+
"""
|
| 1748 |
+
Convert to Index using specified date_format.
|
| 1749 |
+
|
| 1750 |
+
Return an Index of formatted strings specified by date_format, which
|
| 1751 |
+
supports the same string format as the python standard library. Details
|
| 1752 |
+
of the string format can be found in `python string format
|
| 1753 |
+
doc <%(URL)s>`__.
|
| 1754 |
+
|
| 1755 |
+
Formats supported by the C `strftime` API but not by the python string format
|
| 1756 |
+
doc (such as `"%%R"`, `"%%r"`) are not officially supported and should be
|
| 1757 |
+
preferably replaced with their supported equivalents (such as `"%%H:%%M"`,
|
| 1758 |
+
`"%%I:%%M:%%S %%p"`).
|
| 1759 |
+
|
| 1760 |
+
Note that `PeriodIndex` support additional directives, detailed in
|
| 1761 |
+
`Period.strftime`.
|
| 1762 |
+
|
| 1763 |
+
Parameters
|
| 1764 |
+
----------
|
| 1765 |
+
date_format : str
|
| 1766 |
+
Date format string (e.g. "%%Y-%%m-%%d").
|
| 1767 |
+
|
| 1768 |
+
Returns
|
| 1769 |
+
-------
|
| 1770 |
+
ndarray[object]
|
| 1771 |
+
NumPy ndarray of formatted strings.
|
| 1772 |
+
|
| 1773 |
+
See Also
|
| 1774 |
+
--------
|
| 1775 |
+
to_datetime : Convert the given argument to datetime.
|
| 1776 |
+
DatetimeIndex.normalize : Return DatetimeIndex with times to midnight.
|
| 1777 |
+
DatetimeIndex.round : Round the DatetimeIndex to the specified freq.
|
| 1778 |
+
DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq.
|
| 1779 |
+
Timestamp.strftime : Format a single Timestamp.
|
| 1780 |
+
Period.strftime : Format a single Period.
|
| 1781 |
+
|
| 1782 |
+
Examples
|
| 1783 |
+
--------
|
| 1784 |
+
>>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"),
|
| 1785 |
+
... periods=3, freq='s')
|
| 1786 |
+
>>> rng.strftime('%%B %%d, %%Y, %%r')
|
| 1787 |
+
Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM',
|
| 1788 |
+
'March 10, 2018, 09:00:02 AM'],
|
| 1789 |
+
dtype='object')
|
| 1790 |
+
"""
|
| 1791 |
+
result = self._format_native_types(date_format=date_format, na_rep=np.nan)
|
| 1792 |
+
return result.astype(object, copy=False)
|
| 1793 |
+
|
| 1794 |
+
|
| 1795 |
+
_round_doc = """
|
| 1796 |
+
Perform {op} operation on the data to the specified `freq`.
|
| 1797 |
+
|
| 1798 |
+
Parameters
|
| 1799 |
+
----------
|
| 1800 |
+
freq : str or Offset
|
| 1801 |
+
The frequency level to {op} the index to. Must be a fixed
|
| 1802 |
+
frequency like 'S' (second) not 'ME' (month end). See
|
| 1803 |
+
:ref:`frequency aliases <timeseries.offset_aliases>` for
|
| 1804 |
+
a list of possible `freq` values.
|
| 1805 |
+
ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
|
| 1806 |
+
Only relevant for DatetimeIndex:
|
| 1807 |
+
|
| 1808 |
+
- 'infer' will attempt to infer fall dst-transition hours based on
|
| 1809 |
+
order
|
| 1810 |
+
- bool-ndarray where True signifies a DST time, False designates
|
| 1811 |
+
a non-DST time (note that this flag is only applicable for
|
| 1812 |
+
ambiguous times)
|
| 1813 |
+
- 'NaT' will return NaT where there are ambiguous times
|
| 1814 |
+
- 'raise' will raise an AmbiguousTimeError if there are ambiguous
|
| 1815 |
+
times.
|
| 1816 |
+
|
| 1817 |
+
nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, default 'raise'
|
| 1818 |
+
A nonexistent time does not exist in a particular timezone
|
| 1819 |
+
where clocks moved forward due to DST.
|
| 1820 |
+
|
| 1821 |
+
- 'shift_forward' will shift the nonexistent time forward to the
|
| 1822 |
+
closest existing time
|
| 1823 |
+
- 'shift_backward' will shift the nonexistent time backward to the
|
| 1824 |
+
closest existing time
|
| 1825 |
+
- 'NaT' will return NaT where there are nonexistent times
|
| 1826 |
+
- timedelta objects will shift nonexistent times by the timedelta
|
| 1827 |
+
- 'raise' will raise an NonExistentTimeError if there are
|
| 1828 |
+
nonexistent times.
|
| 1829 |
+
|
| 1830 |
+
Returns
|
| 1831 |
+
-------
|
| 1832 |
+
DatetimeIndex, TimedeltaIndex, or Series
|
| 1833 |
+
Index of the same type for a DatetimeIndex or TimedeltaIndex,
|
| 1834 |
+
or a Series with the same index for a Series.
|
| 1835 |
+
|
| 1836 |
+
Raises
|
| 1837 |
+
------
|
| 1838 |
+
ValueError if the `freq` cannot be converted.
|
| 1839 |
+
|
| 1840 |
+
Notes
|
| 1841 |
+
-----
|
| 1842 |
+
If the timestamps have a timezone, {op}ing will take place relative to the
|
| 1843 |
+
local ("wall") time and re-localized to the same timezone. When {op}ing
|
| 1844 |
+
near daylight savings time, use ``nonexistent`` and ``ambiguous`` to
|
| 1845 |
+
control the re-localization behavior.
|
| 1846 |
+
|
| 1847 |
+
Examples
|
| 1848 |
+
--------
|
| 1849 |
+
**DatetimeIndex**
|
| 1850 |
+
|
| 1851 |
+
>>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min')
|
| 1852 |
+
>>> rng
|
| 1853 |
+
DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00',
|
| 1854 |
+
'2018-01-01 12:01:00'],
|
| 1855 |
+
dtype='datetime64[ns]', freq='min')
|
| 1856 |
+
"""
|
| 1857 |
+
|
| 1858 |
+
_round_example = """>>> rng.round('h')
|
| 1859 |
+
DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
|
| 1860 |
+
'2018-01-01 12:00:00'],
|
| 1861 |
+
dtype='datetime64[ns]', freq=None)
|
| 1862 |
+
|
| 1863 |
+
**Series**
|
| 1864 |
+
|
| 1865 |
+
>>> pd.Series(rng).dt.round("h")
|
| 1866 |
+
0 2018-01-01 12:00:00
|
| 1867 |
+
1 2018-01-01 12:00:00
|
| 1868 |
+
2 2018-01-01 12:00:00
|
| 1869 |
+
dtype: datetime64[ns]
|
| 1870 |
+
|
| 1871 |
+
When rounding near a daylight savings time transition, use ``ambiguous`` or
|
| 1872 |
+
``nonexistent`` to control how the timestamp should be re-localized.
|
| 1873 |
+
|
| 1874 |
+
>>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam")
|
| 1875 |
+
|
| 1876 |
+
>>> rng_tz.floor("2h", ambiguous=False)
|
| 1877 |
+
DatetimeIndex(['2021-10-31 02:00:00+01:00'],
|
| 1878 |
+
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
|
| 1879 |
+
|
| 1880 |
+
>>> rng_tz.floor("2h", ambiguous=True)
|
| 1881 |
+
DatetimeIndex(['2021-10-31 02:00:00+02:00'],
|
| 1882 |
+
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
|
| 1883 |
+
"""
|
| 1884 |
+
|
| 1885 |
+
_floor_example = """>>> rng.floor('h')
|
| 1886 |
+
DatetimeIndex(['2018-01-01 11:00:00', '2018-01-01 12:00:00',
|
| 1887 |
+
'2018-01-01 12:00:00'],
|
| 1888 |
+
dtype='datetime64[ns]', freq=None)
|
| 1889 |
+
|
| 1890 |
+
**Series**
|
| 1891 |
+
|
| 1892 |
+
>>> pd.Series(rng).dt.floor("h")
|
| 1893 |
+
0 2018-01-01 11:00:00
|
| 1894 |
+
1 2018-01-01 12:00:00
|
| 1895 |
+
2 2018-01-01 12:00:00
|
| 1896 |
+
dtype: datetime64[ns]
|
| 1897 |
+
|
| 1898 |
+
When rounding near a daylight savings time transition, use ``ambiguous`` or
|
| 1899 |
+
``nonexistent`` to control how the timestamp should be re-localized.
|
| 1900 |
+
|
| 1901 |
+
>>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam")
|
| 1902 |
+
|
| 1903 |
+
>>> rng_tz.floor("2h", ambiguous=False)
|
| 1904 |
+
DatetimeIndex(['2021-10-31 02:00:00+01:00'],
|
| 1905 |
+
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
|
| 1906 |
+
|
| 1907 |
+
>>> rng_tz.floor("2h", ambiguous=True)
|
| 1908 |
+
DatetimeIndex(['2021-10-31 02:00:00+02:00'],
|
| 1909 |
+
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
|
| 1910 |
+
"""
|
| 1911 |
+
|
| 1912 |
+
_ceil_example = """>>> rng.ceil('h')
|
| 1913 |
+
DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
|
| 1914 |
+
'2018-01-01 13:00:00'],
|
| 1915 |
+
dtype='datetime64[ns]', freq=None)
|
| 1916 |
+
|
| 1917 |
+
**Series**
|
| 1918 |
+
|
| 1919 |
+
>>> pd.Series(rng).dt.ceil("h")
|
| 1920 |
+
0 2018-01-01 12:00:00
|
| 1921 |
+
1 2018-01-01 12:00:00
|
| 1922 |
+
2 2018-01-01 13:00:00
|
| 1923 |
+
dtype: datetime64[ns]
|
| 1924 |
+
|
| 1925 |
+
When rounding near a daylight savings time transition, use ``ambiguous`` or
|
| 1926 |
+
``nonexistent`` to control how the timestamp should be re-localized.
|
| 1927 |
+
|
| 1928 |
+
>>> rng_tz = pd.DatetimeIndex(["2021-10-31 01:30:00"], tz="Europe/Amsterdam")
|
| 1929 |
+
|
| 1930 |
+
>>> rng_tz.ceil("h", ambiguous=False)
|
| 1931 |
+
DatetimeIndex(['2021-10-31 02:00:00+01:00'],
|
| 1932 |
+
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
|
| 1933 |
+
|
| 1934 |
+
>>> rng_tz.ceil("h", ambiguous=True)
|
| 1935 |
+
DatetimeIndex(['2021-10-31 02:00:00+02:00'],
|
| 1936 |
+
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
|
| 1937 |
+
"""
|
| 1938 |
+
|
| 1939 |
+
|
| 1940 |
+
class TimelikeOps(DatetimeLikeArrayMixin):
|
| 1941 |
+
"""
|
| 1942 |
+
Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex.
|
| 1943 |
+
"""
|
| 1944 |
+
|
| 1945 |
+
_default_dtype: np.dtype
|
| 1946 |
+
|
| 1947 |
+
def __init__(
|
| 1948 |
+
self, values, dtype=None, freq=lib.no_default, copy: bool = False
|
| 1949 |
+
) -> None:
|
| 1950 |
+
warnings.warn(
|
| 1951 |
+
# GH#55623
|
| 1952 |
+
f"{type(self).__name__}.__init__ is deprecated and will be "
|
| 1953 |
+
"removed in a future version. Use pd.array instead.",
|
| 1954 |
+
FutureWarning,
|
| 1955 |
+
stacklevel=find_stack_level(),
|
| 1956 |
+
)
|
| 1957 |
+
if dtype is not None:
|
| 1958 |
+
dtype = pandas_dtype(dtype)
|
| 1959 |
+
|
| 1960 |
+
values = extract_array(values, extract_numpy=True)
|
| 1961 |
+
if isinstance(values, IntegerArray):
|
| 1962 |
+
values = values.to_numpy("int64", na_value=iNaT)
|
| 1963 |
+
|
| 1964 |
+
inferred_freq = getattr(values, "_freq", None)
|
| 1965 |
+
explicit_none = freq is None
|
| 1966 |
+
freq = freq if freq is not lib.no_default else None
|
| 1967 |
+
|
| 1968 |
+
if isinstance(values, type(self)):
|
| 1969 |
+
if explicit_none:
|
| 1970 |
+
# don't inherit from values
|
| 1971 |
+
pass
|
| 1972 |
+
elif freq is None:
|
| 1973 |
+
freq = values.freq
|
| 1974 |
+
elif freq and values.freq:
|
| 1975 |
+
freq = to_offset(freq)
|
| 1976 |
+
freq = _validate_inferred_freq(freq, values.freq)
|
| 1977 |
+
|
| 1978 |
+
if dtype is not None and dtype != values.dtype:
|
| 1979 |
+
# TODO: we only have tests for this for DTA, not TDA (2022-07-01)
|
| 1980 |
+
raise TypeError(
|
| 1981 |
+
f"dtype={dtype} does not match data dtype {values.dtype}"
|
| 1982 |
+
)
|
| 1983 |
+
|
| 1984 |
+
dtype = values.dtype
|
| 1985 |
+
values = values._ndarray
|
| 1986 |
+
|
| 1987 |
+
elif dtype is None:
|
| 1988 |
+
if isinstance(values, np.ndarray) and values.dtype.kind in "Mm":
|
| 1989 |
+
dtype = values.dtype
|
| 1990 |
+
else:
|
| 1991 |
+
dtype = self._default_dtype
|
| 1992 |
+
if isinstance(values, np.ndarray) and values.dtype == "i8":
|
| 1993 |
+
values = values.view(dtype)
|
| 1994 |
+
|
| 1995 |
+
if not isinstance(values, np.ndarray):
|
| 1996 |
+
raise ValueError(
|
| 1997 |
+
f"Unexpected type '{type(values).__name__}'. 'values' must be a "
|
| 1998 |
+
f"{type(self).__name__}, ndarray, or Series or Index "
|
| 1999 |
+
"containing one of those."
|
| 2000 |
+
)
|
| 2001 |
+
if values.ndim not in [1, 2]:
|
| 2002 |
+
raise ValueError("Only 1-dimensional input arrays are supported.")
|
| 2003 |
+
|
| 2004 |
+
if values.dtype == "i8":
|
| 2005 |
+
# for compat with datetime/timedelta/period shared methods,
|
| 2006 |
+
# we can sometimes get here with int64 values. These represent
|
| 2007 |
+
# nanosecond UTC (or tz-naive) unix timestamps
|
| 2008 |
+
if dtype is None:
|
| 2009 |
+
dtype = self._default_dtype
|
| 2010 |
+
values = values.view(self._default_dtype)
|
| 2011 |
+
elif lib.is_np_dtype(dtype, "mM"):
|
| 2012 |
+
values = values.view(dtype)
|
| 2013 |
+
elif isinstance(dtype, DatetimeTZDtype):
|
| 2014 |
+
kind = self._default_dtype.kind
|
| 2015 |
+
new_dtype = f"{kind}8[{dtype.unit}]"
|
| 2016 |
+
values = values.view(new_dtype)
|
| 2017 |
+
|
| 2018 |
+
dtype = self._validate_dtype(values, dtype)
|
| 2019 |
+
|
| 2020 |
+
if freq == "infer":
|
| 2021 |
+
raise ValueError(
|
| 2022 |
+
f"Frequency inference not allowed in {type(self).__name__}.__init__. "
|
| 2023 |
+
"Use 'pd.array()' instead."
|
| 2024 |
+
)
|
| 2025 |
+
|
| 2026 |
+
if copy:
|
| 2027 |
+
values = values.copy()
|
| 2028 |
+
if freq:
|
| 2029 |
+
freq = to_offset(freq)
|
| 2030 |
+
if values.dtype.kind == "m" and not isinstance(freq, Tick):
|
| 2031 |
+
raise TypeError("TimedeltaArray/Index freq must be a Tick")
|
| 2032 |
+
|
| 2033 |
+
NDArrayBacked.__init__(self, values=values, dtype=dtype)
|
| 2034 |
+
self._freq = freq
|
| 2035 |
+
|
| 2036 |
+
if inferred_freq is None and freq is not None:
|
| 2037 |
+
type(self)._validate_frequency(self, freq)
|
| 2038 |
+
|
| 2039 |
+
@classmethod
|
| 2040 |
+
def _validate_dtype(cls, values, dtype):
|
| 2041 |
+
raise AbstractMethodError(cls)
|
| 2042 |
+
|
| 2043 |
+
@property
|
| 2044 |
+
def freq(self):
|
| 2045 |
+
"""
|
| 2046 |
+
Return the frequency object if it is set, otherwise None.
|
| 2047 |
+
"""
|
| 2048 |
+
return self._freq
|
| 2049 |
+
|
| 2050 |
+
@freq.setter
|
| 2051 |
+
def freq(self, value) -> None:
|
| 2052 |
+
if value is not None:
|
| 2053 |
+
value = to_offset(value)
|
| 2054 |
+
self._validate_frequency(self, value)
|
| 2055 |
+
if self.dtype.kind == "m" and not isinstance(value, Tick):
|
| 2056 |
+
raise TypeError("TimedeltaArray/Index freq must be a Tick")
|
| 2057 |
+
|
| 2058 |
+
if self.ndim > 1:
|
| 2059 |
+
raise ValueError("Cannot set freq with ndim > 1")
|
| 2060 |
+
|
| 2061 |
+
self._freq = value
|
| 2062 |
+
|
| 2063 |
+
@final
|
| 2064 |
+
def _maybe_pin_freq(self, freq, validate_kwds: dict):
|
| 2065 |
+
"""
|
| 2066 |
+
Constructor helper to pin the appropriate `freq` attribute. Assumes
|
| 2067 |
+
that self._freq is currently set to any freq inferred in
|
| 2068 |
+
_from_sequence_not_strict.
|
| 2069 |
+
"""
|
| 2070 |
+
if freq is None:
|
| 2071 |
+
# user explicitly passed None -> override any inferred_freq
|
| 2072 |
+
self._freq = None
|
| 2073 |
+
elif freq == "infer":
|
| 2074 |
+
# if self._freq is *not* None then we already inferred a freq
|
| 2075 |
+
# and there is nothing left to do
|
| 2076 |
+
if self._freq is None:
|
| 2077 |
+
# Set _freq directly to bypass duplicative _validate_frequency
|
| 2078 |
+
# check.
|
| 2079 |
+
self._freq = to_offset(self.inferred_freq)
|
| 2080 |
+
elif freq is lib.no_default:
|
| 2081 |
+
# user did not specify anything, keep inferred freq if the original
|
| 2082 |
+
# data had one, otherwise do nothing
|
| 2083 |
+
pass
|
| 2084 |
+
elif self._freq is None:
|
| 2085 |
+
# We cannot inherit a freq from the data, so we need to validate
|
| 2086 |
+
# the user-passed freq
|
| 2087 |
+
freq = to_offset(freq)
|
| 2088 |
+
type(self)._validate_frequency(self, freq, **validate_kwds)
|
| 2089 |
+
self._freq = freq
|
| 2090 |
+
else:
|
| 2091 |
+
# Otherwise we just need to check that the user-passed freq
|
| 2092 |
+
# doesn't conflict with the one we already have.
|
| 2093 |
+
freq = to_offset(freq)
|
| 2094 |
+
_validate_inferred_freq(freq, self._freq)
|
| 2095 |
+
|
| 2096 |
+
@final
|
| 2097 |
+
@classmethod
|
| 2098 |
+
def _validate_frequency(cls, index, freq: BaseOffset, **kwargs):
|
| 2099 |
+
"""
|
| 2100 |
+
Validate that a frequency is compatible with the values of a given
|
| 2101 |
+
Datetime Array/Index or Timedelta Array/Index
|
| 2102 |
+
|
| 2103 |
+
Parameters
|
| 2104 |
+
----------
|
| 2105 |
+
index : DatetimeIndex or TimedeltaIndex
|
| 2106 |
+
The index on which to determine if the given frequency is valid
|
| 2107 |
+
freq : DateOffset
|
| 2108 |
+
The frequency to validate
|
| 2109 |
+
"""
|
| 2110 |
+
inferred = index.inferred_freq
|
| 2111 |
+
if index.size == 0 or inferred == freq.freqstr:
|
| 2112 |
+
return None
|
| 2113 |
+
|
| 2114 |
+
try:
|
| 2115 |
+
on_freq = cls._generate_range(
|
| 2116 |
+
start=index[0],
|
| 2117 |
+
end=None,
|
| 2118 |
+
periods=len(index),
|
| 2119 |
+
freq=freq,
|
| 2120 |
+
unit=index.unit,
|
| 2121 |
+
**kwargs,
|
| 2122 |
+
)
|
| 2123 |
+
if not np.array_equal(index.asi8, on_freq.asi8):
|
| 2124 |
+
raise ValueError
|
| 2125 |
+
except ValueError as err:
|
| 2126 |
+
if "non-fixed" in str(err):
|
| 2127 |
+
# non-fixed frequencies are not meaningful for timedelta64;
|
| 2128 |
+
# we retain that error message
|
| 2129 |
+
raise err
|
| 2130 |
+
# GH#11587 the main way this is reached is if the `np.array_equal`
|
| 2131 |
+
# check above is False. This can also be reached if index[0]
|
| 2132 |
+
# is `NaT`, in which case the call to `cls._generate_range` will
|
| 2133 |
+
# raise a ValueError, which we re-raise with a more targeted
|
| 2134 |
+
# message.
|
| 2135 |
+
raise ValueError(
|
| 2136 |
+
f"Inferred frequency {inferred} from passed values "
|
| 2137 |
+
f"does not conform to passed frequency {freq.freqstr}"
|
| 2138 |
+
) from err
|
| 2139 |
+
|
| 2140 |
+
@classmethod
|
| 2141 |
+
def _generate_range(
|
| 2142 |
+
cls, start, end, periods: int | None, freq, *args, **kwargs
|
| 2143 |
+
) -> Self:
|
| 2144 |
+
raise AbstractMethodError(cls)
|
| 2145 |
+
|
| 2146 |
+
# --------------------------------------------------------------
|
| 2147 |
+
|
| 2148 |
+
@cache_readonly
|
| 2149 |
+
def _creso(self) -> int:
|
| 2150 |
+
return get_unit_from_dtype(self._ndarray.dtype)
|
| 2151 |
+
|
| 2152 |
+
@cache_readonly
|
| 2153 |
+
def unit(self) -> str:
|
| 2154 |
+
# e.g. "ns", "us", "ms"
|
| 2155 |
+
# error: Argument 1 to "dtype_to_unit" has incompatible type
|
| 2156 |
+
# "ExtensionDtype"; expected "Union[DatetimeTZDtype, dtype[Any]]"
|
| 2157 |
+
return dtype_to_unit(self.dtype) # type: ignore[arg-type]
|
| 2158 |
+
|
| 2159 |
+
def as_unit(self, unit: str, round_ok: bool = True) -> Self:
|
| 2160 |
+
if unit not in ["s", "ms", "us", "ns"]:
|
| 2161 |
+
raise ValueError("Supported units are 's', 'ms', 'us', 'ns'")
|
| 2162 |
+
|
| 2163 |
+
dtype = np.dtype(f"{self.dtype.kind}8[{unit}]")
|
| 2164 |
+
new_values = astype_overflowsafe(self._ndarray, dtype, round_ok=round_ok)
|
| 2165 |
+
|
| 2166 |
+
if isinstance(self.dtype, np.dtype):
|
| 2167 |
+
new_dtype = new_values.dtype
|
| 2168 |
+
else:
|
| 2169 |
+
tz = cast("DatetimeArray", self).tz
|
| 2170 |
+
new_dtype = DatetimeTZDtype(tz=tz, unit=unit)
|
| 2171 |
+
|
| 2172 |
+
# error: Unexpected keyword argument "freq" for "_simple_new" of
|
| 2173 |
+
# "NDArrayBacked" [call-arg]
|
| 2174 |
+
return type(self)._simple_new(
|
| 2175 |
+
new_values, dtype=new_dtype, freq=self.freq # type: ignore[call-arg]
|
| 2176 |
+
)
|
| 2177 |
+
|
| 2178 |
+
# TODO: annotate other as DatetimeArray | TimedeltaArray | Timestamp | Timedelta
|
| 2179 |
+
# with the return type matching input type. TypeVar?
|
| 2180 |
+
def _ensure_matching_resos(self, other):
|
| 2181 |
+
if self._creso != other._creso:
|
| 2182 |
+
# Just as with Timestamp/Timedelta, we cast to the higher resolution
|
| 2183 |
+
if self._creso < other._creso:
|
| 2184 |
+
self = self.as_unit(other.unit)
|
| 2185 |
+
else:
|
| 2186 |
+
other = other.as_unit(self.unit)
|
| 2187 |
+
return self, other
|
| 2188 |
+
|
| 2189 |
+
# --------------------------------------------------------------
|
| 2190 |
+
|
| 2191 |
+
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
|
| 2192 |
+
if (
|
| 2193 |
+
ufunc in [np.isnan, np.isinf, np.isfinite]
|
| 2194 |
+
and len(inputs) == 1
|
| 2195 |
+
and inputs[0] is self
|
| 2196 |
+
):
|
| 2197 |
+
# numpy 1.18 changed isinf and isnan to not raise on dt64/td64
|
| 2198 |
+
return getattr(ufunc, method)(self._ndarray, **kwargs)
|
| 2199 |
+
|
| 2200 |
+
return super().__array_ufunc__(ufunc, method, *inputs, **kwargs)
|
| 2201 |
+
|
| 2202 |
+
def _round(self, freq, mode, ambiguous, nonexistent):
|
| 2203 |
+
# round the local times
|
| 2204 |
+
if isinstance(self.dtype, DatetimeTZDtype):
|
| 2205 |
+
# operate on naive timestamps, then convert back to aware
|
| 2206 |
+
self = cast("DatetimeArray", self)
|
| 2207 |
+
naive = self.tz_localize(None)
|
| 2208 |
+
result = naive._round(freq, mode, ambiguous, nonexistent)
|
| 2209 |
+
return result.tz_localize(
|
| 2210 |
+
self.tz, ambiguous=ambiguous, nonexistent=nonexistent
|
| 2211 |
+
)
|
| 2212 |
+
|
| 2213 |
+
values = self.view("i8")
|
| 2214 |
+
values = cast(np.ndarray, values)
|
| 2215 |
+
nanos = get_unit_for_round(freq, self._creso)
|
| 2216 |
+
if nanos == 0:
|
| 2217 |
+
# GH 52761
|
| 2218 |
+
return self.copy()
|
| 2219 |
+
result_i8 = round_nsint64(values, mode, nanos)
|
| 2220 |
+
result = self._maybe_mask_results(result_i8, fill_value=iNaT)
|
| 2221 |
+
result = result.view(self._ndarray.dtype)
|
| 2222 |
+
return self._simple_new(result, dtype=self.dtype)
|
| 2223 |
+
|
| 2224 |
+
@Appender((_round_doc + _round_example).format(op="round"))
|
| 2225 |
+
def round(
|
| 2226 |
+
self,
|
| 2227 |
+
freq,
|
| 2228 |
+
ambiguous: TimeAmbiguous = "raise",
|
| 2229 |
+
nonexistent: TimeNonexistent = "raise",
|
| 2230 |
+
) -> Self:
|
| 2231 |
+
return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent)
|
| 2232 |
+
|
| 2233 |
+
@Appender((_round_doc + _floor_example).format(op="floor"))
|
| 2234 |
+
def floor(
|
| 2235 |
+
self,
|
| 2236 |
+
freq,
|
| 2237 |
+
ambiguous: TimeAmbiguous = "raise",
|
| 2238 |
+
nonexistent: TimeNonexistent = "raise",
|
| 2239 |
+
) -> Self:
|
| 2240 |
+
return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent)
|
| 2241 |
+
|
| 2242 |
+
@Appender((_round_doc + _ceil_example).format(op="ceil"))
|
| 2243 |
+
def ceil(
|
| 2244 |
+
self,
|
| 2245 |
+
freq,
|
| 2246 |
+
ambiguous: TimeAmbiguous = "raise",
|
| 2247 |
+
nonexistent: TimeNonexistent = "raise",
|
| 2248 |
+
) -> Self:
|
| 2249 |
+
return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent)
|
| 2250 |
+
|
| 2251 |
+
# --------------------------------------------------------------
|
| 2252 |
+
# Reductions
|
| 2253 |
+
|
| 2254 |
+
def any(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool:
|
| 2255 |
+
# GH#34479 the nanops call will issue a FutureWarning for non-td64 dtype
|
| 2256 |
+
return nanops.nanany(self._ndarray, axis=axis, skipna=skipna, mask=self.isna())
|
| 2257 |
+
|
| 2258 |
+
def all(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool:
|
| 2259 |
+
# GH#34479 the nanops call will issue a FutureWarning for non-td64 dtype
|
| 2260 |
+
|
| 2261 |
+
return nanops.nanall(self._ndarray, axis=axis, skipna=skipna, mask=self.isna())
|
| 2262 |
+
|
| 2263 |
+
# --------------------------------------------------------------
|
| 2264 |
+
# Frequency Methods
|
| 2265 |
+
|
| 2266 |
+
def _maybe_clear_freq(self) -> None:
|
| 2267 |
+
self._freq = None
|
| 2268 |
+
|
| 2269 |
+
def _with_freq(self, freq) -> Self:
|
| 2270 |
+
"""
|
| 2271 |
+
Helper to get a view on the same data, with a new freq.
|
| 2272 |
+
|
| 2273 |
+
Parameters
|
| 2274 |
+
----------
|
| 2275 |
+
freq : DateOffset, None, or "infer"
|
| 2276 |
+
|
| 2277 |
+
Returns
|
| 2278 |
+
-------
|
| 2279 |
+
Same type as self
|
| 2280 |
+
"""
|
| 2281 |
+
# GH#29843
|
| 2282 |
+
if freq is None:
|
| 2283 |
+
# Always valid
|
| 2284 |
+
pass
|
| 2285 |
+
elif len(self) == 0 and isinstance(freq, BaseOffset):
|
| 2286 |
+
# Always valid. In the TimedeltaArray case, we require a Tick offset
|
| 2287 |
+
if self.dtype.kind == "m" and not isinstance(freq, Tick):
|
| 2288 |
+
raise TypeError("TimedeltaArray/Index freq must be a Tick")
|
| 2289 |
+
else:
|
| 2290 |
+
# As an internal method, we can ensure this assertion always holds
|
| 2291 |
+
assert freq == "infer"
|
| 2292 |
+
freq = to_offset(self.inferred_freq)
|
| 2293 |
+
|
| 2294 |
+
arr = self.view()
|
| 2295 |
+
arr._freq = freq
|
| 2296 |
+
return arr
|
| 2297 |
+
|
| 2298 |
+
# --------------------------------------------------------------
|
| 2299 |
+
# ExtensionArray Interface
|
| 2300 |
+
|
| 2301 |
+
def _values_for_json(self) -> np.ndarray:
|
| 2302 |
+
# Small performance bump vs the base class which calls np.asarray(self)
|
| 2303 |
+
if isinstance(self.dtype, np.dtype):
|
| 2304 |
+
return self._ndarray
|
| 2305 |
+
return super()._values_for_json()
|
| 2306 |
+
|
| 2307 |
+
def factorize(
|
| 2308 |
+
self,
|
| 2309 |
+
use_na_sentinel: bool = True,
|
| 2310 |
+
sort: bool = False,
|
| 2311 |
+
):
|
| 2312 |
+
if self.freq is not None:
|
| 2313 |
+
# We must be unique, so can short-circuit (and retain freq)
|
| 2314 |
+
codes = np.arange(len(self), dtype=np.intp)
|
| 2315 |
+
uniques = self.copy() # TODO: copy or view?
|
| 2316 |
+
if sort and self.freq.n < 0:
|
| 2317 |
+
codes = codes[::-1]
|
| 2318 |
+
uniques = uniques[::-1]
|
| 2319 |
+
return codes, uniques
|
| 2320 |
+
|
| 2321 |
+
if sort:
|
| 2322 |
+
# algorithms.factorize only passes sort=True here when freq is
|
| 2323 |
+
# not None, so this should not be reached.
|
| 2324 |
+
raise NotImplementedError(
|
| 2325 |
+
f"The 'sort' keyword in {type(self).__name__}.factorize is "
|
| 2326 |
+
"ignored unless arr.freq is not None. To factorize with sort, "
|
| 2327 |
+
"call pd.factorize(obj, sort=True) instead."
|
| 2328 |
+
)
|
| 2329 |
+
return super().factorize(use_na_sentinel=use_na_sentinel)
|
| 2330 |
+
|
| 2331 |
+
@classmethod
|
| 2332 |
+
def _concat_same_type(
|
| 2333 |
+
cls,
|
| 2334 |
+
to_concat: Sequence[Self],
|
| 2335 |
+
axis: AxisInt = 0,
|
| 2336 |
+
) -> Self:
|
| 2337 |
+
new_obj = super()._concat_same_type(to_concat, axis)
|
| 2338 |
+
|
| 2339 |
+
obj = to_concat[0]
|
| 2340 |
+
|
| 2341 |
+
if axis == 0:
|
| 2342 |
+
# GH 3232: If the concat result is evenly spaced, we can retain the
|
| 2343 |
+
# original frequency
|
| 2344 |
+
to_concat = [x for x in to_concat if len(x)]
|
| 2345 |
+
|
| 2346 |
+
if obj.freq is not None and all(x.freq == obj.freq for x in to_concat):
|
| 2347 |
+
pairs = zip(to_concat[:-1], to_concat[1:])
|
| 2348 |
+
if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs):
|
| 2349 |
+
new_freq = obj.freq
|
| 2350 |
+
new_obj._freq = new_freq
|
| 2351 |
+
return new_obj
|
| 2352 |
+
|
| 2353 |
+
def copy(self, order: str = "C") -> Self:
|
| 2354 |
+
new_obj = super().copy(order=order)
|
| 2355 |
+
new_obj._freq = self.freq
|
| 2356 |
+
return new_obj
|
| 2357 |
+
|
| 2358 |
+
def interpolate(
|
| 2359 |
+
self,
|
| 2360 |
+
*,
|
| 2361 |
+
method: InterpolateOptions,
|
| 2362 |
+
axis: int,
|
| 2363 |
+
index: Index,
|
| 2364 |
+
limit,
|
| 2365 |
+
limit_direction,
|
| 2366 |
+
limit_area,
|
| 2367 |
+
copy: bool,
|
| 2368 |
+
**kwargs,
|
| 2369 |
+
) -> Self:
|
| 2370 |
+
"""
|
| 2371 |
+
See NDFrame.interpolate.__doc__.
|
| 2372 |
+
"""
|
| 2373 |
+
# NB: we return type(self) even if copy=False
|
| 2374 |
+
if method != "linear":
|
| 2375 |
+
raise NotImplementedError
|
| 2376 |
+
|
| 2377 |
+
if not copy:
|
| 2378 |
+
out_data = self._ndarray
|
| 2379 |
+
else:
|
| 2380 |
+
out_data = self._ndarray.copy()
|
| 2381 |
+
|
| 2382 |
+
missing.interpolate_2d_inplace(
|
| 2383 |
+
out_data,
|
| 2384 |
+
method=method,
|
| 2385 |
+
axis=axis,
|
| 2386 |
+
index=index,
|
| 2387 |
+
limit=limit,
|
| 2388 |
+
limit_direction=limit_direction,
|
| 2389 |
+
limit_area=limit_area,
|
| 2390 |
+
**kwargs,
|
| 2391 |
+
)
|
| 2392 |
+
if not copy:
|
| 2393 |
+
return self
|
| 2394 |
+
return type(self)._simple_new(out_data, dtype=self.dtype)
|
| 2395 |
+
|
| 2396 |
+
# --------------------------------------------------------------
|
| 2397 |
+
# Unsorted
|
| 2398 |
+
|
| 2399 |
+
@property
|
| 2400 |
+
def _is_dates_only(self) -> bool:
|
| 2401 |
+
"""
|
| 2402 |
+
Check if we are round times at midnight (and no timezone), which will
|
| 2403 |
+
be given a more compact __repr__ than other cases. For TimedeltaArray
|
| 2404 |
+
we are checking for multiples of 24H.
|
| 2405 |
+
"""
|
| 2406 |
+
if not lib.is_np_dtype(self.dtype):
|
| 2407 |
+
# i.e. we have a timezone
|
| 2408 |
+
return False
|
| 2409 |
+
|
| 2410 |
+
values_int = self.asi8
|
| 2411 |
+
consider_values = values_int != iNaT
|
| 2412 |
+
reso = get_unit_from_dtype(self.dtype)
|
| 2413 |
+
ppd = periods_per_day(reso)
|
| 2414 |
+
|
| 2415 |
+
# TODO: can we reuse is_date_array_normalized? would need a skipna kwd
|
| 2416 |
+
# (first attempt at this was less performant than this implementation)
|
| 2417 |
+
even_days = np.logical_and(consider_values, values_int % ppd != 0).sum() == 0
|
| 2418 |
+
return even_days
|
| 2419 |
+
|
| 2420 |
+
|
| 2421 |
+
# -------------------------------------------------------------------
|
| 2422 |
+
# Shared Constructor Helpers
|
| 2423 |
+
|
| 2424 |
+
|
| 2425 |
+
def ensure_arraylike_for_datetimelike(
|
| 2426 |
+
data, copy: bool, cls_name: str
|
| 2427 |
+
) -> tuple[ArrayLike, bool]:
|
| 2428 |
+
if not hasattr(data, "dtype"):
|
| 2429 |
+
# e.g. list, tuple
|
| 2430 |
+
if not isinstance(data, (list, tuple)) and np.ndim(data) == 0:
|
| 2431 |
+
# i.e. generator
|
| 2432 |
+
data = list(data)
|
| 2433 |
+
|
| 2434 |
+
data = construct_1d_object_array_from_listlike(data)
|
| 2435 |
+
copy = False
|
| 2436 |
+
elif isinstance(data, ABCMultiIndex):
|
| 2437 |
+
raise TypeError(f"Cannot create a {cls_name} from a MultiIndex.")
|
| 2438 |
+
else:
|
| 2439 |
+
data = extract_array(data, extract_numpy=True)
|
| 2440 |
+
|
| 2441 |
+
if isinstance(data, IntegerArray) or (
|
| 2442 |
+
isinstance(data, ArrowExtensionArray) and data.dtype.kind in "iu"
|
| 2443 |
+
):
|
| 2444 |
+
data = data.to_numpy("int64", na_value=iNaT)
|
| 2445 |
+
copy = False
|
| 2446 |
+
elif isinstance(data, ArrowExtensionArray):
|
| 2447 |
+
data = data._maybe_convert_datelike_array()
|
| 2448 |
+
data = data.to_numpy()
|
| 2449 |
+
copy = False
|
| 2450 |
+
elif not isinstance(data, (np.ndarray, ExtensionArray)):
|
| 2451 |
+
# GH#24539 e.g. xarray, dask object
|
| 2452 |
+
data = np.asarray(data)
|
| 2453 |
+
|
| 2454 |
+
elif isinstance(data, ABCCategorical):
|
| 2455 |
+
# GH#18664 preserve tz in going DTI->Categorical->DTI
|
| 2456 |
+
# TODO: cases where we need to do another pass through maybe_convert_dtype,
|
| 2457 |
+
# e.g. the categories are timedelta64s
|
| 2458 |
+
data = data.categories.take(data.codes, fill_value=NaT)._values
|
| 2459 |
+
copy = False
|
| 2460 |
+
|
| 2461 |
+
return data, copy
|
| 2462 |
+
|
| 2463 |
+
|
| 2464 |
+
@overload
|
| 2465 |
+
def validate_periods(periods: None) -> None:
|
| 2466 |
+
...
|
| 2467 |
+
|
| 2468 |
+
|
| 2469 |
+
@overload
|
| 2470 |
+
def validate_periods(periods: int | float) -> int:
|
| 2471 |
+
...
|
| 2472 |
+
|
| 2473 |
+
|
| 2474 |
+
def validate_periods(periods: int | float | None) -> int | None:
|
| 2475 |
+
"""
|
| 2476 |
+
If a `periods` argument is passed to the Datetime/Timedelta Array/Index
|
| 2477 |
+
constructor, cast it to an integer.
|
| 2478 |
+
|
| 2479 |
+
Parameters
|
| 2480 |
+
----------
|
| 2481 |
+
periods : None, float, int
|
| 2482 |
+
|
| 2483 |
+
Returns
|
| 2484 |
+
-------
|
| 2485 |
+
periods : None or int
|
| 2486 |
+
|
| 2487 |
+
Raises
|
| 2488 |
+
------
|
| 2489 |
+
TypeError
|
| 2490 |
+
if periods is None, float, or int
|
| 2491 |
+
"""
|
| 2492 |
+
if periods is not None:
|
| 2493 |
+
if lib.is_float(periods):
|
| 2494 |
+
warnings.warn(
|
| 2495 |
+
# GH#56036
|
| 2496 |
+
"Non-integer 'periods' in pd.date_range, pd.timedelta_range, "
|
| 2497 |
+
"pd.period_range, and pd.interval_range are deprecated and "
|
| 2498 |
+
"will raise in a future version.",
|
| 2499 |
+
FutureWarning,
|
| 2500 |
+
stacklevel=find_stack_level(),
|
| 2501 |
+
)
|
| 2502 |
+
periods = int(periods)
|
| 2503 |
+
elif not lib.is_integer(periods):
|
| 2504 |
+
raise TypeError(f"periods must be a number, got {periods}")
|
| 2505 |
+
return periods
|
| 2506 |
+
|
| 2507 |
+
|
| 2508 |
+
def _validate_inferred_freq(
|
| 2509 |
+
freq: BaseOffset | None, inferred_freq: BaseOffset | None
|
| 2510 |
+
) -> BaseOffset | None:
|
| 2511 |
+
"""
|
| 2512 |
+
If the user passes a freq and another freq is inferred from passed data,
|
| 2513 |
+
require that they match.
|
| 2514 |
+
|
| 2515 |
+
Parameters
|
| 2516 |
+
----------
|
| 2517 |
+
freq : DateOffset or None
|
| 2518 |
+
inferred_freq : DateOffset or None
|
| 2519 |
+
|
| 2520 |
+
Returns
|
| 2521 |
+
-------
|
| 2522 |
+
freq : DateOffset or None
|
| 2523 |
+
"""
|
| 2524 |
+
if inferred_freq is not None:
|
| 2525 |
+
if freq is not None and freq != inferred_freq:
|
| 2526 |
+
raise ValueError(
|
| 2527 |
+
f"Inferred frequency {inferred_freq} from passed "
|
| 2528 |
+
"values does not conform to passed frequency "
|
| 2529 |
+
f"{freq.freqstr}"
|
| 2530 |
+
)
|
| 2531 |
+
if freq is None:
|
| 2532 |
+
freq = inferred_freq
|
| 2533 |
+
|
| 2534 |
+
return freq
|
| 2535 |
+
|
| 2536 |
+
|
| 2537 |
+
def dtype_to_unit(dtype: DatetimeTZDtype | np.dtype | ArrowDtype) -> str:
|
| 2538 |
+
"""
|
| 2539 |
+
Return the unit str corresponding to the dtype's resolution.
|
| 2540 |
+
|
| 2541 |
+
Parameters
|
| 2542 |
+
----------
|
| 2543 |
+
dtype : DatetimeTZDtype or np.dtype
|
| 2544 |
+
If np.dtype, we assume it is a datetime64 dtype.
|
| 2545 |
+
|
| 2546 |
+
Returns
|
| 2547 |
+
-------
|
| 2548 |
+
str
|
| 2549 |
+
"""
|
| 2550 |
+
if isinstance(dtype, DatetimeTZDtype):
|
| 2551 |
+
return dtype.unit
|
| 2552 |
+
elif isinstance(dtype, ArrowDtype):
|
| 2553 |
+
if dtype.kind not in "mM":
|
| 2554 |
+
raise ValueError(f"{dtype=} does not have a resolution.")
|
| 2555 |
+
return dtype.pyarrow_dtype.unit
|
| 2556 |
+
return np.datetime_data(dtype)[0]
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/datetimes.py
ADDED
|
@@ -0,0 +1,2820 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from datetime import (
|
| 4 |
+
datetime,
|
| 5 |
+
timedelta,
|
| 6 |
+
tzinfo,
|
| 7 |
+
)
|
| 8 |
+
from typing import (
|
| 9 |
+
TYPE_CHECKING,
|
| 10 |
+
cast,
|
| 11 |
+
overload,
|
| 12 |
+
)
|
| 13 |
+
import warnings
|
| 14 |
+
|
| 15 |
+
import numpy as np
|
| 16 |
+
|
| 17 |
+
from pandas._libs import (
|
| 18 |
+
lib,
|
| 19 |
+
tslib,
|
| 20 |
+
)
|
| 21 |
+
from pandas._libs.tslibs import (
|
| 22 |
+
BaseOffset,
|
| 23 |
+
NaT,
|
| 24 |
+
NaTType,
|
| 25 |
+
Resolution,
|
| 26 |
+
Timestamp,
|
| 27 |
+
astype_overflowsafe,
|
| 28 |
+
fields,
|
| 29 |
+
get_resolution,
|
| 30 |
+
get_supported_dtype,
|
| 31 |
+
get_unit_from_dtype,
|
| 32 |
+
ints_to_pydatetime,
|
| 33 |
+
is_date_array_normalized,
|
| 34 |
+
is_supported_dtype,
|
| 35 |
+
is_unitless,
|
| 36 |
+
normalize_i8_timestamps,
|
| 37 |
+
timezones,
|
| 38 |
+
to_offset,
|
| 39 |
+
tz_convert_from_utc,
|
| 40 |
+
tzconversion,
|
| 41 |
+
)
|
| 42 |
+
from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
|
| 43 |
+
from pandas.errors import PerformanceWarning
|
| 44 |
+
from pandas.util._exceptions import find_stack_level
|
| 45 |
+
from pandas.util._validators import validate_inclusive
|
| 46 |
+
|
| 47 |
+
from pandas.core.dtypes.common import (
|
| 48 |
+
DT64NS_DTYPE,
|
| 49 |
+
INT64_DTYPE,
|
| 50 |
+
is_bool_dtype,
|
| 51 |
+
is_float_dtype,
|
| 52 |
+
is_string_dtype,
|
| 53 |
+
pandas_dtype,
|
| 54 |
+
)
|
| 55 |
+
from pandas.core.dtypes.dtypes import (
|
| 56 |
+
DatetimeTZDtype,
|
| 57 |
+
ExtensionDtype,
|
| 58 |
+
PeriodDtype,
|
| 59 |
+
)
|
| 60 |
+
from pandas.core.dtypes.missing import isna
|
| 61 |
+
|
| 62 |
+
from pandas.core.arrays import datetimelike as dtl
|
| 63 |
+
from pandas.core.arrays._ranges import generate_regular_range
|
| 64 |
+
import pandas.core.common as com
|
| 65 |
+
|
| 66 |
+
from pandas.tseries.frequencies import get_period_alias
|
| 67 |
+
from pandas.tseries.offsets import (
|
| 68 |
+
Day,
|
| 69 |
+
Tick,
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
if TYPE_CHECKING:
|
| 73 |
+
from collections.abc import Iterator
|
| 74 |
+
|
| 75 |
+
from pandas._typing import (
|
| 76 |
+
ArrayLike,
|
| 77 |
+
DateTimeErrorChoices,
|
| 78 |
+
DtypeObj,
|
| 79 |
+
IntervalClosedType,
|
| 80 |
+
Self,
|
| 81 |
+
TimeAmbiguous,
|
| 82 |
+
TimeNonexistent,
|
| 83 |
+
npt,
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
+
from pandas import DataFrame
|
| 87 |
+
from pandas.core.arrays import PeriodArray
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
_ITER_CHUNKSIZE = 10_000
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
@overload
|
| 94 |
+
def tz_to_dtype(tz: tzinfo, unit: str = ...) -> DatetimeTZDtype:
|
| 95 |
+
...
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
@overload
|
| 99 |
+
def tz_to_dtype(tz: None, unit: str = ...) -> np.dtype[np.datetime64]:
|
| 100 |
+
...
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def tz_to_dtype(
|
| 104 |
+
tz: tzinfo | None, unit: str = "ns"
|
| 105 |
+
) -> np.dtype[np.datetime64] | DatetimeTZDtype:
|
| 106 |
+
"""
|
| 107 |
+
Return a datetime64[ns] dtype appropriate for the given timezone.
|
| 108 |
+
|
| 109 |
+
Parameters
|
| 110 |
+
----------
|
| 111 |
+
tz : tzinfo or None
|
| 112 |
+
unit : str, default "ns"
|
| 113 |
+
|
| 114 |
+
Returns
|
| 115 |
+
-------
|
| 116 |
+
np.dtype or Datetime64TZDType
|
| 117 |
+
"""
|
| 118 |
+
if tz is None:
|
| 119 |
+
return np.dtype(f"M8[{unit}]")
|
| 120 |
+
else:
|
| 121 |
+
return DatetimeTZDtype(tz=tz, unit=unit)
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def _field_accessor(name: str, field: str, docstring: str | None = None):
|
| 125 |
+
def f(self):
|
| 126 |
+
values = self._local_timestamps()
|
| 127 |
+
|
| 128 |
+
if field in self._bool_ops:
|
| 129 |
+
result: np.ndarray
|
| 130 |
+
|
| 131 |
+
if field.endswith(("start", "end")):
|
| 132 |
+
freq = self.freq
|
| 133 |
+
month_kw = 12
|
| 134 |
+
if freq:
|
| 135 |
+
kwds = freq.kwds
|
| 136 |
+
month_kw = kwds.get("startingMonth", kwds.get("month", 12))
|
| 137 |
+
|
| 138 |
+
result = fields.get_start_end_field(
|
| 139 |
+
values, field, self.freqstr, month_kw, reso=self._creso
|
| 140 |
+
)
|
| 141 |
+
else:
|
| 142 |
+
result = fields.get_date_field(values, field, reso=self._creso)
|
| 143 |
+
|
| 144 |
+
# these return a boolean by-definition
|
| 145 |
+
return result
|
| 146 |
+
|
| 147 |
+
if field in self._object_ops:
|
| 148 |
+
result = fields.get_date_name_field(values, field, reso=self._creso)
|
| 149 |
+
result = self._maybe_mask_results(result, fill_value=None)
|
| 150 |
+
|
| 151 |
+
else:
|
| 152 |
+
result = fields.get_date_field(values, field, reso=self._creso)
|
| 153 |
+
result = self._maybe_mask_results(
|
| 154 |
+
result, fill_value=None, convert="float64"
|
| 155 |
+
)
|
| 156 |
+
|
| 157 |
+
return result
|
| 158 |
+
|
| 159 |
+
f.__name__ = name
|
| 160 |
+
f.__doc__ = docstring
|
| 161 |
+
return property(f)
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is
|
| 165 |
+
# incompatible with definition in base class "ExtensionArray"
|
| 166 |
+
class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): # type: ignore[misc]
|
| 167 |
+
"""
|
| 168 |
+
Pandas ExtensionArray for tz-naive or tz-aware datetime data.
|
| 169 |
+
|
| 170 |
+
.. warning::
|
| 171 |
+
|
| 172 |
+
DatetimeArray is currently experimental, and its API may change
|
| 173 |
+
without warning. In particular, :attr:`DatetimeArray.dtype` is
|
| 174 |
+
expected to change to always be an instance of an ``ExtensionDtype``
|
| 175 |
+
subclass.
|
| 176 |
+
|
| 177 |
+
Parameters
|
| 178 |
+
----------
|
| 179 |
+
values : Series, Index, DatetimeArray, ndarray
|
| 180 |
+
The datetime data.
|
| 181 |
+
|
| 182 |
+
For DatetimeArray `values` (or a Series or Index boxing one),
|
| 183 |
+
`dtype` and `freq` will be extracted from `values`.
|
| 184 |
+
|
| 185 |
+
dtype : numpy.dtype or DatetimeTZDtype
|
| 186 |
+
Note that the only NumPy dtype allowed is 'datetime64[ns]'.
|
| 187 |
+
freq : str or Offset, optional
|
| 188 |
+
The frequency.
|
| 189 |
+
copy : bool, default False
|
| 190 |
+
Whether to copy the underlying array of values.
|
| 191 |
+
|
| 192 |
+
Attributes
|
| 193 |
+
----------
|
| 194 |
+
None
|
| 195 |
+
|
| 196 |
+
Methods
|
| 197 |
+
-------
|
| 198 |
+
None
|
| 199 |
+
|
| 200 |
+
Examples
|
| 201 |
+
--------
|
| 202 |
+
>>> pd.arrays.DatetimeArray._from_sequence(
|
| 203 |
+
... pd.DatetimeIndex(['2023-01-01', '2023-01-02'], freq='D'))
|
| 204 |
+
<DatetimeArray>
|
| 205 |
+
['2023-01-01 00:00:00', '2023-01-02 00:00:00']
|
| 206 |
+
Length: 2, dtype: datetime64[ns]
|
| 207 |
+
"""
|
| 208 |
+
|
| 209 |
+
_typ = "datetimearray"
|
| 210 |
+
_internal_fill_value = np.datetime64("NaT", "ns")
|
| 211 |
+
_recognized_scalars = (datetime, np.datetime64)
|
| 212 |
+
_is_recognized_dtype = lambda x: lib.is_np_dtype(x, "M") or isinstance(
|
| 213 |
+
x, DatetimeTZDtype
|
| 214 |
+
)
|
| 215 |
+
_infer_matches = ("datetime", "datetime64", "date")
|
| 216 |
+
|
| 217 |
+
@property
|
| 218 |
+
def _scalar_type(self) -> type[Timestamp]:
|
| 219 |
+
return Timestamp
|
| 220 |
+
|
| 221 |
+
# define my properties & methods for delegation
|
| 222 |
+
_bool_ops: list[str] = [
|
| 223 |
+
"is_month_start",
|
| 224 |
+
"is_month_end",
|
| 225 |
+
"is_quarter_start",
|
| 226 |
+
"is_quarter_end",
|
| 227 |
+
"is_year_start",
|
| 228 |
+
"is_year_end",
|
| 229 |
+
"is_leap_year",
|
| 230 |
+
]
|
| 231 |
+
_object_ops: list[str] = ["freq", "tz"]
|
| 232 |
+
_field_ops: list[str] = [
|
| 233 |
+
"year",
|
| 234 |
+
"month",
|
| 235 |
+
"day",
|
| 236 |
+
"hour",
|
| 237 |
+
"minute",
|
| 238 |
+
"second",
|
| 239 |
+
"weekday",
|
| 240 |
+
"dayofweek",
|
| 241 |
+
"day_of_week",
|
| 242 |
+
"dayofyear",
|
| 243 |
+
"day_of_year",
|
| 244 |
+
"quarter",
|
| 245 |
+
"days_in_month",
|
| 246 |
+
"daysinmonth",
|
| 247 |
+
"microsecond",
|
| 248 |
+
"nanosecond",
|
| 249 |
+
]
|
| 250 |
+
_other_ops: list[str] = ["date", "time", "timetz"]
|
| 251 |
+
_datetimelike_ops: list[str] = (
|
| 252 |
+
_field_ops + _object_ops + _bool_ops + _other_ops + ["unit"]
|
| 253 |
+
)
|
| 254 |
+
_datetimelike_methods: list[str] = [
|
| 255 |
+
"to_period",
|
| 256 |
+
"tz_localize",
|
| 257 |
+
"tz_convert",
|
| 258 |
+
"normalize",
|
| 259 |
+
"strftime",
|
| 260 |
+
"round",
|
| 261 |
+
"floor",
|
| 262 |
+
"ceil",
|
| 263 |
+
"month_name",
|
| 264 |
+
"day_name",
|
| 265 |
+
"as_unit",
|
| 266 |
+
]
|
| 267 |
+
|
| 268 |
+
# ndim is inherited from ExtensionArray, must exist to ensure
|
| 269 |
+
# Timestamp.__richcmp__(DateTimeArray) operates pointwise
|
| 270 |
+
|
| 271 |
+
# ensure that operations with numpy arrays defer to our implementation
|
| 272 |
+
__array_priority__ = 1000
|
| 273 |
+
|
| 274 |
+
# -----------------------------------------------------------------
|
| 275 |
+
# Constructors
|
| 276 |
+
|
| 277 |
+
_dtype: np.dtype[np.datetime64] | DatetimeTZDtype
|
| 278 |
+
_freq: BaseOffset | None = None
|
| 279 |
+
_default_dtype = DT64NS_DTYPE # used in TimeLikeOps.__init__
|
| 280 |
+
|
| 281 |
+
@classmethod
def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
    """
    Strict constructor that only accepts datetime-like scalars.

    Raises a bare ``ValueError`` unless ``lib.infer_dtype`` classifies
    ``scalars`` as "datetime" or "datetime64"; otherwise defers to
    ``_from_sequence``.
    """
    inferred = lib.infer_dtype(scalars, skipna=True)
    if inferred in ("datetime", "datetime64"):
        return cls._from_sequence(scalars, dtype=dtype)
    # TODO: require any NAs be valid-for-DTA
    # TODO: if dtype is passed, check for tzawareness compat?
    raise ValueError
|
| 288 |
+
|
| 289 |
+
@classmethod
def _validate_dtype(cls, values, dtype):
    """
    Validate ``dtype`` against the resolution of ``values``.

    Both ``dtype`` and ``values.dtype`` are passed through
    ``_validate_dt64_dtype``.  A ``ValueError`` is raised when the
    resolution of ``values`` does not agree with ``dtype``; otherwise the
    validated dtype is returned.
    """
    # used in TimeLikeOps.__init__
    dtype = _validate_dt64_dtype(dtype)
    _validate_dt64_dtype(values.dtype)

    if isinstance(dtype, np.dtype):
        # numpy dtype: must be exactly the dtype of the values
        mismatch = values.dtype != dtype
    else:
        # otherwise compare only the unit of the values with dtype.unit
        mismatch = np.datetime_data(values.dtype)[0] != dtype.unit

    if mismatch:
        raise ValueError("Values resolution does not match dtype.")
    return dtype
|
| 302 |
+
|
| 303 |
+
# error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
@classmethod
def _simple_new(  # type: ignore[override]
    cls,
    values: npt.NDArray[np.datetime64],
    freq: BaseOffset | None = None,
    dtype: np.dtype[np.datetime64] | DatetimeTZDtype = DT64NS_DTYPE,
) -> Self:
    """
    Wrap ``values`` in a new array without conversion.

    The inputs are only sanity-checked with assertions: ``values`` must be
    an ndarray, ``dtype`` must have kind "M", and the resolution of
    ``dtype`` must agree with ``values.dtype``.  ``freq`` is stored on the
    result as ``_freq``.
    """
    assert isinstance(values, np.ndarray)
    assert dtype.kind == "M"
    if not isinstance(dtype, np.dtype):
        # DatetimeTZDtype.  If we have e.g. DatetimeTZDtype[us, UTC],
        #  then values.dtype should be M8[us].
        assert dtype._creso == get_unit_from_dtype(values.dtype)
    else:
        assert dtype == values.dtype
        assert not is_unitless(dtype)

    new_arr = super()._simple_new(values, dtype)
    new_arr._freq = freq
    return new_arr
|
| 324 |
+
|
| 325 |
+
@classmethod
def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
    """Build an array from ``scalars`` via ``_from_sequence_not_strict``."""
    built = cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)
    return built
|
| 328 |
+
|
| 329 |
+
@classmethod
def _from_sequence_not_strict(
    cls,
    data,
    *,
    dtype=None,
    copy: bool = False,
    tz=lib.no_default,
    freq: str | BaseOffset | lib.NoDefault | None = lib.no_default,
    dayfirst: bool = False,
    yearfirst: bool = False,
    ambiguous: TimeAmbiguous = "raise",
) -> Self:
    """
    A non-strict version of _from_sequence, called from DatetimeIndex.__new__.

    Raises
    ------
    ValueError
        If ``tz=None`` was passed explicitly but the converted data turns
        out to be timezone-aware.
    """
    # An explicitly passed tz=None (or a tz-naive dtype) means we must not
    #  infer a timezone from the data.
    explicit_tz_none = tz is None
    tz = None if tz is lib.no_default else timezones.maybe_get_tz(tz)

    dtype = _validate_dt64_dtype(dtype)
    # a tz embedded in the dtype is picked up here
    tz = _validate_tz_from_dtype(dtype, tz, explicit_tz_none)

    unit = dtl.dtype_to_unit(dtype) if dtype is not None else None

    data, copy = dtl.ensure_arraylike_for_datetimelike(
        data, copy, cls_name="DatetimeArray"
    )
    # Only an existing DatetimeArray brings a freq along with it.
    inferred_freq = data.freq if isinstance(data, DatetimeArray) else None

    subarr, tz = _sequence_to_dt64(
        data,
        copy=copy,
        tz=tz,
        dayfirst=dayfirst,
        yearfirst=yearfirst,
        ambiguous=ambiguous,
        out_unit=unit,
    )
    # The conversion above may have inferred a tz, so validate once more.
    _validate_tz_from_dtype(dtype, tz, explicit_tz_none)
    if tz is not None and explicit_tz_none:
        raise ValueError(
            "Passed data is timezone-aware, incompatible with 'tz=None'. "
            "Use obj.tz_localize(None) instead."
        )

    data_unit = np.datetime_data(subarr.dtype)[0]
    result = cls._simple_new(
        subarr, freq=inferred_freq, dtype=tz_to_dtype(tz, data_unit)
    )
    if unit is not None and unit != result.unit:
        # The user-passed dtype specified a unit: cast to it here
        result = result.as_unit(unit)

    result._maybe_pin_freq(freq, {"ambiguous": ambiguous})
    return result
|
| 396 |
+
|
| 397 |
+
@classmethod
def _generate_range(
    cls,
    start,
    end,
    periods: int | None,
    freq,
    tz=None,
    normalize: bool = False,
    ambiguous: TimeAmbiguous = "raise",
    nonexistent: TimeNonexistent = "raise",
    inclusive: IntervalClosedType = "both",
    *,
    unit: str | None = None,
) -> Self:
    """
    Build an array from range parameters.

    Exactly three of ``start``, ``end``, ``periods`` and ``freq`` must be
    given.  With a ``freq`` the values are generated by stepping; without
    one they are linearly spaced between ``start`` and ``end``.  Endpoints
    are dropped afterwards according to ``inclusive``.

    Raises
    ------
    ValueError
        If ``freq`` is missing while any of the other three is missing,
        if not exactly three of the four parameters are specified, if
        ``start`` or ``end`` is NaT, or if ``unit`` is not one of
        's', 'ms', 'us', 'ns'.
    """
    periods = dtl.validate_periods(periods)
    if freq is None and (periods is None or start is None or end is None):
        raise ValueError("Must provide freq argument if no data is supplied")

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError(
            "Of the four parameters: start, end, periods, "
            "and freq, exactly three must be specified"
        )
    freq = to_offset(freq)

    if start is not None:
        start = Timestamp(start)
    if end is not None:
        end = Timestamp(end)

    if start is NaT or end is NaT:
        raise ValueError("Neither `start` nor `end` can be NaT")

    if unit is None:
        unit = "ns"
    elif unit not in ("s", "ms", "us", "ns"):
        raise ValueError("'unit' must be one of 's', 'ms', 'us', 'ns'")

    if start is not None:
        start = start.as_unit(unit, round_ok=False)
    if end is not None:
        end = end.as_unit(unit, round_ok=False)

    left_inclusive, right_inclusive = validate_inclusive(inclusive)
    start, end = _maybe_normalize_endpoints(start, end, normalize)
    tz = _infer_tz_from_endpoints(start, end, tz)

    if tz is not None:
        # Localize the start and end arguments
        start = _maybe_localize_point(start, freq, tz, ambiguous, nonexistent)
        end = _maybe_localize_point(end, freq, tz, ambiguous, nonexistent)

    if freq is None:
        # Create a linearly spaced date_range in local time
        # Nanosecond-granularity timestamps aren't always correctly
        # representable with doubles, so we limit the range that we
        # pass to np.linspace as much as possible
        periods = cast(int, periods)
        span = end._value - start._value
        i8values = np.linspace(0, span, periods, dtype="int64") + start._value
        if i8values.dtype != "i8":
            # 2022-01-09 I (brock) am not sure if it is possible for this
            #  to overflow and cast to e.g. f8, but if it does we need to cast
            i8values = i8values.astype("i8")
    else:
        # We break Day arithmetic (fixed 24 hour) here and opt for
        # Day to mean calendar day (23/24/25 hour). Therefore, strip
        # tz info from start and end to avoid DST arithmetic
        if isinstance(freq, Day):
            if start is not None:
                start = start.tz_localize(None)
            if end is not None:
                end = end.tz_localize(None)

        if isinstance(freq, Tick):
            i8values = generate_regular_range(start, end, periods, freq, unit=unit)
        else:
            xdr = _generate_range(
                start=start, end=end, periods=periods, offset=freq, unit=unit
            )
            i8values = np.array([x._value for x in xdr], dtype=np.int64)

        endpoint_tz = start.tz if start is not None else end.tz

        if tz is not None and endpoint_tz is None:
            if not timezones.is_utc(tz):
                # short-circuit tz_localize_to_utc which would make
                # an unnecessary copy with UTC but be a no-op.
                creso = abbrev_to_npy_unit(unit)
                i8values = tzconversion.tz_localize_to_utc(
                    i8values,
                    tz,
                    ambiguous=ambiguous,
                    nonexistent=nonexistent,
                    creso=creso,
                )

            # i8values is localized datetime64 array -> have to convert
            # start/end as well to compare
            if start is not None:
                start = start.tz_localize(tz, ambiguous, nonexistent)
            if end is not None:
                end = end.tz_localize(tz, ambiguous, nonexistent)

    # Drop endpoints that ``inclusive`` excludes.
    if start == end:
        if not (left_inclusive or right_inclusive):
            i8values = i8values[1:-1]
    else:
        start_i8 = Timestamp(start)._value
        end_i8 = Timestamp(end)._value
        if not left_inclusive and len(i8values) and i8values[0] == start_i8:
            i8values = i8values[1:]
        if not right_inclusive and len(i8values) and i8values[-1] == end_i8:
            i8values = i8values[:-1]

    dt64_values = i8values.view(f"datetime64[{unit}]")
    return cls._simple_new(dt64_values, freq=freq, dtype=tz_to_dtype(tz, unit=unit))
|
| 521 |
+
|
| 522 |
+
# -----------------------------------------------------------------
|
| 523 |
+
# DatetimeLike Interface
|
| 524 |
+
|
| 525 |
+
def _unbox_scalar(self, value) -> np.datetime64:
    """
    Convert a Timestamp or NaT into a ``np.datetime64`` in this array's unit.

    Raises
    ------
    ValueError
        If ``value`` is neither NaT nor an instance of ``self._scalar_type``.
    """
    if not isinstance(value, self._scalar_type) and value is not NaT:
        raise ValueError("'value' should be a Timestamp.")
    self._check_compatible_with(value)

    if value is NaT:
        return np.datetime64(value._value, self.unit)
    # a real Timestamp: move it to our unit, then take its datetime64 form
    return value.as_unit(self.unit).asm8
|
| 533 |
+
|
| 534 |
+
def _scalar_from_string(self, value) -> Timestamp | NaTType:
    """Construct a Timestamp from ``value`` using this array's ``tz``."""
    own_tz = self.tz
    return Timestamp(value, tz=own_tz)
|
| 536 |
+
|
| 537 |
+
def _check_compatible_with(self, other) -> None:
    """Check tz-awareness compatibility with ``other``; NaT always passes."""
    if other is not NaT:
        self._assert_tzawareness_compat(other)
|
| 541 |
+
|
| 542 |
+
# -----------------------------------------------------------------
|
| 543 |
+
# Descriptive Properties
|
| 544 |
+
|
| 545 |
+
def _box_func(self, x: np.datetime64) -> Timestamp | NaTType:
    """Box ``x`` as a scalar carrying this array's resolution and tz."""
    # GH#42228
    i8 = x.view("i8")
    return Timestamp._from_value_and_reso(i8, reso=self._creso, tz=self.tz)
|
| 550 |
+
|
| 551 |
+
@property
# error: Return type "Union[dtype, DatetimeTZDtype]" of "dtype"
# incompatible with return type "ExtensionDtype" in supertype
# "ExtensionArray"
def dtype(self) -> np.dtype[np.datetime64] | DatetimeTZDtype:  # type: ignore[override]
    """
    The dtype for the DatetimeArray.

    .. warning::

       In a future version of pandas this will never be a
       ``numpy.dtype``; :attr:`DatetimeArray.dtype` will instead always
       be an instance of an ``ExtensionDtype`` subclass.

    Returns
    -------
    numpy.dtype or DatetimeTZDtype
        For tz-naive values, ``np.dtype('datetime64[ns]')``.

        For tz-aware values, the ``DatetimeTZDtype``.
    """
    stored = self._dtype
    return stored
|
| 575 |
+
|
| 576 |
+
@property
def tz(self) -> tzinfo | None:
    """
    Return the timezone.

    Returns
    -------
    datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
        None is returned for a tz-naive array.

    Examples
    --------
    For Series:

    >>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
    >>> s = pd.to_datetime(s)
    >>> s
    0   2020-01-01 10:00:00+00:00
    1   2020-02-01 11:00:00+00:00
    dtype: datetime64[ns, UTC]
    >>> s.dt.tz
    datetime.timezone.utc

    For DatetimeIndex:

    >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
    ...                         "2/1/2020 11:00:00+00:00"])
    >>> idx.tz
    datetime.timezone.utc
    """
    # GH 18595
    own_dtype = self.dtype
    return getattr(own_dtype, "tz", None)

@tz.setter
def tz(self, value):
    """Always raise: the timezone cannot be assigned directly."""
    # GH 3746: Prevent localizing or converting the index by setting tz
    message = (
        "Cannot directly set timezone. Use tz_localize() "
        "or tz_convert() as appropriate"
    )
    raise AttributeError(message)
|
| 616 |
+
|
| 617 |
+
@property
def tzinfo(self) -> tzinfo | None:
    """
    Alias for the ``tz`` attribute.
    """
    zone = self.tz
    return zone
|
| 623 |
+
|
| 624 |
+
@property  # NB: override with cache_readonly in immutable subclasses
def is_normalized(self) -> bool:
    """
    Whether every date is at midnight ("no time").
    """
    i8 = self.asi8
    return is_date_array_normalized(i8, self.tz, reso=self._creso)
|
| 630 |
+
|
| 631 |
+
@property  # NB: override with cache_readonly in immutable subclasses
def _resolution_obj(self) -> Resolution:
    """Resolution computed by ``get_resolution`` from the i8 values and tz."""
    i8 = self.asi8
    return get_resolution(i8, self.tz, reso=self._creso)
|
| 634 |
+
|
| 635 |
+
# ----------------------------------------------------------------
|
| 636 |
+
# Array-Like / EA-Interface Methods
|
| 637 |
+
|
| 638 |
+
def __array__(self, dtype=None, copy=None) -> np.ndarray:
    """
    Convert to a numpy array.

    When no dtype is requested and the array has a tz, object dtype is
    used so that the tz information is preserved.
    """
    target = dtype
    if target is None and self.tz:
        # The default for tz-aware is object, to preserve tz info
        target = object

    return super().__array__(dtype=target, copy=copy)
|
| 644 |
+
|
| 645 |
+
def __iter__(self) -> Iterator:
    """
    Return an iterator over the boxed values

    Yields
    ------
    tstamp : Timestamp
    """
    if self.ndim > 1:
        # higher-dimensional: yield the sub-array at each first-axis position
        for i in range(len(self)):
            yield self[i]
    else:
        # convert in chunks of ``_ITER_CHUNKSIZE`` for efficiency
        data = self.asi8
        length = len(self)
        chunksize = _ITER_CHUNKSIZE

        # Stepping by the chunk size visits only non-empty slices, so no
        # conversion call is wasted on an empty trailing chunk when the
        # length is an exact multiple of the chunk size (or zero).
        for start_i in range(0, length, chunksize):
            converted = ints_to_pydatetime(
                data[start_i : start_i + chunksize],
                tz=self.tz,
                box="timestamp",
                reso=self._creso,
            )
            yield from converted
|
| 673 |
+
|
| 674 |
+
def astype(self, dtype, copy: bool = True):
    """
    Cast to ``dtype``.

    Datetime targets are handled here; everything else is delegated to
    ``DatetimeLikeArrayMixin.astype``.

    Raises
    ------
    TypeError
        When casting between tz-naive and tz-aware dtypes, or to the
        unit-less dtype 'datetime64'.
    """
    # We handle
    #   --> datetime
    #   --> period
    # DatetimeLikeArrayMixin Super handles the rest.
    dtype = pandas_dtype(dtype)

    if dtype == self.dtype:
        return self.copy() if copy else self

    if isinstance(dtype, ExtensionDtype):
        if not isinstance(dtype, DatetimeTZDtype):
            # e.g. Sparse[datetime64[ns]]
            return super().astype(dtype, copy=copy)
        if self.tz is None:
            # pre-2.0 this did self.tz_localize(dtype.tz), which did not match
            #  the Series behavior which did
            #  values.tz_localize("UTC").tz_convert(dtype.tz)
            raise TypeError(
                "Cannot use .astype to convert from timezone-naive dtype to "
                "timezone-aware dtype. Use obj.tz_localize instead or "
                "series.dt.tz_localize instead"
            )
        # tzaware unit conversion e.g. datetime64[s, UTC]
        np_dtype = np.dtype(dtype.str)
        res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy)
        return type(self)._simple_new(res_values, dtype=dtype, freq=self.freq)

    if lib.is_np_dtype(dtype, "M"):
        if self.tz is not None:
            # pre-2.0 behavior for DTA/DTI was
            #  values.tz_convert("UTC").tz_localize(None), which did not match
            #  the Series behavior
            raise TypeError(
                "Cannot use .astype to convert from timezone-aware dtype to "
                "timezone-naive dtype. Use obj.tz_localize(None) or "
                "obj.tz_convert('UTC').tz_localize(None) instead."
            )

        if not is_unitless(dtype):
            if is_supported_dtype(dtype):
                # unit conversion e.g. datetime64[s]
                res_values = astype_overflowsafe(self._ndarray, dtype, copy=True)
                return type(self)._simple_new(res_values, dtype=res_values.dtype)
                # TODO: preserve freq?
        elif dtype != self.dtype:
            raise TypeError(
                "Casting to unit-less dtype 'datetime64' is not supported. "
                "Pass e.g. 'datetime64[ns]' instead."
            )

    if isinstance(dtype, PeriodDtype):
        return self.to_period(freq=dtype.freq)
    return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
|
| 740 |
+
|
| 741 |
+
# -----------------------------------------------------------------
|
| 742 |
+
# Rendering Methods
|
| 743 |
+
|
| 744 |
+
def _format_native_types(
    self, *, na_rep: str | float = "NaT", date_format=None, **kwargs
) -> npt.NDArray[np.object_]:
    """
    Format the values via ``tslib.format_array_from_datetime``.

    When ``date_format`` is not given and ``self._is_dates_only`` is true,
    the format "%Y-%m-%d" is used.
    """
    fmt = date_format
    if fmt is None and self._is_dates_only:
        # Only dates and no timezone: provide a default format
        fmt = "%Y-%m-%d"

    return tslib.format_array_from_datetime(
        self.asi8,
        tz=self.tz,
        format=fmt,
        na_rep=na_rep,
        reso=self._creso,
    )
|
| 754 |
+
|
| 755 |
+
# -----------------------------------------------------------------
|
| 756 |
+
# Comparison Methods
|
| 757 |
+
|
| 758 |
+
def _has_same_tz(self, other) -> bool:
    """
    Whether ``other`` has the same tzinfo as this array.

    Objects without a ``tzinfo`` attribute never match.
    """
    # vzone shouldn't be None if value is non-datetime like
    if isinstance(other, np.datetime64):
        # np.datetime64 has no tz attribute, so box it as a Timestamp first
        other = Timestamp(other)

    if hasattr(other, "tzinfo"):
        return timezones.tz_compare(self.tzinfo, other.tzinfo)
    return False
|
| 768 |
+
|
| 769 |
+
def _assert_tzawareness_compat(self, other) -> None:
    """
    Raise ``TypeError`` if exactly one of ``self`` and ``other`` is tz-aware.

    NaT is compatible with both aware and naive arrays.
    """
    # adapted from _Timestamp._assert_tzawareness_compat
    other_tz = getattr(other, "tzinfo", None)
    other_dtype = getattr(other, "dtype", None)

    if isinstance(other_dtype, DatetimeTZDtype):
        # Get tzinfo from Series dtype
        other_tz = other.dtype.tz

    if other is NaT:
        # pd.NaT quacks both aware and naive
        return

    if self.tz is None:
        if other_tz is not None:
            raise TypeError(
                "Cannot compare tz-naive and tz-aware datetime-like objects."
            )
        return

    if other_tz is None:
        raise TypeError(
            "Cannot compare tz-naive and tz-aware datetime-like objects"
        )
|
| 789 |
+
|
| 790 |
+
# -----------------------------------------------------------------
|
| 791 |
+
# Arithmetic Methods
|
| 792 |
+
|
| 793 |
+
def _add_offset(self, offset: BaseOffset) -> Self:
    """
    Add a non-Tick offset to the array.

    The offset's vectorized ``_apply_array`` is tried on the tz-naive
    values first.  If it raises ``NotImplementedError``, a
    ``PerformanceWarning`` is emitted and the offset is added to the
    object-dtype version of the array instead.
    """
    assert not isinstance(offset, Tick)

    # work on wall times; the tz is re-attached at the end
    values = self if self.tz is None else self.tz_localize(None)

    try:
        res_values = offset._apply_array(values._ndarray)
        if res_values.dtype.kind == "i":
            # error: Argument 1 to "view" of "ndarray" has incompatible type
            # "dtype[datetime64] | DatetimeTZDtype"; expected
            # "dtype[Any] | type[Any] | _SupportsDType[dtype[Any]]"
            res_values = res_values.view(values.dtype)  # type: ignore[arg-type]
    except NotImplementedError:
        warnings.warn(
            "Non-vectorized DateOffset being applied to Series or DatetimeIndex.",
            PerformanceWarning,
            stacklevel=find_stack_level(),
        )
        res_values = self.astype("O") + offset
        # TODO(GH#55564): as_unit will be unnecessary
        result = type(self)._from_sequence(res_values).as_unit(self.unit)
        if len(self) == 0:
            # GH#30336 _from_sequence won't be able to infer self.tz
            return result.tz_localize(self.tz)
    else:
        result = type(self)._simple_new(res_values, dtype=res_values.dtype)
        if offset.normalize:
            result = result.normalize()
            result._freq = None

        if self.tz is not None:
            result = result.tz_localize(self.tz)

    return result
|
| 831 |
+
|
| 832 |
+
# -----------------------------------------------------------------
|
| 833 |
+
# Timezone Conversion and Localization Methods
|
| 834 |
+
|
| 835 |
+
def _local_timestamps(self) -> npt.NDArray[np.int64]:
    """
    Return the i8 representation expressed in the local timezone rather
    than UTC.

    This is used to calculate time-of-day information as if the timestamps
    were timezone-naive.
    """
    own_tz = self.tz
    if own_tz is not None and not timezones.is_utc(own_tz):
        return tz_convert_from_utc(self.asi8, own_tz, reso=self._creso)
    # tz-naive or UTC: avoid the copy that would be made in tzconversion
    return self.asi8
|
| 846 |
+
|
| 847 |
+
def tz_convert(self, tz) -> Self:
    """
    Convert tz-aware Datetime Array/Index from one time zone to another.

    Parameters
    ----------
    tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
        Target time zone. The timestamps are converted to this time zone
        of the Datetime Array/Index. Passing None converts to UTC and
        removes the timezone information.

    Returns
    -------
    Array or Index

    Raises
    ------
    TypeError
        If Datetime Array/Index is tz-naive.

    See Also
    --------
    DatetimeIndex.tz : A timezone that has a variable offset from UTC.
    DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a
        given time zone, or remove timezone from a tz-aware DatetimeIndex.

    Examples
    --------
    Passing a `tz` moves the DatetimeIndex to another time zone:

    >>> dti = pd.date_range(start='2014-08-01 09:00',
    ...                     freq='h', periods=3, tz='Europe/Berlin')

    >>> dti
    DatetimeIndex(['2014-08-01 09:00:00+02:00',
                   '2014-08-01 10:00:00+02:00',
                   '2014-08-01 11:00:00+02:00'],
                  dtype='datetime64[ns, Europe/Berlin]', freq='h')

    >>> dti.tz_convert('US/Central')
    DatetimeIndex(['2014-08-01 02:00:00-05:00',
                   '2014-08-01 03:00:00-05:00',
                   '2014-08-01 04:00:00-05:00'],
                  dtype='datetime64[ns, US/Central]', freq='h')

    Passing ``tz=None`` removes the timezone (after converting to UTC if
    necessary):

    >>> dti = pd.date_range(start='2014-08-01 09:00', freq='h',
    ...                     periods=3, tz='Europe/Berlin')

    >>> dti
    DatetimeIndex(['2014-08-01 09:00:00+02:00',
                   '2014-08-01 10:00:00+02:00',
                   '2014-08-01 11:00:00+02:00'],
                  dtype='datetime64[ns, Europe/Berlin]', freq='h')

    >>> dti.tz_convert(None)
    DatetimeIndex(['2014-08-01 07:00:00',
                   '2014-08-01 08:00:00',
                   '2014-08-01 09:00:00'],
                  dtype='datetime64[ns]', freq='h')
    """
    tz = timezones.maybe_get_tz(tz)

    if self.tz is None:
        # tz naive, use tz_localize
        raise TypeError(
            "Cannot convert tz-naive timestamps, use tz_localize to localize"
        )

    # No conversion since timestamps are all UTC to begin with
    new_dtype = tz_to_dtype(tz, unit=self.unit)
    return self._simple_new(self._ndarray, dtype=new_dtype, freq=self.freq)
|
| 922 |
+
|
| 923 |
+
@dtl.ravel_compat
def tz_localize(
    self,
    tz,
    ambiguous: TimeAmbiguous = "raise",
    nonexistent: TimeNonexistent = "raise",
) -> Self:
    """
    Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index.

    A time zone (tz) naive Datetime Array/Index object is made time zone
    aware. The time itself is not moved to another time zone.

    The inverse is also supported: passing `tz=None` creates a time zone
    unaware object from an aware object.

    Parameters
    ----------
    tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
        Time zone to convert timestamps to. Passing ``None`` will
        remove the time zone information preserving local time.
    ambiguous : 'infer', 'NaT', bool array, default 'raise'
        When clocks moved backward due to DST, ambiguous times may arise.
        For example in Central European Time (UTC+01), when going from
        03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
        00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
        `ambiguous` parameter dictates how ambiguous times should be
        handled.

        - 'infer' will attempt to infer fall dst-transition hours based on
          order
        - bool-ndarray where True signifies a DST time, False signifies a
          non-DST time (note that this flag is only applicable for
          ambiguous times)
        - 'NaT' will return NaT where there are ambiguous times
        - 'raise' will raise an AmbiguousTimeError if there are ambiguous
          times.

    nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, default 'raise'
        A nonexistent time does not exist in a particular timezone
        where clocks moved forward due to DST.

        - 'shift_forward' will shift the nonexistent time forward to the
          closest existing time
        - 'shift_backward' will shift the nonexistent time backward to the
          closest existing time
        - 'NaT' will return NaT where there are nonexistent times
        - timedelta objects will shift nonexistent times by the timedelta
        - 'raise' will raise an NonExistentTimeError if there are
          nonexistent times.

    Returns
    -------
    Same type as self
        Array/Index converted to the specified time zone.

    Raises
    ------
    TypeError
        If the Datetime Array/Index is tz-aware and tz is not None.
    ValueError
        If ``nonexistent`` is neither one of the accepted strings nor a
        timedelta object.

    See Also
    --------
    DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from
        one time zone to another.

    Examples
    --------
    >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3)
    >>> tz_naive
    DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
                   '2018-03-03 09:00:00'],
                  dtype='datetime64[ns]', freq='D')

    Localize DatetimeIndex in US/Eastern time zone:

    >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern')
    >>> tz_aware
    DatetimeIndex(['2018-03-01 09:00:00-05:00',
                   '2018-03-02 09:00:00-05:00',
                   '2018-03-03 09:00:00-05:00'],
                  dtype='datetime64[ns, US/Eastern]', freq=None)

    With the ``tz=None``, we can remove the time zone information
    while keeping the local time (not converted to UTC):

    >>> tz_aware.tz_localize(None)
    DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
                   '2018-03-03 09:00:00'],
                  dtype='datetime64[ns]', freq=None)

    Be careful with DST changes. When there is sequential data, pandas can
    infer the DST time:

    >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00',
    ...                               '2018-10-28 02:00:00',
    ...                               '2018-10-28 02:30:00',
    ...                               '2018-10-28 02:00:00',
    ...                               '2018-10-28 02:30:00',
    ...                               '2018-10-28 03:00:00',
    ...                               '2018-10-28 03:30:00']))
    >>> s.dt.tz_localize('CET', ambiguous='infer')
    0   2018-10-28 01:30:00+02:00
    1   2018-10-28 02:00:00+02:00
    2   2018-10-28 02:30:00+02:00
    3   2018-10-28 02:00:00+01:00
    4   2018-10-28 02:30:00+01:00
    5   2018-10-28 03:00:00+01:00
    6   2018-10-28 03:30:00+01:00
    dtype: datetime64[ns, CET]

    In some cases, inferring the DST is impossible. In such cases, you can
    pass an ndarray to the ambiguous parameter to set the DST explicitly

    >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00',
    ...                               '2018-10-28 02:36:00',
    ...                               '2018-10-28 03:46:00']))
    >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False]))
    0   2018-10-28 01:20:00+02:00
    1   2018-10-28 02:36:00+02:00
    2   2018-10-28 03:46:00+01:00
    dtype: datetime64[ns, CET]

    If the DST transition causes nonexistent times, you can shift these
    dates forward or backwards with a timedelta object or `'shift_forward'`
    or `'shift_backward'`.

    >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00',
    ...                               '2015-03-29 03:30:00']))
    >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward')
    0   2015-03-29 03:00:00+02:00
    1   2015-03-29 03:30:00+02:00
    dtype: datetime64[ns, Europe/Warsaw]

    >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward')
    0   2015-03-29 01:59:59.999999999+01:00
    1   2015-03-29 03:30:00+02:00
    dtype: datetime64[ns, Europe/Warsaw]

    >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1h'))
    0   2015-03-29 03:30:00+02:00
    1   2015-03-29 03:30:00+02:00
    dtype: datetime64[ns, Europe/Warsaw]
    """
    nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
    if nonexistent not in nonexistent_options and not isinstance(
        nonexistent, timedelta
    ):
        raise ValueError(
            "The nonexistent argument must be one of 'raise', "
            "'NaT', 'shift_forward', 'shift_backward' or "
            "a timedelta object"
        )

    if self.tz is None:
        tz = timezones.maybe_get_tz(tz)
        # Convert to UTC
        new_dates = tzconversion.tz_localize_to_utc(
            self.asi8,
            tz,
            ambiguous=ambiguous,
            nonexistent=nonexistent,
            creso=self._creso,
        )
    elif tz is None:
        # aware -> naive: keep the local wall times
        new_dates = tz_convert_from_utc(self.asi8, self.tz, reso=self._creso)
    else:
        raise TypeError("Already tz-aware, use tz_convert to convert.")

    new_dates_dt64 = new_dates.view(f"M8[{self.unit}]")
    dtype = tz_to_dtype(tz, unit=self.unit)

    # freq can be preserved for UTC, for a single non-NaT element, and for
    # the naive -> naive no-op.
    # TODO: Also for fixed-offsets
    keep_freq = (
        timezones.is_utc(tz)
        or (len(self) == 1 and not isna(new_dates_dt64[0]))
        or (tz is None and self.tz is None)
    )
    freq = self.freq if keep_freq else None
    return self._simple_new(new_dates_dt64, dtype=dtype, freq=freq)
|
| 1107 |
+
|
| 1108 |
+
# ----------------------------------------------------------------
|
| 1109 |
+
# Conversion Methods - Vectorized analogues of Timestamp methods
|
| 1110 |
+
|
| 1111 |
+
def to_pydatetime(self) -> npt.NDArray[np.object_]:
|
| 1112 |
+
"""
|
| 1113 |
+
Return an ndarray of ``datetime.datetime`` objects.
|
| 1114 |
+
|
| 1115 |
+
Returns
|
| 1116 |
+
-------
|
| 1117 |
+
numpy.ndarray
|
| 1118 |
+
|
| 1119 |
+
Examples
|
| 1120 |
+
--------
|
| 1121 |
+
>>> idx = pd.date_range('2018-02-27', periods=3)
|
| 1122 |
+
>>> idx.to_pydatetime()
|
| 1123 |
+
array([datetime.datetime(2018, 2, 27, 0, 0),
|
| 1124 |
+
datetime.datetime(2018, 2, 28, 0, 0),
|
| 1125 |
+
datetime.datetime(2018, 3, 1, 0, 0)], dtype=object)
|
| 1126 |
+
"""
|
| 1127 |
+
return ints_to_pydatetime(self.asi8, tz=self.tz, reso=self._creso)
|
| 1128 |
+
|
| 1129 |
+
def normalize(self) -> Self:
|
| 1130 |
+
"""
|
| 1131 |
+
Convert times to midnight.
|
| 1132 |
+
|
| 1133 |
+
The time component of the date-time is converted to midnight i.e.
|
| 1134 |
+
00:00:00. This is useful in cases, when the time does not matter.
|
| 1135 |
+
Length is unaltered. The timezones are unaffected.
|
| 1136 |
+
|
| 1137 |
+
This method is available on Series with datetime values under
|
| 1138 |
+
the ``.dt`` accessor, and directly on Datetime Array/Index.
|
| 1139 |
+
|
| 1140 |
+
Returns
|
| 1141 |
+
-------
|
| 1142 |
+
DatetimeArray, DatetimeIndex or Series
|
| 1143 |
+
The same type as the original data. Series will have the same
|
| 1144 |
+
name and index. DatetimeIndex will have the same name.
|
| 1145 |
+
|
| 1146 |
+
See Also
|
| 1147 |
+
--------
|
| 1148 |
+
floor : Floor the datetimes to the specified freq.
|
| 1149 |
+
ceil : Ceil the datetimes to the specified freq.
|
| 1150 |
+
round : Round the datetimes to the specified freq.
|
| 1151 |
+
|
| 1152 |
+
Examples
|
| 1153 |
+
--------
|
| 1154 |
+
>>> idx = pd.date_range(start='2014-08-01 10:00', freq='h',
|
| 1155 |
+
... periods=3, tz='Asia/Calcutta')
|
| 1156 |
+
>>> idx
|
| 1157 |
+
DatetimeIndex(['2014-08-01 10:00:00+05:30',
|
| 1158 |
+
'2014-08-01 11:00:00+05:30',
|
| 1159 |
+
'2014-08-01 12:00:00+05:30'],
|
| 1160 |
+
dtype='datetime64[ns, Asia/Calcutta]', freq='h')
|
| 1161 |
+
>>> idx.normalize()
|
| 1162 |
+
DatetimeIndex(['2014-08-01 00:00:00+05:30',
|
| 1163 |
+
'2014-08-01 00:00:00+05:30',
|
| 1164 |
+
'2014-08-01 00:00:00+05:30'],
|
| 1165 |
+
dtype='datetime64[ns, Asia/Calcutta]', freq=None)
|
| 1166 |
+
"""
|
| 1167 |
+
new_values = normalize_i8_timestamps(self.asi8, self.tz, reso=self._creso)
|
| 1168 |
+
dt64_values = new_values.view(self._ndarray.dtype)
|
| 1169 |
+
|
| 1170 |
+
dta = type(self)._simple_new(dt64_values, dtype=dt64_values.dtype)
|
| 1171 |
+
dta = dta._with_freq("infer")
|
| 1172 |
+
if self.tz is not None:
|
| 1173 |
+
dta = dta.tz_localize(self.tz)
|
| 1174 |
+
return dta
|
| 1175 |
+
|
| 1176 |
+
def to_period(self, freq=None) -> PeriodArray:
|
| 1177 |
+
"""
|
| 1178 |
+
Cast to PeriodArray/PeriodIndex at a particular frequency.
|
| 1179 |
+
|
| 1180 |
+
Converts DatetimeArray/Index to PeriodArray/PeriodIndex.
|
| 1181 |
+
|
| 1182 |
+
Parameters
|
| 1183 |
+
----------
|
| 1184 |
+
freq : str or Period, optional
|
| 1185 |
+
One of pandas' :ref:`period aliases <timeseries.period_aliases>`
|
| 1186 |
+
or an Period object. Will be inferred by default.
|
| 1187 |
+
|
| 1188 |
+
Returns
|
| 1189 |
+
-------
|
| 1190 |
+
PeriodArray/PeriodIndex
|
| 1191 |
+
|
| 1192 |
+
Raises
|
| 1193 |
+
------
|
| 1194 |
+
ValueError
|
| 1195 |
+
When converting a DatetimeArray/Index with non-regular values,
|
| 1196 |
+
so that a frequency cannot be inferred.
|
| 1197 |
+
|
| 1198 |
+
See Also
|
| 1199 |
+
--------
|
| 1200 |
+
PeriodIndex: Immutable ndarray holding ordinal values.
|
| 1201 |
+
DatetimeIndex.to_pydatetime: Return DatetimeIndex as object.
|
| 1202 |
+
|
| 1203 |
+
Examples
|
| 1204 |
+
--------
|
| 1205 |
+
>>> df = pd.DataFrame({"y": [1, 2, 3]},
|
| 1206 |
+
... index=pd.to_datetime(["2000-03-31 00:00:00",
|
| 1207 |
+
... "2000-05-31 00:00:00",
|
| 1208 |
+
... "2000-08-31 00:00:00"]))
|
| 1209 |
+
>>> df.index.to_period("M")
|
| 1210 |
+
PeriodIndex(['2000-03', '2000-05', '2000-08'],
|
| 1211 |
+
dtype='period[M]')
|
| 1212 |
+
|
| 1213 |
+
Infer the daily frequency
|
| 1214 |
+
|
| 1215 |
+
>>> idx = pd.date_range("2017-01-01", periods=2)
|
| 1216 |
+
>>> idx.to_period()
|
| 1217 |
+
PeriodIndex(['2017-01-01', '2017-01-02'],
|
| 1218 |
+
dtype='period[D]')
|
| 1219 |
+
"""
|
| 1220 |
+
from pandas.core.arrays import PeriodArray
|
| 1221 |
+
|
| 1222 |
+
if self.tz is not None:
|
| 1223 |
+
warnings.warn(
|
| 1224 |
+
"Converting to PeriodArray/Index representation "
|
| 1225 |
+
"will drop timezone information.",
|
| 1226 |
+
UserWarning,
|
| 1227 |
+
stacklevel=find_stack_level(),
|
| 1228 |
+
)
|
| 1229 |
+
|
| 1230 |
+
if freq is None:
|
| 1231 |
+
freq = self.freqstr or self.inferred_freq
|
| 1232 |
+
if isinstance(self.freq, BaseOffset) and hasattr(
|
| 1233 |
+
self.freq, "_period_dtype_code"
|
| 1234 |
+
):
|
| 1235 |
+
freq = PeriodDtype(self.freq)._freqstr
|
| 1236 |
+
|
| 1237 |
+
if freq is None:
|
| 1238 |
+
raise ValueError(
|
| 1239 |
+
"You must pass a freq argument as current index has none."
|
| 1240 |
+
)
|
| 1241 |
+
|
| 1242 |
+
res = get_period_alias(freq)
|
| 1243 |
+
|
| 1244 |
+
# https://github.com/pandas-dev/pandas/issues/33358
|
| 1245 |
+
if res is None:
|
| 1246 |
+
res = freq
|
| 1247 |
+
|
| 1248 |
+
freq = res
|
| 1249 |
+
return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz)
|
| 1250 |
+
|
| 1251 |
+
# -----------------------------------------------------------------
|
| 1252 |
+
# Properties - Vectorized Timestamp Properties/Methods
|
| 1253 |
+
|
| 1254 |
+
def month_name(self, locale=None) -> npt.NDArray[np.object_]:
|
| 1255 |
+
"""
|
| 1256 |
+
Return the month names with specified locale.
|
| 1257 |
+
|
| 1258 |
+
Parameters
|
| 1259 |
+
----------
|
| 1260 |
+
locale : str, optional
|
| 1261 |
+
Locale determining the language in which to return the month name.
|
| 1262 |
+
Default is English locale (``'en_US.utf8'``). Use the command
|
| 1263 |
+
``locale -a`` on your terminal on Unix systems to find your locale
|
| 1264 |
+
language code.
|
| 1265 |
+
|
| 1266 |
+
Returns
|
| 1267 |
+
-------
|
| 1268 |
+
Series or Index
|
| 1269 |
+
Series or Index of month names.
|
| 1270 |
+
|
| 1271 |
+
Examples
|
| 1272 |
+
--------
|
| 1273 |
+
>>> s = pd.Series(pd.date_range(start='2018-01', freq='ME', periods=3))
|
| 1274 |
+
>>> s
|
| 1275 |
+
0 2018-01-31
|
| 1276 |
+
1 2018-02-28
|
| 1277 |
+
2 2018-03-31
|
| 1278 |
+
dtype: datetime64[ns]
|
| 1279 |
+
>>> s.dt.month_name()
|
| 1280 |
+
0 January
|
| 1281 |
+
1 February
|
| 1282 |
+
2 March
|
| 1283 |
+
dtype: object
|
| 1284 |
+
|
| 1285 |
+
>>> idx = pd.date_range(start='2018-01', freq='ME', periods=3)
|
| 1286 |
+
>>> idx
|
| 1287 |
+
DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
|
| 1288 |
+
dtype='datetime64[ns]', freq='ME')
|
| 1289 |
+
>>> idx.month_name()
|
| 1290 |
+
Index(['January', 'February', 'March'], dtype='object')
|
| 1291 |
+
|
| 1292 |
+
Using the ``locale`` parameter you can set a different locale language,
|
| 1293 |
+
for example: ``idx.month_name(locale='pt_BR.utf8')`` will return month
|
| 1294 |
+
names in Brazilian Portuguese language.
|
| 1295 |
+
|
| 1296 |
+
>>> idx = pd.date_range(start='2018-01', freq='ME', periods=3)
|
| 1297 |
+
>>> idx
|
| 1298 |
+
DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
|
| 1299 |
+
dtype='datetime64[ns]', freq='ME')
|
| 1300 |
+
>>> idx.month_name(locale='pt_BR.utf8') # doctest: +SKIP
|
| 1301 |
+
Index(['Janeiro', 'Fevereiro', 'Março'], dtype='object')
|
| 1302 |
+
"""
|
| 1303 |
+
values = self._local_timestamps()
|
| 1304 |
+
|
| 1305 |
+
result = fields.get_date_name_field(
|
| 1306 |
+
values, "month_name", locale=locale, reso=self._creso
|
| 1307 |
+
)
|
| 1308 |
+
result = self._maybe_mask_results(result, fill_value=None)
|
| 1309 |
+
return result
|
| 1310 |
+
|
| 1311 |
+
def day_name(self, locale=None) -> npt.NDArray[np.object_]:
|
| 1312 |
+
"""
|
| 1313 |
+
Return the day names with specified locale.
|
| 1314 |
+
|
| 1315 |
+
Parameters
|
| 1316 |
+
----------
|
| 1317 |
+
locale : str, optional
|
| 1318 |
+
Locale determining the language in which to return the day name.
|
| 1319 |
+
Default is English locale (``'en_US.utf8'``). Use the command
|
| 1320 |
+
``locale -a`` on your terminal on Unix systems to find your locale
|
| 1321 |
+
language code.
|
| 1322 |
+
|
| 1323 |
+
Returns
|
| 1324 |
+
-------
|
| 1325 |
+
Series or Index
|
| 1326 |
+
Series or Index of day names.
|
| 1327 |
+
|
| 1328 |
+
Examples
|
| 1329 |
+
--------
|
| 1330 |
+
>>> s = pd.Series(pd.date_range(start='2018-01-01', freq='D', periods=3))
|
| 1331 |
+
>>> s
|
| 1332 |
+
0 2018-01-01
|
| 1333 |
+
1 2018-01-02
|
| 1334 |
+
2 2018-01-03
|
| 1335 |
+
dtype: datetime64[ns]
|
| 1336 |
+
>>> s.dt.day_name()
|
| 1337 |
+
0 Monday
|
| 1338 |
+
1 Tuesday
|
| 1339 |
+
2 Wednesday
|
| 1340 |
+
dtype: object
|
| 1341 |
+
|
| 1342 |
+
>>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
|
| 1343 |
+
>>> idx
|
| 1344 |
+
DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
|
| 1345 |
+
dtype='datetime64[ns]', freq='D')
|
| 1346 |
+
>>> idx.day_name()
|
| 1347 |
+
Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object')
|
| 1348 |
+
|
| 1349 |
+
Using the ``locale`` parameter you can set a different locale language,
|
| 1350 |
+
for example: ``idx.day_name(locale='pt_BR.utf8')`` will return day
|
| 1351 |
+
names in Brazilian Portuguese language.
|
| 1352 |
+
|
| 1353 |
+
>>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
|
| 1354 |
+
>>> idx
|
| 1355 |
+
DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
|
| 1356 |
+
dtype='datetime64[ns]', freq='D')
|
| 1357 |
+
>>> idx.day_name(locale='pt_BR.utf8') # doctest: +SKIP
|
| 1358 |
+
Index(['Segunda', 'Terça', 'Quarta'], dtype='object')
|
| 1359 |
+
"""
|
| 1360 |
+
values = self._local_timestamps()
|
| 1361 |
+
|
| 1362 |
+
result = fields.get_date_name_field(
|
| 1363 |
+
values, "day_name", locale=locale, reso=self._creso
|
| 1364 |
+
)
|
| 1365 |
+
result = self._maybe_mask_results(result, fill_value=None)
|
| 1366 |
+
return result
|
| 1367 |
+
|
| 1368 |
+
@property
|
| 1369 |
+
def time(self) -> npt.NDArray[np.object_]:
|
| 1370 |
+
"""
|
| 1371 |
+
Returns numpy array of :class:`datetime.time` objects.
|
| 1372 |
+
|
| 1373 |
+
The time part of the Timestamps.
|
| 1374 |
+
|
| 1375 |
+
Examples
|
| 1376 |
+
--------
|
| 1377 |
+
For Series:
|
| 1378 |
+
|
| 1379 |
+
>>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
|
| 1380 |
+
>>> s = pd.to_datetime(s)
|
| 1381 |
+
>>> s
|
| 1382 |
+
0 2020-01-01 10:00:00+00:00
|
| 1383 |
+
1 2020-02-01 11:00:00+00:00
|
| 1384 |
+
dtype: datetime64[ns, UTC]
|
| 1385 |
+
>>> s.dt.time
|
| 1386 |
+
0 10:00:00
|
| 1387 |
+
1 11:00:00
|
| 1388 |
+
dtype: object
|
| 1389 |
+
|
| 1390 |
+
For DatetimeIndex:
|
| 1391 |
+
|
| 1392 |
+
>>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
|
| 1393 |
+
... "2/1/2020 11:00:00+00:00"])
|
| 1394 |
+
>>> idx.time
|
| 1395 |
+
array([datetime.time(10, 0), datetime.time(11, 0)], dtype=object)
|
| 1396 |
+
"""
|
| 1397 |
+
# If the Timestamps have a timezone that is not UTC,
|
| 1398 |
+
# convert them into their i8 representation while
|
| 1399 |
+
# keeping their timezone and not using UTC
|
| 1400 |
+
timestamps = self._local_timestamps()
|
| 1401 |
+
|
| 1402 |
+
return ints_to_pydatetime(timestamps, box="time", reso=self._creso)
|
| 1403 |
+
|
| 1404 |
+
@property
|
| 1405 |
+
def timetz(self) -> npt.NDArray[np.object_]:
|
| 1406 |
+
"""
|
| 1407 |
+
Returns numpy array of :class:`datetime.time` objects with timezones.
|
| 1408 |
+
|
| 1409 |
+
The time part of the Timestamps.
|
| 1410 |
+
|
| 1411 |
+
Examples
|
| 1412 |
+
--------
|
| 1413 |
+
For Series:
|
| 1414 |
+
|
| 1415 |
+
>>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
|
| 1416 |
+
>>> s = pd.to_datetime(s)
|
| 1417 |
+
>>> s
|
| 1418 |
+
0 2020-01-01 10:00:00+00:00
|
| 1419 |
+
1 2020-02-01 11:00:00+00:00
|
| 1420 |
+
dtype: datetime64[ns, UTC]
|
| 1421 |
+
>>> s.dt.timetz
|
| 1422 |
+
0 10:00:00+00:00
|
| 1423 |
+
1 11:00:00+00:00
|
| 1424 |
+
dtype: object
|
| 1425 |
+
|
| 1426 |
+
For DatetimeIndex:
|
| 1427 |
+
|
| 1428 |
+
>>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
|
| 1429 |
+
... "2/1/2020 11:00:00+00:00"])
|
| 1430 |
+
>>> idx.timetz
|
| 1431 |
+
array([datetime.time(10, 0, tzinfo=datetime.timezone.utc),
|
| 1432 |
+
datetime.time(11, 0, tzinfo=datetime.timezone.utc)], dtype=object)
|
| 1433 |
+
"""
|
| 1434 |
+
return ints_to_pydatetime(self.asi8, self.tz, box="time", reso=self._creso)
|
| 1435 |
+
|
| 1436 |
+
@property
|
| 1437 |
+
def date(self) -> npt.NDArray[np.object_]:
|
| 1438 |
+
"""
|
| 1439 |
+
Returns numpy array of python :class:`datetime.date` objects.
|
| 1440 |
+
|
| 1441 |
+
Namely, the date part of Timestamps without time and
|
| 1442 |
+
timezone information.
|
| 1443 |
+
|
| 1444 |
+
Examples
|
| 1445 |
+
--------
|
| 1446 |
+
For Series:
|
| 1447 |
+
|
| 1448 |
+
>>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
|
| 1449 |
+
>>> s = pd.to_datetime(s)
|
| 1450 |
+
>>> s
|
| 1451 |
+
0 2020-01-01 10:00:00+00:00
|
| 1452 |
+
1 2020-02-01 11:00:00+00:00
|
| 1453 |
+
dtype: datetime64[ns, UTC]
|
| 1454 |
+
>>> s.dt.date
|
| 1455 |
+
0 2020-01-01
|
| 1456 |
+
1 2020-02-01
|
| 1457 |
+
dtype: object
|
| 1458 |
+
|
| 1459 |
+
For DatetimeIndex:
|
| 1460 |
+
|
| 1461 |
+
>>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
|
| 1462 |
+
... "2/1/2020 11:00:00+00:00"])
|
| 1463 |
+
>>> idx.date
|
| 1464 |
+
array([datetime.date(2020, 1, 1), datetime.date(2020, 2, 1)], dtype=object)
|
| 1465 |
+
"""
|
| 1466 |
+
# If the Timestamps have a timezone that is not UTC,
|
| 1467 |
+
# convert them into their i8 representation while
|
| 1468 |
+
# keeping their timezone and not using UTC
|
| 1469 |
+
timestamps = self._local_timestamps()
|
| 1470 |
+
|
| 1471 |
+
return ints_to_pydatetime(timestamps, box="date", reso=self._creso)
|
| 1472 |
+
|
| 1473 |
+
def isocalendar(self) -> DataFrame:
|
| 1474 |
+
"""
|
| 1475 |
+
Calculate year, week, and day according to the ISO 8601 standard.
|
| 1476 |
+
|
| 1477 |
+
Returns
|
| 1478 |
+
-------
|
| 1479 |
+
DataFrame
|
| 1480 |
+
With columns year, week and day.
|
| 1481 |
+
|
| 1482 |
+
See Also
|
| 1483 |
+
--------
|
| 1484 |
+
Timestamp.isocalendar : Function return a 3-tuple containing ISO year,
|
| 1485 |
+
week number, and weekday for the given Timestamp object.
|
| 1486 |
+
datetime.date.isocalendar : Return a named tuple object with
|
| 1487 |
+
three components: year, week and weekday.
|
| 1488 |
+
|
| 1489 |
+
Examples
|
| 1490 |
+
--------
|
| 1491 |
+
>>> idx = pd.date_range(start='2019-12-29', freq='D', periods=4)
|
| 1492 |
+
>>> idx.isocalendar()
|
| 1493 |
+
year week day
|
| 1494 |
+
2019-12-29 2019 52 7
|
| 1495 |
+
2019-12-30 2020 1 1
|
| 1496 |
+
2019-12-31 2020 1 2
|
| 1497 |
+
2020-01-01 2020 1 3
|
| 1498 |
+
>>> idx.isocalendar().week
|
| 1499 |
+
2019-12-29 52
|
| 1500 |
+
2019-12-30 1
|
| 1501 |
+
2019-12-31 1
|
| 1502 |
+
2020-01-01 1
|
| 1503 |
+
Freq: D, Name: week, dtype: UInt32
|
| 1504 |
+
"""
|
| 1505 |
+
from pandas import DataFrame
|
| 1506 |
+
|
| 1507 |
+
values = self._local_timestamps()
|
| 1508 |
+
sarray = fields.build_isocalendar_sarray(values, reso=self._creso)
|
| 1509 |
+
iso_calendar_df = DataFrame(
|
| 1510 |
+
sarray, columns=["year", "week", "day"], dtype="UInt32"
|
| 1511 |
+
)
|
| 1512 |
+
if self._hasna:
|
| 1513 |
+
iso_calendar_df.iloc[self._isnan] = None
|
| 1514 |
+
return iso_calendar_df
|
| 1515 |
+
|
| 1516 |
+
year = _field_accessor(
|
| 1517 |
+
"year",
|
| 1518 |
+
"Y",
|
| 1519 |
+
"""
|
| 1520 |
+
The year of the datetime.
|
| 1521 |
+
|
| 1522 |
+
Examples
|
| 1523 |
+
--------
|
| 1524 |
+
>>> datetime_series = pd.Series(
|
| 1525 |
+
... pd.date_range("2000-01-01", periods=3, freq="YE")
|
| 1526 |
+
... )
|
| 1527 |
+
>>> datetime_series
|
| 1528 |
+
0 2000-12-31
|
| 1529 |
+
1 2001-12-31
|
| 1530 |
+
2 2002-12-31
|
| 1531 |
+
dtype: datetime64[ns]
|
| 1532 |
+
>>> datetime_series.dt.year
|
| 1533 |
+
0 2000
|
| 1534 |
+
1 2001
|
| 1535 |
+
2 2002
|
| 1536 |
+
dtype: int32
|
| 1537 |
+
""",
|
| 1538 |
+
)
|
| 1539 |
+
month = _field_accessor(
|
| 1540 |
+
"month",
|
| 1541 |
+
"M",
|
| 1542 |
+
"""
|
| 1543 |
+
The month as January=1, December=12.
|
| 1544 |
+
|
| 1545 |
+
Examples
|
| 1546 |
+
--------
|
| 1547 |
+
>>> datetime_series = pd.Series(
|
| 1548 |
+
... pd.date_range("2000-01-01", periods=3, freq="ME")
|
| 1549 |
+
... )
|
| 1550 |
+
>>> datetime_series
|
| 1551 |
+
0 2000-01-31
|
| 1552 |
+
1 2000-02-29
|
| 1553 |
+
2 2000-03-31
|
| 1554 |
+
dtype: datetime64[ns]
|
| 1555 |
+
>>> datetime_series.dt.month
|
| 1556 |
+
0 1
|
| 1557 |
+
1 2
|
| 1558 |
+
2 3
|
| 1559 |
+
dtype: int32
|
| 1560 |
+
""",
|
| 1561 |
+
)
|
| 1562 |
+
day = _field_accessor(
|
| 1563 |
+
"day",
|
| 1564 |
+
"D",
|
| 1565 |
+
"""
|
| 1566 |
+
The day of the datetime.
|
| 1567 |
+
|
| 1568 |
+
Examples
|
| 1569 |
+
--------
|
| 1570 |
+
>>> datetime_series = pd.Series(
|
| 1571 |
+
... pd.date_range("2000-01-01", periods=3, freq="D")
|
| 1572 |
+
... )
|
| 1573 |
+
>>> datetime_series
|
| 1574 |
+
0 2000-01-01
|
| 1575 |
+
1 2000-01-02
|
| 1576 |
+
2 2000-01-03
|
| 1577 |
+
dtype: datetime64[ns]
|
| 1578 |
+
>>> datetime_series.dt.day
|
| 1579 |
+
0 1
|
| 1580 |
+
1 2
|
| 1581 |
+
2 3
|
| 1582 |
+
dtype: int32
|
| 1583 |
+
""",
|
| 1584 |
+
)
|
| 1585 |
+
hour = _field_accessor(
|
| 1586 |
+
"hour",
|
| 1587 |
+
"h",
|
| 1588 |
+
"""
|
| 1589 |
+
The hours of the datetime.
|
| 1590 |
+
|
| 1591 |
+
Examples
|
| 1592 |
+
--------
|
| 1593 |
+
>>> datetime_series = pd.Series(
|
| 1594 |
+
... pd.date_range("2000-01-01", periods=3, freq="h")
|
| 1595 |
+
... )
|
| 1596 |
+
>>> datetime_series
|
| 1597 |
+
0 2000-01-01 00:00:00
|
| 1598 |
+
1 2000-01-01 01:00:00
|
| 1599 |
+
2 2000-01-01 02:00:00
|
| 1600 |
+
dtype: datetime64[ns]
|
| 1601 |
+
>>> datetime_series.dt.hour
|
| 1602 |
+
0 0
|
| 1603 |
+
1 1
|
| 1604 |
+
2 2
|
| 1605 |
+
dtype: int32
|
| 1606 |
+
""",
|
| 1607 |
+
)
|
| 1608 |
+
minute = _field_accessor(
|
| 1609 |
+
"minute",
|
| 1610 |
+
"m",
|
| 1611 |
+
"""
|
| 1612 |
+
The minutes of the datetime.
|
| 1613 |
+
|
| 1614 |
+
Examples
|
| 1615 |
+
--------
|
| 1616 |
+
>>> datetime_series = pd.Series(
|
| 1617 |
+
... pd.date_range("2000-01-01", periods=3, freq="min")
|
| 1618 |
+
... )
|
| 1619 |
+
>>> datetime_series
|
| 1620 |
+
0 2000-01-01 00:00:00
|
| 1621 |
+
1 2000-01-01 00:01:00
|
| 1622 |
+
2 2000-01-01 00:02:00
|
| 1623 |
+
dtype: datetime64[ns]
|
| 1624 |
+
>>> datetime_series.dt.minute
|
| 1625 |
+
0 0
|
| 1626 |
+
1 1
|
| 1627 |
+
2 2
|
| 1628 |
+
dtype: int32
|
| 1629 |
+
""",
|
| 1630 |
+
)
|
| 1631 |
+
second = _field_accessor(
|
| 1632 |
+
"second",
|
| 1633 |
+
"s",
|
| 1634 |
+
"""
|
| 1635 |
+
The seconds of the datetime.
|
| 1636 |
+
|
| 1637 |
+
Examples
|
| 1638 |
+
--------
|
| 1639 |
+
>>> datetime_series = pd.Series(
|
| 1640 |
+
... pd.date_range("2000-01-01", periods=3, freq="s")
|
| 1641 |
+
... )
|
| 1642 |
+
>>> datetime_series
|
| 1643 |
+
0 2000-01-01 00:00:00
|
| 1644 |
+
1 2000-01-01 00:00:01
|
| 1645 |
+
2 2000-01-01 00:00:02
|
| 1646 |
+
dtype: datetime64[ns]
|
| 1647 |
+
>>> datetime_series.dt.second
|
| 1648 |
+
0 0
|
| 1649 |
+
1 1
|
| 1650 |
+
2 2
|
| 1651 |
+
dtype: int32
|
| 1652 |
+
""",
|
| 1653 |
+
)
|
| 1654 |
+
microsecond = _field_accessor(
|
| 1655 |
+
"microsecond",
|
| 1656 |
+
"us",
|
| 1657 |
+
"""
|
| 1658 |
+
The microseconds of the datetime.
|
| 1659 |
+
|
| 1660 |
+
Examples
|
| 1661 |
+
--------
|
| 1662 |
+
>>> datetime_series = pd.Series(
|
| 1663 |
+
... pd.date_range("2000-01-01", periods=3, freq="us")
|
| 1664 |
+
... )
|
| 1665 |
+
>>> datetime_series
|
| 1666 |
+
0 2000-01-01 00:00:00.000000
|
| 1667 |
+
1 2000-01-01 00:00:00.000001
|
| 1668 |
+
2 2000-01-01 00:00:00.000002
|
| 1669 |
+
dtype: datetime64[ns]
|
| 1670 |
+
>>> datetime_series.dt.microsecond
|
| 1671 |
+
0 0
|
| 1672 |
+
1 1
|
| 1673 |
+
2 2
|
| 1674 |
+
dtype: int32
|
| 1675 |
+
""",
|
| 1676 |
+
)
|
| 1677 |
+
nanosecond = _field_accessor(
|
| 1678 |
+
"nanosecond",
|
| 1679 |
+
"ns",
|
| 1680 |
+
"""
|
| 1681 |
+
The nanoseconds of the datetime.
|
| 1682 |
+
|
| 1683 |
+
Examples
|
| 1684 |
+
--------
|
| 1685 |
+
>>> datetime_series = pd.Series(
|
| 1686 |
+
... pd.date_range("2000-01-01", periods=3, freq="ns")
|
| 1687 |
+
... )
|
| 1688 |
+
>>> datetime_series
|
| 1689 |
+
0 2000-01-01 00:00:00.000000000
|
| 1690 |
+
1 2000-01-01 00:00:00.000000001
|
| 1691 |
+
2 2000-01-01 00:00:00.000000002
|
| 1692 |
+
dtype: datetime64[ns]
|
| 1693 |
+
>>> datetime_series.dt.nanosecond
|
| 1694 |
+
0 0
|
| 1695 |
+
1 1
|
| 1696 |
+
2 2
|
| 1697 |
+
dtype: int32
|
| 1698 |
+
""",
|
| 1699 |
+
)
|
| 1700 |
+
_dayofweek_doc = """
|
| 1701 |
+
The day of the week with Monday=0, Sunday=6.
|
| 1702 |
+
|
| 1703 |
+
Return the day of the week. It is assumed the week starts on
|
| 1704 |
+
Monday, which is denoted by 0 and ends on Sunday which is denoted
|
| 1705 |
+
by 6. This method is available on both Series with datetime
|
| 1706 |
+
values (using the `dt` accessor) or DatetimeIndex.
|
| 1707 |
+
|
| 1708 |
+
Returns
|
| 1709 |
+
-------
|
| 1710 |
+
Series or Index
|
| 1711 |
+
Containing integers indicating the day number.
|
| 1712 |
+
|
| 1713 |
+
See Also
|
| 1714 |
+
--------
|
| 1715 |
+
Series.dt.dayofweek : Alias.
|
| 1716 |
+
Series.dt.weekday : Alias.
|
| 1717 |
+
Series.dt.day_name : Returns the name of the day of the week.
|
| 1718 |
+
|
| 1719 |
+
Examples
|
| 1720 |
+
--------
|
| 1721 |
+
>>> s = pd.date_range('2016-12-31', '2017-01-08', freq='D').to_series()
|
| 1722 |
+
>>> s.dt.dayofweek
|
| 1723 |
+
2016-12-31 5
|
| 1724 |
+
2017-01-01 6
|
| 1725 |
+
2017-01-02 0
|
| 1726 |
+
2017-01-03 1
|
| 1727 |
+
2017-01-04 2
|
| 1728 |
+
2017-01-05 3
|
| 1729 |
+
2017-01-06 4
|
| 1730 |
+
2017-01-07 5
|
| 1731 |
+
2017-01-08 6
|
| 1732 |
+
Freq: D, dtype: int32
|
| 1733 |
+
"""
|
| 1734 |
+
day_of_week = _field_accessor("day_of_week", "dow", _dayofweek_doc)
|
| 1735 |
+
dayofweek = day_of_week
|
| 1736 |
+
weekday = day_of_week
|
| 1737 |
+
|
| 1738 |
+
day_of_year = _field_accessor(
|
| 1739 |
+
"dayofyear",
|
| 1740 |
+
"doy",
|
| 1741 |
+
"""
|
| 1742 |
+
The ordinal day of the year.
|
| 1743 |
+
|
| 1744 |
+
Examples
|
| 1745 |
+
--------
|
| 1746 |
+
For Series:
|
| 1747 |
+
|
| 1748 |
+
>>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
|
| 1749 |
+
>>> s = pd.to_datetime(s)
|
| 1750 |
+
>>> s
|
| 1751 |
+
0 2020-01-01 10:00:00+00:00
|
| 1752 |
+
1 2020-02-01 11:00:00+00:00
|
| 1753 |
+
dtype: datetime64[ns, UTC]
|
| 1754 |
+
>>> s.dt.dayofyear
|
| 1755 |
+
0 1
|
| 1756 |
+
1 32
|
| 1757 |
+
dtype: int32
|
| 1758 |
+
|
| 1759 |
+
For DatetimeIndex:
|
| 1760 |
+
|
| 1761 |
+
>>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
|
| 1762 |
+
... "2/1/2020 11:00:00+00:00"])
|
| 1763 |
+
>>> idx.dayofyear
|
| 1764 |
+
Index([1, 32], dtype='int32')
|
| 1765 |
+
""",
|
| 1766 |
+
)
|
| 1767 |
+
dayofyear = day_of_year
|
| 1768 |
+
quarter = _field_accessor(
|
| 1769 |
+
"quarter",
|
| 1770 |
+
"q",
|
| 1771 |
+
"""
|
| 1772 |
+
The quarter of the date.
|
| 1773 |
+
|
| 1774 |
+
Examples
|
| 1775 |
+
--------
|
| 1776 |
+
For Series:
|
| 1777 |
+
|
| 1778 |
+
>>> s = pd.Series(["1/1/2020 10:00:00+00:00", "4/1/2020 11:00:00+00:00"])
|
| 1779 |
+
>>> s = pd.to_datetime(s)
|
| 1780 |
+
>>> s
|
| 1781 |
+
0 2020-01-01 10:00:00+00:00
|
| 1782 |
+
1 2020-04-01 11:00:00+00:00
|
| 1783 |
+
dtype: datetime64[ns, UTC]
|
| 1784 |
+
>>> s.dt.quarter
|
| 1785 |
+
0 1
|
| 1786 |
+
1 2
|
| 1787 |
+
dtype: int32
|
| 1788 |
+
|
| 1789 |
+
For DatetimeIndex:
|
| 1790 |
+
|
| 1791 |
+
>>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00",
|
| 1792 |
+
... "2/1/2020 11:00:00+00:00"])
|
| 1793 |
+
>>> idx.quarter
|
| 1794 |
+
Index([1, 1], dtype='int32')
|
| 1795 |
+
""",
|
| 1796 |
+
)
|
| 1797 |
+
days_in_month = _field_accessor(
|
| 1798 |
+
"days_in_month",
|
| 1799 |
+
"dim",
|
| 1800 |
+
"""
|
| 1801 |
+
The number of days in the month.
|
| 1802 |
+
|
| 1803 |
+
Examples
|
| 1804 |
+
--------
|
| 1805 |
+
>>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"])
|
| 1806 |
+
>>> s = pd.to_datetime(s)
|
| 1807 |
+
>>> s
|
| 1808 |
+
0 2020-01-01 10:00:00+00:00
|
| 1809 |
+
1 2020-02-01 11:00:00+00:00
|
| 1810 |
+
dtype: datetime64[ns, UTC]
|
| 1811 |
+
>>> s.dt.daysinmonth
|
| 1812 |
+
0 31
|
| 1813 |
+
1 29
|
| 1814 |
+
dtype: int32
|
| 1815 |
+
""",
|
| 1816 |
+
)
|
| 1817 |
+
daysinmonth = days_in_month
|
| 1818 |
+
_is_month_doc = """
|
| 1819 |
+
Indicates whether the date is the {first_or_last} day of the month.
|
| 1820 |
+
|
| 1821 |
+
Returns
|
| 1822 |
+
-------
|
| 1823 |
+
Series or array
|
| 1824 |
+
For Series, returns a Series with boolean values.
|
| 1825 |
+
For DatetimeIndex, returns a boolean array.
|
| 1826 |
+
|
| 1827 |
+
See Also
|
| 1828 |
+
--------
|
| 1829 |
+
is_month_start : Return a boolean indicating whether the date
|
| 1830 |
+
is the first day of the month.
|
| 1831 |
+
is_month_end : Return a boolean indicating whether the date
|
| 1832 |
+
is the last day of the month.
|
| 1833 |
+
|
| 1834 |
+
Examples
|
| 1835 |
+
--------
|
| 1836 |
+
This method is available on Series with datetime values under
|
| 1837 |
+
the ``.dt`` accessor, and directly on DatetimeIndex.
|
| 1838 |
+
|
| 1839 |
+
>>> s = pd.Series(pd.date_range("2018-02-27", periods=3))
|
| 1840 |
+
>>> s
|
| 1841 |
+
0 2018-02-27
|
| 1842 |
+
1 2018-02-28
|
| 1843 |
+
2 2018-03-01
|
| 1844 |
+
dtype: datetime64[ns]
|
| 1845 |
+
>>> s.dt.is_month_start
|
| 1846 |
+
0 False
|
| 1847 |
+
1 False
|
| 1848 |
+
2 True
|
| 1849 |
+
dtype: bool
|
| 1850 |
+
>>> s.dt.is_month_end
|
| 1851 |
+
0 False
|
| 1852 |
+
1 True
|
| 1853 |
+
2 False
|
| 1854 |
+
dtype: bool
|
| 1855 |
+
|
| 1856 |
+
>>> idx = pd.date_range("2018-02-27", periods=3)
|
| 1857 |
+
>>> idx.is_month_start
|
| 1858 |
+
array([False, False, True])
|
| 1859 |
+
>>> idx.is_month_end
|
| 1860 |
+
array([False, True, False])
|
| 1861 |
+
"""
|
| 1862 |
+
is_month_start = _field_accessor(
|
| 1863 |
+
"is_month_start", "is_month_start", _is_month_doc.format(first_or_last="first")
|
| 1864 |
+
)
|
| 1865 |
+
|
| 1866 |
+
is_month_end = _field_accessor(
|
| 1867 |
+
"is_month_end", "is_month_end", _is_month_doc.format(first_or_last="last")
|
| 1868 |
+
)
|
| 1869 |
+
|
| 1870 |
+
is_quarter_start = _field_accessor(
|
| 1871 |
+
"is_quarter_start",
|
| 1872 |
+
"is_quarter_start",
|
| 1873 |
+
"""
|
| 1874 |
+
Indicator for whether the date is the first day of a quarter.
|
| 1875 |
+
|
| 1876 |
+
Returns
|
| 1877 |
+
-------
|
| 1878 |
+
is_quarter_start : Series or DatetimeIndex
|
| 1879 |
+
The same type as the original data with boolean values. Series will
|
| 1880 |
+
have the same name and index. DatetimeIndex will have the same
|
| 1881 |
+
name.
|
| 1882 |
+
|
| 1883 |
+
See Also
|
| 1884 |
+
--------
|
| 1885 |
+
quarter : Return the quarter of the date.
|
| 1886 |
+
is_quarter_end : Similar property for indicating the quarter end.
|
| 1887 |
+
|
| 1888 |
+
Examples
|
| 1889 |
+
--------
|
| 1890 |
+
This method is available on Series with datetime values under
|
| 1891 |
+
the ``.dt`` accessor, and directly on DatetimeIndex.
|
| 1892 |
+
|
| 1893 |
+
>>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
|
| 1894 |
+
... periods=4)})
|
| 1895 |
+
>>> df.assign(quarter=df.dates.dt.quarter,
|
| 1896 |
+
... is_quarter_start=df.dates.dt.is_quarter_start)
|
| 1897 |
+
dates quarter is_quarter_start
|
| 1898 |
+
0 2017-03-30 1 False
|
| 1899 |
+
1 2017-03-31 1 False
|
| 1900 |
+
2 2017-04-01 2 True
|
| 1901 |
+
3 2017-04-02 2 False
|
| 1902 |
+
|
| 1903 |
+
>>> idx = pd.date_range('2017-03-30', periods=4)
|
| 1904 |
+
>>> idx
|
| 1905 |
+
DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
|
| 1906 |
+
dtype='datetime64[ns]', freq='D')
|
| 1907 |
+
|
| 1908 |
+
>>> idx.is_quarter_start
|
| 1909 |
+
array([False, False, True, False])
|
| 1910 |
+
""",
|
| 1911 |
+
)
|
| 1912 |
+
is_quarter_end = _field_accessor(
|
| 1913 |
+
"is_quarter_end",
|
| 1914 |
+
"is_quarter_end",
|
| 1915 |
+
"""
|
| 1916 |
+
Indicator for whether the date is the last day of a quarter.
|
| 1917 |
+
|
| 1918 |
+
Returns
|
| 1919 |
+
-------
|
| 1920 |
+
is_quarter_end : Series or DatetimeIndex
|
| 1921 |
+
The same type as the original data with boolean values. Series will
|
| 1922 |
+
have the same name and index. DatetimeIndex will have the same
|
| 1923 |
+
name.
|
| 1924 |
+
|
| 1925 |
+
See Also
|
| 1926 |
+
--------
|
| 1927 |
+
quarter : Return the quarter of the date.
|
| 1928 |
+
is_quarter_start : Similar property indicating the quarter start.
|
| 1929 |
+
|
| 1930 |
+
Examples
|
| 1931 |
+
--------
|
| 1932 |
+
This method is available on Series with datetime values under
|
| 1933 |
+
the ``.dt`` accessor, and directly on DatetimeIndex.
|
| 1934 |
+
|
| 1935 |
+
>>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
|
| 1936 |
+
... periods=4)})
|
| 1937 |
+
>>> df.assign(quarter=df.dates.dt.quarter,
|
| 1938 |
+
... is_quarter_end=df.dates.dt.is_quarter_end)
|
| 1939 |
+
dates quarter is_quarter_end
|
| 1940 |
+
0 2017-03-30 1 False
|
| 1941 |
+
1 2017-03-31 1 True
|
| 1942 |
+
2 2017-04-01 2 False
|
| 1943 |
+
3 2017-04-02 2 False
|
| 1944 |
+
|
| 1945 |
+
>>> idx = pd.date_range('2017-03-30', periods=4)
|
| 1946 |
+
>>> idx
|
| 1947 |
+
DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
|
| 1948 |
+
dtype='datetime64[ns]', freq='D')
|
| 1949 |
+
|
| 1950 |
+
>>> idx.is_quarter_end
|
| 1951 |
+
array([False, True, False, False])
|
| 1952 |
+
""",
|
| 1953 |
+
)
|
| 1954 |
+
is_year_start = _field_accessor(
|
| 1955 |
+
"is_year_start",
|
| 1956 |
+
"is_year_start",
|
| 1957 |
+
"""
|
| 1958 |
+
Indicate whether the date is the first day of a year.
|
| 1959 |
+
|
| 1960 |
+
Returns
|
| 1961 |
+
-------
|
| 1962 |
+
Series or DatetimeIndex
|
| 1963 |
+
The same type as the original data with boolean values. Series will
|
| 1964 |
+
have the same name and index. DatetimeIndex will have the same
|
| 1965 |
+
name.
|
| 1966 |
+
|
| 1967 |
+
See Also
|
| 1968 |
+
--------
|
| 1969 |
+
is_year_end : Similar property indicating the last day of the year.
|
| 1970 |
+
|
| 1971 |
+
Examples
|
| 1972 |
+
--------
|
| 1973 |
+
This method is available on Series with datetime values under
|
| 1974 |
+
the ``.dt`` accessor, and directly on DatetimeIndex.
|
| 1975 |
+
|
| 1976 |
+
>>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
|
| 1977 |
+
>>> dates
|
| 1978 |
+
0 2017-12-30
|
| 1979 |
+
1 2017-12-31
|
| 1980 |
+
2 2018-01-01
|
| 1981 |
+
dtype: datetime64[ns]
|
| 1982 |
+
|
| 1983 |
+
>>> dates.dt.is_year_start
|
| 1984 |
+
0 False
|
| 1985 |
+
1 False
|
| 1986 |
+
2 True
|
| 1987 |
+
dtype: bool
|
| 1988 |
+
|
| 1989 |
+
>>> idx = pd.date_range("2017-12-30", periods=3)
|
| 1990 |
+
>>> idx
|
| 1991 |
+
DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
|
| 1992 |
+
dtype='datetime64[ns]', freq='D')
|
| 1993 |
+
|
| 1994 |
+
>>> idx.is_year_start
|
| 1995 |
+
array([False, False, True])
|
| 1996 |
+
""",
|
| 1997 |
+
)
|
| 1998 |
+
is_year_end = _field_accessor(
|
| 1999 |
+
"is_year_end",
|
| 2000 |
+
"is_year_end",
|
| 2001 |
+
"""
|
| 2002 |
+
Indicate whether the date is the last day of the year.
|
| 2003 |
+
|
| 2004 |
+
Returns
|
| 2005 |
+
-------
|
| 2006 |
+
Series or DatetimeIndex
|
| 2007 |
+
The same type as the original data with boolean values. Series will
|
| 2008 |
+
have the same name and index. DatetimeIndex will have the same
|
| 2009 |
+
name.
|
| 2010 |
+
|
| 2011 |
+
See Also
|
| 2012 |
+
--------
|
| 2013 |
+
is_year_start : Similar property indicating the start of the year.
|
| 2014 |
+
|
| 2015 |
+
Examples
|
| 2016 |
+
--------
|
| 2017 |
+
This method is available on Series with datetime values under
|
| 2018 |
+
the ``.dt`` accessor, and directly on DatetimeIndex.
|
| 2019 |
+
|
| 2020 |
+
>>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
|
| 2021 |
+
>>> dates
|
| 2022 |
+
0 2017-12-30
|
| 2023 |
+
1 2017-12-31
|
| 2024 |
+
2 2018-01-01
|
| 2025 |
+
dtype: datetime64[ns]
|
| 2026 |
+
|
| 2027 |
+
>>> dates.dt.is_year_end
|
| 2028 |
+
0 False
|
| 2029 |
+
1 True
|
| 2030 |
+
2 False
|
| 2031 |
+
dtype: bool
|
| 2032 |
+
|
| 2033 |
+
>>> idx = pd.date_range("2017-12-30", periods=3)
|
| 2034 |
+
>>> idx
|
| 2035 |
+
DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
|
| 2036 |
+
dtype='datetime64[ns]', freq='D')
|
| 2037 |
+
|
| 2038 |
+
>>> idx.is_year_end
|
| 2039 |
+
array([False, True, False])
|
| 2040 |
+
""",
|
| 2041 |
+
)
|
| 2042 |
+
is_leap_year = _field_accessor(
|
| 2043 |
+
"is_leap_year",
|
| 2044 |
+
"is_leap_year",
|
| 2045 |
+
"""
|
| 2046 |
+
Boolean indicator if the date belongs to a leap year.
|
| 2047 |
+
|
| 2048 |
+
A leap year is a year, which has 366 days (instead of 365) including
|
| 2049 |
+
29th of February as an intercalary day.
|
| 2050 |
+
Leap years are years which are multiples of four with the exception
|
| 2051 |
+
of years divisible by 100 but not by 400.
|
| 2052 |
+
|
| 2053 |
+
Returns
|
| 2054 |
+
-------
|
| 2055 |
+
Series or ndarray
|
| 2056 |
+
Booleans indicating if dates belong to a leap year.
|
| 2057 |
+
|
| 2058 |
+
Examples
|
| 2059 |
+
--------
|
| 2060 |
+
This method is available on Series with datetime values under
|
| 2061 |
+
the ``.dt`` accessor, and directly on DatetimeIndex.
|
| 2062 |
+
|
| 2063 |
+
>>> idx = pd.date_range("2012-01-01", "2015-01-01", freq="YE")
|
| 2064 |
+
>>> idx
|
| 2065 |
+
DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'],
|
| 2066 |
+
dtype='datetime64[ns]', freq='YE-DEC')
|
| 2067 |
+
>>> idx.is_leap_year
|
| 2068 |
+
array([ True, False, False])
|
| 2069 |
+
|
| 2070 |
+
>>> dates_series = pd.Series(idx)
|
| 2071 |
+
>>> dates_series
|
| 2072 |
+
0 2012-12-31
|
| 2073 |
+
1 2013-12-31
|
| 2074 |
+
2 2014-12-31
|
| 2075 |
+
dtype: datetime64[ns]
|
| 2076 |
+
>>> dates_series.dt.is_leap_year
|
| 2077 |
+
0 True
|
| 2078 |
+
1 False
|
| 2079 |
+
2 False
|
| 2080 |
+
dtype: bool
|
| 2081 |
+
""",
|
| 2082 |
+
)
|
| 2083 |
+
|
| 2084 |
+
def to_julian_date(self) -> npt.NDArray[np.float64]:
    """
    Convert Datetime Array to float64 ndarray of Julian Dates.

    0 Julian date is noon January 1, 4713 BC.
    https://en.wikipedia.org/wiki/Julian_day

    Returns
    -------
    numpy.ndarray
        Julian date for each element, computed from the ``year``,
        ``month``, ``day``, ``hour``, ``minute``, ``second``,
        ``microsecond`` and ``nanosecond`` fields of ``self``.
    """

    # http://mysite.verizon.net/aesir_research/date/jdalg2.htm
    year = np.asarray(self.year)
    month = np.asarray(self.month)
    day = np.asarray(self.day)

    # January and February are treated as months 13 and 14 of the
    # preceding year so the month-length term below stays monotonic.
    testarr = month < 3
    year[testarr] -= 1
    month[testarr] += 12

    return (
        day
        + np.fix((153 * month - 457) / 5)
        + 365 * year
        + np.floor(year / 4)
        - np.floor(year / 100)
        + np.floor(year / 400)
        + 1_721_118.5
        # fractional day contributed by the time-of-day fields
        + (
            self.hour
            + self.minute / 60
            + self.second / 3600
            + self.microsecond / 3600 / 10**6
            + self.nanosecond / 3600 / 10**9
        )
        / 24
    )
|
| 2115 |
+
|
| 2116 |
+
# -----------------------------------------------------------------
|
| 2117 |
+
# Reductions
|
| 2118 |
+
|
| 2119 |
+
def std(
    self,
    axis=None,
    dtype=None,
    out=None,
    ddof: int = 1,
    keepdims: bool = False,
    skipna: bool = True,
):
    """
    Return sample standard deviation over requested axis.

    Normalized by `N-1` by default. This can be changed using ``ddof``.

    Parameters
    ----------
    axis : int, optional
        Axis for the function to be applied on. For :class:`pandas.Series`
        this parameter is unused and defaults to ``None``.
    dtype : optional
        Accepted in the signature but not used by this implementation.
    out : optional
        Forwarded unchanged to ``TimedeltaArray.std``.
    ddof : int, default 1
        Degrees of Freedom. The divisor used in calculations is `N - ddof`,
        where `N` represents the number of elements.
    keepdims : bool, default False
        Forwarded unchanged to ``TimedeltaArray.std``.
    skipna : bool, default True
        Exclude NA/null values. If an entire row/column is ``NA``, the result
        will be ``NA``.

    Returns
    -------
    Timedelta

    See Also
    --------
    numpy.ndarray.std : Returns the standard deviation of the array elements
        along given axis.
    Series.std : Return sample standard deviation over requested axis.

    Examples
    --------
    For :class:`pandas.DatetimeIndex`:

    >>> idx = pd.date_range('2001-01-01 00:00', periods=3)
    >>> idx
    DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03'],
                  dtype='datetime64[ns]', freq='D')
    >>> idx.std()
    Timedelta('1 days 00:00:00')
    """
    # Because std is translation-invariant, we can get self.std
    #  by calculating (self - Timestamp(0)).std, and we can do it
    #  without creating a copy by using a view on self._ndarray
    from pandas.core.arrays import TimedeltaArray

    # Find the td64 dtype with the same resolution as our dt64 dtype.
    # A separate local name keeps the (unused) ``dtype`` argument intact.
    td_dtype_str = self._ndarray.dtype.name.replace("datetime64", "timedelta64")
    td_dtype = np.dtype(td_dtype_str)

    tda = TimedeltaArray._simple_new(self._ndarray.view(td_dtype), dtype=td_dtype)

    return tda.std(axis=axis, out=out, ddof=ddof, keepdims=keepdims, skipna=skipna)
|
| 2178 |
+
|
| 2179 |
+
|
| 2180 |
+
# -------------------------------------------------------------------
|
| 2181 |
+
# Constructor Helpers
|
| 2182 |
+
|
| 2183 |
+
|
| 2184 |
+
def _sequence_to_dt64(
    data: ArrayLike,
    *,
    copy: bool = False,
    tz: tzinfo | None = None,
    dayfirst: bool = False,
    yearfirst: bool = False,
    ambiguous: TimeAmbiguous = "raise",
    out_unit: str | None = None,
):
    """
    Convert array-like data to a datetime64 ndarray plus a timezone.

    Parameters
    ----------
    data : np.ndarray or ExtensionArray
        dtl.ensure_arraylike_for_datetimelike has already been called.
    copy : bool, default False
    tz : tzinfo or None, default None
    dayfirst : bool, default False
    yearfirst : bool, default False
    ambiguous : str, bool, or arraylike, default 'raise'
        See pandas._libs.tslibs.tzconversion.tz_localize_to_utc.
    out_unit : str or None, default None
        Desired output resolution.

    Returns
    -------
    result : numpy.ndarray
        The sequence converted to a numpy array with dtype ``datetime64[unit]``.
        Where `unit` is "ns" unless specified otherwise by `out_unit`.
    tz : tzinfo or None
        Either the user-provided tzinfo or one inferred from the data.

    Raises
    ------
    TypeError : PeriodDType data is passed
    """

    # By this point we are assured to have either a numpy array or Index
    data, copy = maybe_convert_dtype(data, copy, tz=tz)
    data_dtype = getattr(data, "dtype", None)

    if out_unit is None:
        out_unit = "ns"
    out_dtype = np.dtype(f"M8[{out_unit}]")

    if data_dtype == object or is_string_dtype(data_dtype):
        # TODO: We do not have tests specific to string-dtypes,
        #  also complex or categorical or other extension
        data = cast(np.ndarray, data)
        copy = False
        if lib.infer_dtype(data, skipna=False) == "integer":
            # Much more performant than going through array_to_datetime
            data = data.astype(np.int64)
        elif tz is not None and ambiguous == "raise":
            obj_data = np.asarray(data, dtype=object)
            result = tslib.array_to_datetime_with_tz(
                obj_data,
                tz=tz,
                dayfirst=dayfirst,
                yearfirst=yearfirst,
                creso=abbrev_to_npy_unit(out_unit),
            )
            return result, tz
        else:
            converted, inferred_tz = objects_to_datetime64(
                data,
                dayfirst=dayfirst,
                yearfirst=yearfirst,
                allow_object=False,
                out_unit=out_unit or "ns",
            )
            copy = False
            if tz and inferred_tz:
                #  two timezones: convert to intended from base UTC repr
                # GH#42505 by convention, these are _already_ UTC
                result = converted

            elif inferred_tz:
                tz = inferred_tz
                result = converted

            else:
                result, _ = _construct_from_dt64_naive(
                    converted, tz=tz, copy=copy, ambiguous=ambiguous
                )
            return result, tz

        # Only reached on the object-integer path above: ``data`` is now int64.
        data_dtype = data.dtype

    # `data` may have originally been a Categorical[datetime64[ns, tz]],
    # so we need to handle these types.
    if isinstance(data_dtype, DatetimeTZDtype):
        # DatetimeArray -> ndarray
        data = cast(DatetimeArray, data)
        tz = _maybe_infer_tz(tz, data.tz)
        result = data._ndarray

    elif lib.is_np_dtype(data_dtype, "M"):
        # tz-naive DatetimeArray or ndarray[datetime64]
        if isinstance(data, DatetimeArray):
            data = data._ndarray

        data = cast(np.ndarray, data)
        result, copy = _construct_from_dt64_naive(
            data, tz=tz, copy=copy, ambiguous=ambiguous
        )

    else:
        # must be integer dtype otherwise
        # assume this data are epoch timestamps
        if data.dtype != INT64_DTYPE:
            data = data.astype(np.int64, copy=False)
            copy = False
        data = cast(np.ndarray, data)
        result = data.view(out_dtype)

    if copy:
        result = result.copy()

    assert isinstance(result, np.ndarray), type(result)
    assert result.dtype.kind == "M"
    assert result.dtype != "M8"
    assert is_supported_dtype(result.dtype)
    return result, tz
|
| 2308 |
+
|
| 2309 |
+
|
| 2310 |
+
def _construct_from_dt64_naive(
|
| 2311 |
+
data: np.ndarray, *, tz: tzinfo | None, copy: bool, ambiguous: TimeAmbiguous
|
| 2312 |
+
) -> tuple[np.ndarray, bool]:
|
| 2313 |
+
"""
|
| 2314 |
+
Convert datetime64 data to a supported dtype, localizing if necessary.
|
| 2315 |
+
"""
|
| 2316 |
+
# Caller is responsible for ensuring
|
| 2317 |
+
# lib.is_np_dtype(data.dtype)
|
| 2318 |
+
|
| 2319 |
+
new_dtype = data.dtype
|
| 2320 |
+
if not is_supported_dtype(new_dtype):
|
| 2321 |
+
# Cast to the nearest supported unit, generally "s"
|
| 2322 |
+
new_dtype = get_supported_dtype(new_dtype)
|
| 2323 |
+
data = astype_overflowsafe(data, dtype=new_dtype, copy=False)
|
| 2324 |
+
copy = False
|
| 2325 |
+
|
| 2326 |
+
if data.dtype.byteorder == ">":
|
| 2327 |
+
# TODO: better way to handle this? non-copying alternative?
|
| 2328 |
+
# without this, test_constructor_datetime64_bigendian fails
|
| 2329 |
+
data = data.astype(data.dtype.newbyteorder("<"))
|
| 2330 |
+
new_dtype = data.dtype
|
| 2331 |
+
copy = False
|
| 2332 |
+
|
| 2333 |
+
if tz is not None:
|
| 2334 |
+
# Convert tz-naive to UTC
|
| 2335 |
+
# TODO: if tz is UTC, are there situations where we *don't* want a
|
| 2336 |
+
# copy? tz_localize_to_utc always makes one.
|
| 2337 |
+
shape = data.shape
|
| 2338 |
+
if data.ndim > 1:
|
| 2339 |
+
data = data.ravel()
|
| 2340 |
+
|
| 2341 |
+
data_unit = get_unit_from_dtype(new_dtype)
|
| 2342 |
+
data = tzconversion.tz_localize_to_utc(
|
| 2343 |
+
data.view("i8"), tz, ambiguous=ambiguous, creso=data_unit
|
| 2344 |
+
)
|
| 2345 |
+
data = data.view(new_dtype)
|
| 2346 |
+
data = data.reshape(shape)
|
| 2347 |
+
|
| 2348 |
+
assert data.dtype == new_dtype, data.dtype
|
| 2349 |
+
result = data
|
| 2350 |
+
|
| 2351 |
+
return result, copy
|
| 2352 |
+
|
| 2353 |
+
|
| 2354 |
+
def objects_to_datetime64(
|
| 2355 |
+
data: np.ndarray,
|
| 2356 |
+
dayfirst,
|
| 2357 |
+
yearfirst,
|
| 2358 |
+
utc: bool = False,
|
| 2359 |
+
errors: DateTimeErrorChoices = "raise",
|
| 2360 |
+
allow_object: bool = False,
|
| 2361 |
+
out_unit: str = "ns",
|
| 2362 |
+
):
|
| 2363 |
+
"""
|
| 2364 |
+
Convert data to array of timestamps.
|
| 2365 |
+
|
| 2366 |
+
Parameters
|
| 2367 |
+
----------
|
| 2368 |
+
data : np.ndarray[object]
|
| 2369 |
+
dayfirst : bool
|
| 2370 |
+
yearfirst : bool
|
| 2371 |
+
utc : bool, default False
|
| 2372 |
+
Whether to convert/localize timestamps to UTC.
|
| 2373 |
+
errors : {'raise', 'ignore', 'coerce'}
|
| 2374 |
+
allow_object : bool
|
| 2375 |
+
Whether to return an object-dtype ndarray instead of raising if the
|
| 2376 |
+
data contains more than one timezone.
|
| 2377 |
+
out_unit : str, default "ns"
|
| 2378 |
+
|
| 2379 |
+
Returns
|
| 2380 |
+
-------
|
| 2381 |
+
result : ndarray
|
| 2382 |
+
np.datetime64[out_unit] if returned values represent wall times or UTC
|
| 2383 |
+
timestamps.
|
| 2384 |
+
object if mixed timezones
|
| 2385 |
+
inferred_tz : tzinfo or None
|
| 2386 |
+
If not None, then the datetime64 values in `result` denote UTC timestamps.
|
| 2387 |
+
|
| 2388 |
+
Raises
|
| 2389 |
+
------
|
| 2390 |
+
ValueError : if data cannot be converted to datetimes
|
| 2391 |
+
TypeError : When a type cannot be converted to datetime
|
| 2392 |
+
"""
|
| 2393 |
+
assert errors in ["raise", "ignore", "coerce"]
|
| 2394 |
+
|
| 2395 |
+
# if str-dtype, convert
|
| 2396 |
+
data = np.asarray(data, dtype=np.object_)
|
| 2397 |
+
|
| 2398 |
+
result, tz_parsed = tslib.array_to_datetime(
|
| 2399 |
+
data,
|
| 2400 |
+
errors=errors,
|
| 2401 |
+
utc=utc,
|
| 2402 |
+
dayfirst=dayfirst,
|
| 2403 |
+
yearfirst=yearfirst,
|
| 2404 |
+
creso=abbrev_to_npy_unit(out_unit),
|
| 2405 |
+
)
|
| 2406 |
+
|
| 2407 |
+
if tz_parsed is not None:
|
| 2408 |
+
# We can take a shortcut since the datetime64 numpy array
|
| 2409 |
+
# is in UTC
|
| 2410 |
+
return result, tz_parsed
|
| 2411 |
+
elif result.dtype.kind == "M":
|
| 2412 |
+
return result, tz_parsed
|
| 2413 |
+
elif result.dtype == object:
|
| 2414 |
+
# GH#23675 when called via `pd.to_datetime`, returning an object-dtype
|
| 2415 |
+
# array is allowed. When called via `pd.DatetimeIndex`, we can
|
| 2416 |
+
# only accept datetime64 dtype, so raise TypeError if object-dtype
|
| 2417 |
+
# is returned, as that indicates the values can be recognized as
|
| 2418 |
+
# datetimes but they have conflicting timezones/awareness
|
| 2419 |
+
if allow_object:
|
| 2420 |
+
return result, tz_parsed
|
| 2421 |
+
raise TypeError("DatetimeIndex has mixed timezones")
|
| 2422 |
+
else: # pragma: no cover
|
| 2423 |
+
# GH#23675 this TypeError should never be hit, whereas the TypeError
|
| 2424 |
+
# in the object-dtype branch above is reachable.
|
| 2425 |
+
raise TypeError(result)
|
| 2426 |
+
|
| 2427 |
+
|
| 2428 |
+
def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None):
|
| 2429 |
+
"""
|
| 2430 |
+
Convert data based on dtype conventions, issuing
|
| 2431 |
+
errors where appropriate.
|
| 2432 |
+
|
| 2433 |
+
Parameters
|
| 2434 |
+
----------
|
| 2435 |
+
data : np.ndarray or pd.Index
|
| 2436 |
+
copy : bool
|
| 2437 |
+
tz : tzinfo or None, default None
|
| 2438 |
+
|
| 2439 |
+
Returns
|
| 2440 |
+
-------
|
| 2441 |
+
data : np.ndarray or pd.Index
|
| 2442 |
+
copy : bool
|
| 2443 |
+
|
| 2444 |
+
Raises
|
| 2445 |
+
------
|
| 2446 |
+
TypeError : PeriodDType data is passed
|
| 2447 |
+
"""
|
| 2448 |
+
if not hasattr(data, "dtype"):
|
| 2449 |
+
# e.g. collections.deque
|
| 2450 |
+
return data, copy
|
| 2451 |
+
|
| 2452 |
+
if is_float_dtype(data.dtype):
|
| 2453 |
+
# pre-2.0 we treated these as wall-times, inconsistent with ints
|
| 2454 |
+
# GH#23675, GH#45573 deprecated to treat symmetrically with integer dtypes.
|
| 2455 |
+
# Note: data.astype(np.int64) fails ARM tests, see
|
| 2456 |
+
# https://github.com/pandas-dev/pandas/issues/49468.
|
| 2457 |
+
data = data.astype(DT64NS_DTYPE).view("i8")
|
| 2458 |
+
copy = False
|
| 2459 |
+
|
| 2460 |
+
elif lib.is_np_dtype(data.dtype, "m") or is_bool_dtype(data.dtype):
|
| 2461 |
+
# GH#29794 enforcing deprecation introduced in GH#23539
|
| 2462 |
+
raise TypeError(f"dtype {data.dtype} cannot be converted to datetime64[ns]")
|
| 2463 |
+
elif isinstance(data.dtype, PeriodDtype):
|
| 2464 |
+
# Note: without explicitly raising here, PeriodIndex
|
| 2465 |
+
# test_setops.test_join_does_not_recur fails
|
| 2466 |
+
raise TypeError(
|
| 2467 |
+
"Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead"
|
| 2468 |
+
)
|
| 2469 |
+
|
| 2470 |
+
elif isinstance(data.dtype, ExtensionDtype) and not isinstance(
|
| 2471 |
+
data.dtype, DatetimeTZDtype
|
| 2472 |
+
):
|
| 2473 |
+
# TODO: We have no tests for these
|
| 2474 |
+
data = np.array(data, dtype=np.object_)
|
| 2475 |
+
copy = False
|
| 2476 |
+
|
| 2477 |
+
return data, copy
|
| 2478 |
+
|
| 2479 |
+
|
| 2480 |
+
# -------------------------------------------------------------------
|
| 2481 |
+
# Validation and Inference
|
| 2482 |
+
|
| 2483 |
+
|
| 2484 |
+
def _maybe_infer_tz(tz: tzinfo | None, inferred_tz: tzinfo | None) -> tzinfo | None:
|
| 2485 |
+
"""
|
| 2486 |
+
If a timezone is inferred from data, check that it is compatible with
|
| 2487 |
+
the user-provided timezone, if any.
|
| 2488 |
+
|
| 2489 |
+
Parameters
|
| 2490 |
+
----------
|
| 2491 |
+
tz : tzinfo or None
|
| 2492 |
+
inferred_tz : tzinfo or None
|
| 2493 |
+
|
| 2494 |
+
Returns
|
| 2495 |
+
-------
|
| 2496 |
+
tz : tzinfo or None
|
| 2497 |
+
|
| 2498 |
+
Raises
|
| 2499 |
+
------
|
| 2500 |
+
TypeError : if both timezones are present but do not match
|
| 2501 |
+
"""
|
| 2502 |
+
if tz is None:
|
| 2503 |
+
tz = inferred_tz
|
| 2504 |
+
elif inferred_tz is None:
|
| 2505 |
+
pass
|
| 2506 |
+
elif not timezones.tz_compare(tz, inferred_tz):
|
| 2507 |
+
raise TypeError(
|
| 2508 |
+
f"data is already tz-aware {inferred_tz}, unable to "
|
| 2509 |
+
f"set specified tz: {tz}"
|
| 2510 |
+
)
|
| 2511 |
+
return tz
|
| 2512 |
+
|
| 2513 |
+
|
| 2514 |
+
def _validate_dt64_dtype(dtype):
|
| 2515 |
+
"""
|
| 2516 |
+
Check that a dtype, if passed, represents either a numpy datetime64[ns]
|
| 2517 |
+
dtype or a pandas DatetimeTZDtype.
|
| 2518 |
+
|
| 2519 |
+
Parameters
|
| 2520 |
+
----------
|
| 2521 |
+
dtype : object
|
| 2522 |
+
|
| 2523 |
+
Returns
|
| 2524 |
+
-------
|
| 2525 |
+
dtype : None, numpy.dtype, or DatetimeTZDtype
|
| 2526 |
+
|
| 2527 |
+
Raises
|
| 2528 |
+
------
|
| 2529 |
+
ValueError : invalid dtype
|
| 2530 |
+
|
| 2531 |
+
Notes
|
| 2532 |
+
-----
|
| 2533 |
+
Unlike _validate_tz_from_dtype, this does _not_ allow non-existent
|
| 2534 |
+
tz errors to go through
|
| 2535 |
+
"""
|
| 2536 |
+
if dtype is not None:
|
| 2537 |
+
dtype = pandas_dtype(dtype)
|
| 2538 |
+
if dtype == np.dtype("M8"):
|
| 2539 |
+
# no precision, disallowed GH#24806
|
| 2540 |
+
msg = (
|
| 2541 |
+
"Passing in 'datetime64' dtype with no precision is not allowed. "
|
| 2542 |
+
"Please pass in 'datetime64[ns]' instead."
|
| 2543 |
+
)
|
| 2544 |
+
raise ValueError(msg)
|
| 2545 |
+
|
| 2546 |
+
if (
|
| 2547 |
+
isinstance(dtype, np.dtype)
|
| 2548 |
+
and (dtype.kind != "M" or not is_supported_dtype(dtype))
|
| 2549 |
+
) or not isinstance(dtype, (np.dtype, DatetimeTZDtype)):
|
| 2550 |
+
raise ValueError(
|
| 2551 |
+
f"Unexpected value for 'dtype': '{dtype}'. "
|
| 2552 |
+
"Must be 'datetime64[s]', 'datetime64[ms]', 'datetime64[us]', "
|
| 2553 |
+
"'datetime64[ns]' or DatetimeTZDtype'."
|
| 2554 |
+
)
|
| 2555 |
+
|
| 2556 |
+
if getattr(dtype, "tz", None):
|
| 2557 |
+
# https://github.com/pandas-dev/pandas/issues/18595
|
| 2558 |
+
# Ensure that we have a standard timezone for pytz objects.
|
| 2559 |
+
# Without this, things like adding an array of timedeltas and
|
| 2560 |
+
# a tz-aware Timestamp (with a tz specific to its datetime) will
|
| 2561 |
+
# be incorrect(ish?) for the array as a whole
|
| 2562 |
+
dtype = cast(DatetimeTZDtype, dtype)
|
| 2563 |
+
dtype = DatetimeTZDtype(
|
| 2564 |
+
unit=dtype.unit, tz=timezones.tz_standardize(dtype.tz)
|
| 2565 |
+
)
|
| 2566 |
+
|
| 2567 |
+
return dtype
|
| 2568 |
+
|
| 2569 |
+
|
| 2570 |
+
def _validate_tz_from_dtype(
|
| 2571 |
+
dtype, tz: tzinfo | None, explicit_tz_none: bool = False
|
| 2572 |
+
) -> tzinfo | None:
|
| 2573 |
+
"""
|
| 2574 |
+
If the given dtype is a DatetimeTZDtype, extract the implied
|
| 2575 |
+
tzinfo object from it and check that it does not conflict with the given
|
| 2576 |
+
tz.
|
| 2577 |
+
|
| 2578 |
+
Parameters
|
| 2579 |
+
----------
|
| 2580 |
+
dtype : dtype, str
|
| 2581 |
+
tz : None, tzinfo
|
| 2582 |
+
explicit_tz_none : bool, default False
|
| 2583 |
+
Whether tz=None was passed explicitly, as opposed to lib.no_default.
|
| 2584 |
+
|
| 2585 |
+
Returns
|
| 2586 |
+
-------
|
| 2587 |
+
tz : consensus tzinfo
|
| 2588 |
+
|
| 2589 |
+
Raises
|
| 2590 |
+
------
|
| 2591 |
+
ValueError : on tzinfo mismatch
|
| 2592 |
+
"""
|
| 2593 |
+
if dtype is not None:
|
| 2594 |
+
if isinstance(dtype, str):
|
| 2595 |
+
try:
|
| 2596 |
+
dtype = DatetimeTZDtype.construct_from_string(dtype)
|
| 2597 |
+
except TypeError:
|
| 2598 |
+
# Things like `datetime64[ns]`, which is OK for the
|
| 2599 |
+
# constructors, but also nonsense, which should be validated
|
| 2600 |
+
# but not by us. We *do* allow non-existent tz errors to
|
| 2601 |
+
# go through
|
| 2602 |
+
pass
|
| 2603 |
+
dtz = getattr(dtype, "tz", None)
|
| 2604 |
+
if dtz is not None:
|
| 2605 |
+
if tz is not None and not timezones.tz_compare(tz, dtz):
|
| 2606 |
+
raise ValueError("cannot supply both a tz and a dtype with a tz")
|
| 2607 |
+
if explicit_tz_none:
|
| 2608 |
+
raise ValueError("Cannot pass both a timezone-aware dtype and tz=None")
|
| 2609 |
+
tz = dtz
|
| 2610 |
+
|
| 2611 |
+
if tz is not None and lib.is_np_dtype(dtype, "M"):
|
| 2612 |
+
# We also need to check for the case where the user passed a
|
| 2613 |
+
# tz-naive dtype (i.e. datetime64[ns])
|
| 2614 |
+
if tz is not None and not timezones.tz_compare(tz, dtz):
|
| 2615 |
+
raise ValueError(
|
| 2616 |
+
"cannot supply both a tz and a "
|
| 2617 |
+
"timezone-naive dtype (i.e. datetime64[ns])"
|
| 2618 |
+
)
|
| 2619 |
+
|
| 2620 |
+
return tz
|
| 2621 |
+
|
| 2622 |
+
|
| 2623 |
+
def _infer_tz_from_endpoints(
|
| 2624 |
+
start: Timestamp, end: Timestamp, tz: tzinfo | None
|
| 2625 |
+
) -> tzinfo | None:
|
| 2626 |
+
"""
|
| 2627 |
+
If a timezone is not explicitly given via `tz`, see if one can
|
| 2628 |
+
be inferred from the `start` and `end` endpoints. If more than one
|
| 2629 |
+
of these inputs provides a timezone, require that they all agree.
|
| 2630 |
+
|
| 2631 |
+
Parameters
|
| 2632 |
+
----------
|
| 2633 |
+
start : Timestamp
|
| 2634 |
+
end : Timestamp
|
| 2635 |
+
tz : tzinfo or None
|
| 2636 |
+
|
| 2637 |
+
Returns
|
| 2638 |
+
-------
|
| 2639 |
+
tz : tzinfo or None
|
| 2640 |
+
|
| 2641 |
+
Raises
|
| 2642 |
+
------
|
| 2643 |
+
TypeError : if start and end timezones do not agree
|
| 2644 |
+
"""
|
| 2645 |
+
try:
|
| 2646 |
+
inferred_tz = timezones.infer_tzinfo(start, end)
|
| 2647 |
+
except AssertionError as err:
|
| 2648 |
+
# infer_tzinfo raises AssertionError if passed mismatched timezones
|
| 2649 |
+
raise TypeError(
|
| 2650 |
+
"Start and end cannot both be tz-aware with different timezones"
|
| 2651 |
+
) from err
|
| 2652 |
+
|
| 2653 |
+
inferred_tz = timezones.maybe_get_tz(inferred_tz)
|
| 2654 |
+
tz = timezones.maybe_get_tz(tz)
|
| 2655 |
+
|
| 2656 |
+
if tz is not None and inferred_tz is not None:
|
| 2657 |
+
if not timezones.tz_compare(inferred_tz, tz):
|
| 2658 |
+
raise AssertionError("Inferred time zone not equal to passed time zone")
|
| 2659 |
+
|
| 2660 |
+
elif inferred_tz is not None:
|
| 2661 |
+
tz = inferred_tz
|
| 2662 |
+
|
| 2663 |
+
return tz
|
| 2664 |
+
|
| 2665 |
+
|
| 2666 |
+
def _maybe_normalize_endpoints(
|
| 2667 |
+
start: Timestamp | None, end: Timestamp | None, normalize: bool
|
| 2668 |
+
):
|
| 2669 |
+
if normalize:
|
| 2670 |
+
if start is not None:
|
| 2671 |
+
start = start.normalize()
|
| 2672 |
+
|
| 2673 |
+
if end is not None:
|
| 2674 |
+
end = end.normalize()
|
| 2675 |
+
|
| 2676 |
+
return start, end
|
| 2677 |
+
|
| 2678 |
+
|
| 2679 |
+
def _maybe_localize_point(
|
| 2680 |
+
ts: Timestamp | None, freq, tz, ambiguous, nonexistent
|
| 2681 |
+
) -> Timestamp | None:
|
| 2682 |
+
"""
|
| 2683 |
+
Localize a start or end Timestamp to the timezone of the corresponding
|
| 2684 |
+
start or end Timestamp
|
| 2685 |
+
|
| 2686 |
+
Parameters
|
| 2687 |
+
----------
|
| 2688 |
+
ts : start or end Timestamp to potentially localize
|
| 2689 |
+
freq : Tick, DateOffset, or None
|
| 2690 |
+
tz : str, timezone object or None
|
| 2691 |
+
ambiguous: str, localization behavior for ambiguous times
|
| 2692 |
+
nonexistent: str, localization behavior for nonexistent times
|
| 2693 |
+
|
| 2694 |
+
Returns
|
| 2695 |
+
-------
|
| 2696 |
+
ts : Timestamp
|
| 2697 |
+
"""
|
| 2698 |
+
# Make sure start and end are timezone localized if:
|
| 2699 |
+
# 1) freq = a Timedelta-like frequency (Tick)
|
| 2700 |
+
# 2) freq = None i.e. generating a linspaced range
|
| 2701 |
+
if ts is not None and ts.tzinfo is None:
|
| 2702 |
+
# Note: We can't ambiguous='infer' a singular ambiguous time; however,
|
| 2703 |
+
# we have historically defaulted ambiguous=False
|
| 2704 |
+
ambiguous = ambiguous if ambiguous != "infer" else False
|
| 2705 |
+
localize_args = {"ambiguous": ambiguous, "nonexistent": nonexistent, "tz": None}
|
| 2706 |
+
if isinstance(freq, Tick) or freq is None:
|
| 2707 |
+
localize_args["tz"] = tz
|
| 2708 |
+
ts = ts.tz_localize(**localize_args)
|
| 2709 |
+
return ts
|
| 2710 |
+
|
| 2711 |
+
|
| 2712 |
+
def _generate_range(
|
| 2713 |
+
start: Timestamp | None,
|
| 2714 |
+
end: Timestamp | None,
|
| 2715 |
+
periods: int | None,
|
| 2716 |
+
offset: BaseOffset,
|
| 2717 |
+
*,
|
| 2718 |
+
unit: str,
|
| 2719 |
+
):
|
| 2720 |
+
"""
|
| 2721 |
+
Generates a sequence of dates corresponding to the specified time
|
| 2722 |
+
offset. Similar to dateutil.rrule except uses pandas DateOffset
|
| 2723 |
+
objects to represent time increments.
|
| 2724 |
+
|
| 2725 |
+
Parameters
|
| 2726 |
+
----------
|
| 2727 |
+
start : Timestamp or None
|
| 2728 |
+
end : Timestamp or None
|
| 2729 |
+
periods : int or None
|
| 2730 |
+
offset : DateOffset
|
| 2731 |
+
unit : str
|
| 2732 |
+
|
| 2733 |
+
Notes
|
| 2734 |
+
-----
|
| 2735 |
+
* This method is faster for generating weekdays than dateutil.rrule
|
| 2736 |
+
* At least two of (start, end, periods) must be specified.
|
| 2737 |
+
* If both start and end are specified, the returned dates will
|
| 2738 |
+
satisfy start <= date <= end.
|
| 2739 |
+
|
| 2740 |
+
Returns
|
| 2741 |
+
-------
|
| 2742 |
+
dates : generator object
|
| 2743 |
+
"""
|
| 2744 |
+
offset = to_offset(offset)
|
| 2745 |
+
|
| 2746 |
+
# Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
|
| 2747 |
+
# expected "Union[integer[Any], float, str, date, datetime64]"
|
| 2748 |
+
start = Timestamp(start) # type: ignore[arg-type]
|
| 2749 |
+
if start is not NaT:
|
| 2750 |
+
start = start.as_unit(unit)
|
| 2751 |
+
else:
|
| 2752 |
+
start = None
|
| 2753 |
+
|
| 2754 |
+
# Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
|
| 2755 |
+
# expected "Union[integer[Any], float, str, date, datetime64]"
|
| 2756 |
+
end = Timestamp(end) # type: ignore[arg-type]
|
| 2757 |
+
if end is not NaT:
|
| 2758 |
+
end = end.as_unit(unit)
|
| 2759 |
+
else:
|
| 2760 |
+
end = None
|
| 2761 |
+
|
| 2762 |
+
if start and not offset.is_on_offset(start):
|
| 2763 |
+
# Incompatible types in assignment (expression has type "datetime",
|
| 2764 |
+
# variable has type "Optional[Timestamp]")
|
| 2765 |
+
start = offset.rollforward(start) # type: ignore[assignment]
|
| 2766 |
+
|
| 2767 |
+
elif end and not offset.is_on_offset(end):
|
| 2768 |
+
# Incompatible types in assignment (expression has type "datetime",
|
| 2769 |
+
# variable has type "Optional[Timestamp]")
|
| 2770 |
+
end = offset.rollback(end) # type: ignore[assignment]
|
| 2771 |
+
|
| 2772 |
+
# Unsupported operand types for < ("Timestamp" and "None")
|
| 2773 |
+
if periods is None and end < start and offset.n >= 0: # type: ignore[operator]
|
| 2774 |
+
end = None
|
| 2775 |
+
periods = 0
|
| 2776 |
+
|
| 2777 |
+
if end is None:
|
| 2778 |
+
# error: No overload variant of "__radd__" of "BaseOffset" matches
|
| 2779 |
+
# argument type "None"
|
| 2780 |
+
end = start + (periods - 1) * offset # type: ignore[operator]
|
| 2781 |
+
|
| 2782 |
+
if start is None:
|
| 2783 |
+
# error: No overload variant of "__radd__" of "BaseOffset" matches
|
| 2784 |
+
# argument type "None"
|
| 2785 |
+
start = end - (periods - 1) * offset # type: ignore[operator]
|
| 2786 |
+
|
| 2787 |
+
start = cast(Timestamp, start)
|
| 2788 |
+
end = cast(Timestamp, end)
|
| 2789 |
+
|
| 2790 |
+
cur = start
|
| 2791 |
+
if offset.n >= 0:
|
| 2792 |
+
while cur <= end:
|
| 2793 |
+
yield cur
|
| 2794 |
+
|
| 2795 |
+
if cur == end:
|
| 2796 |
+
# GH#24252 avoid overflows by not performing the addition
|
| 2797 |
+
# in offset.apply unless we have to
|
| 2798 |
+
break
|
| 2799 |
+
|
| 2800 |
+
# faster than cur + offset
|
| 2801 |
+
next_date = offset._apply(cur)
|
| 2802 |
+
next_date = next_date.as_unit(unit)
|
| 2803 |
+
if next_date <= cur:
|
| 2804 |
+
raise ValueError(f"Offset {offset} did not increment date")
|
| 2805 |
+
cur = next_date
|
| 2806 |
+
else:
|
| 2807 |
+
while cur >= end:
|
| 2808 |
+
yield cur
|
| 2809 |
+
|
| 2810 |
+
if cur == end:
|
| 2811 |
+
# GH#24252 avoid overflows by not performing the addition
|
| 2812 |
+
# in offset.apply unless we have to
|
| 2813 |
+
break
|
| 2814 |
+
|
| 2815 |
+
# faster than cur + offset
|
| 2816 |
+
next_date = offset._apply(cur)
|
| 2817 |
+
next_date = next_date.as_unit(unit)
|
| 2818 |
+
if next_date >= cur:
|
| 2819 |
+
raise ValueError(f"Offset {offset} did not decrement date")
|
| 2820 |
+
cur = next_date
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/numeric.py
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import numbers
|
| 4 |
+
from typing import (
|
| 5 |
+
TYPE_CHECKING,
|
| 6 |
+
Any,
|
| 7 |
+
Callable,
|
| 8 |
+
)
|
| 9 |
+
|
| 10 |
+
import numpy as np
|
| 11 |
+
|
| 12 |
+
from pandas._libs import (
|
| 13 |
+
lib,
|
| 14 |
+
missing as libmissing,
|
| 15 |
+
)
|
| 16 |
+
from pandas.errors import AbstractMethodError
|
| 17 |
+
from pandas.util._decorators import cache_readonly
|
| 18 |
+
|
| 19 |
+
from pandas.core.dtypes.common import (
|
| 20 |
+
is_integer_dtype,
|
| 21 |
+
is_string_dtype,
|
| 22 |
+
pandas_dtype,
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
from pandas.core.arrays.masked import (
|
| 26 |
+
BaseMaskedArray,
|
| 27 |
+
BaseMaskedDtype,
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
if TYPE_CHECKING:
|
| 31 |
+
from collections.abc import Mapping
|
| 32 |
+
|
| 33 |
+
import pyarrow
|
| 34 |
+
|
| 35 |
+
from pandas._typing import (
|
| 36 |
+
Dtype,
|
| 37 |
+
DtypeObj,
|
| 38 |
+
Self,
|
| 39 |
+
npt,
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class NumericDtype(BaseMaskedDtype):
    """
    Shared behaviour for the masked integer and floating dtypes.

    Subclasses supply ``_default_np_dtype`` and ``_checker`` and implement
    ``_get_dtype_mapping`` and ``_safe_cast``.
    """

    _default_np_dtype: np.dtype
    _checker: Callable[[Any], bool]  # is_foo_dtype

    def __repr__(self) -> str:
        return f"{self.name}Dtype()"

    @cache_readonly
    def is_signed_integer(self) -> bool:
        """Whether the dtype kind is ``"i"``."""
        return self.kind == "i"

    @cache_readonly
    def is_unsigned_integer(self) -> bool:
        """Whether the dtype kind is ``"u"``."""
        return self.kind == "u"

    @property
    def _is_numeric(self) -> bool:
        return True

    def __from_arrow__(
        self, array: pyarrow.Array | pyarrow.ChunkedArray
    ) -> BaseMaskedArray:
        """
        Construct IntegerArray/FloatingArray from pyarrow Array/ChunkedArray.

        Raises
        ------
        TypeError
            If the arrow type differs from this dtype, is not the null type
            and does not map to an integer/unsigned/float pandas dtype.
        """
        import pyarrow

        from pandas.core.arrays.arrow._arrow_utils import (
            pyarrow_array_to_numpy_and_mask,
        )

        array_class = self.construct_array_type()

        expected_type = pyarrow.from_numpy_dtype(self.type)
        same_type = array.type.equals(expected_type)
        if not (same_type or pyarrow.types.is_null(array.type)):
            # test_from_arrow_type_error raise for string, but allow
            #  through itemsize conversion GH#31896
            roundtrip_dtype = pandas_dtype(array.type.to_pandas_dtype())
            if roundtrip_dtype.kind not in "iuf":
                # Could allow "c" or potentially disallow float<->int conversion,
                #  but at the moment we specifically test that uint<->int works
                raise TypeError(
                    f"Expected array of {self} type, got {array.type} instead"
                )

            array = array.cast(expected_type)

        if isinstance(array, pyarrow.ChunkedArray):
            # TODO this "if" can be removed when requiring pyarrow >= 10.0, which fixed
            # combine_chunks for empty arrays https://github.com/apache/arrow/pull/13757
            array = (
                pyarrow.array([], type=array.type)
                if array.num_chunks == 0
                else array.combine_chunks()
            )

        data, validity = pyarrow_array_to_numpy_and_mask(
            array, dtype=self.numpy_dtype
        )
        # The helper's mask is inverted before being handed to the array class.
        return array_class(data.copy(), ~validity, copy=False)

    @classmethod
    def _get_dtype_mapping(cls) -> Mapping[np.dtype, NumericDtype]:
        """Return the numpy-dtype -> NumericDtype lookup; subclasses implement."""
        raise AbstractMethodError(cls)

    @classmethod
    def _standardize_dtype(cls, dtype: NumericDtype | str | np.dtype) -> NumericDtype:
        """
        Convert a string representation or a numpy dtype to NumericDtype.

        Raises
        ------
        ValueError
            If the resolved numpy dtype is not a key of the dtype mapping.
        """
        if isinstance(dtype, str) and dtype.startswith(("Int", "UInt", "Float")):
            # Avoid DeprecationWarning from NumPy about np.dtype("Int64")
            # https://github.com/numpy/numpy/pull/7476
            dtype = dtype.lower()

        if isinstance(dtype, NumericDtype):
            return dtype

        mapping = cls._get_dtype_mapping()
        try:
            return mapping[np.dtype(dtype)]
        except KeyError as err:
            raise ValueError(f"invalid dtype specified {dtype}") from err

    @classmethod
    def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
        """
        Safely cast the values to the given dtype.

        "safe" in this context means the casting is lossless.
        """
        raise AbstractMethodError(cls)
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def _coerce_to_data_and_mask(
    values, dtype, copy: bool, dtype_cls: type[NumericDtype], default_dtype: np.dtype
):
    """
    Split ``values`` into a numpy data array and a boolean NA mask.

    Parameters
    ----------
    values : array-like
        Input data; either an instance of ``dtype_cls``'s array type or
        anything ``np.asarray`` accepts.
    dtype : NumericDtype, str, np.dtype or None
        Requested dtype. When None, the dtype carried by ``values`` is used if
        ``dtype_cls._checker`` accepts it, otherwise ``default_dtype``.
    copy : bool
        Whether to copy the input data.
    dtype_cls : type[NumericDtype]
        Dtype class supplying ``_checker``, ``_standardize_dtype``,
        ``construct_array_type`` and ``_safe_cast``.
    default_dtype : np.dtype
        numpy dtype used when none can be determined.

    Returns
    -------
    tuple
        ``(values, mask, dtype, inferred_type)``. On the masked-array fast
        path ``dtype`` is the standardized dtype (or None); otherwise it is a
        numpy dtype.

    Raises
    ------
    TypeError
        If the data cannot be converted or is not one-dimensional.
    """
    accepts = dtype_cls._checker
    inferred_type = None

    # Adopt the dtype the input already carries when it is one we handle.
    if dtype is None and hasattr(values, "dtype") and accepts(values.dtype):
        dtype = values.dtype

    if dtype is not None:
        dtype = dtype_cls._standardize_dtype(dtype)

    array_cls = dtype_cls.construct_array_type()
    if isinstance(values, array_cls):
        # Already a masked array of the right family: reuse its data and mask.
        data, mask = values._data, values._mask
        if dtype is not None:
            data = data.astype(dtype.numpy_dtype, copy=False)

        if copy:
            data, mask = data.copy(), mask.copy()
        return data, mask, dtype, inferred_type

    def _cannot_convert() -> TypeError:
        type_name = dtype_cls.__name__.strip("_")
        return TypeError(f"{values.dtype} cannot be converted to {type_name}")

    original = values
    values = np.array(values, copy=copy) if copy else np.asarray(values)

    if values.dtype == object or is_string_dtype(values.dtype):
        inferred_type = lib.infer_dtype(values, skipna=True)
        if dtype is None and inferred_type == "boolean":
            raise _cannot_convert()

    elif values.dtype.kind == "b" and accepts(dtype):
        values = (
            np.array(values, dtype=default_dtype, copy=copy)
            if copy
            else np.asarray(values, dtype=default_dtype)
        )

    elif values.dtype.kind not in "iuf":
        raise _cannot_convert()

    if values.ndim != 1:
        raise TypeError("values must be a 1D list-like")

    # No mask exists on this path, so derive one from the data.
    if values.dtype.kind in "iu":
        # fastpath
        mask = np.zeros(len(values), dtype=np.bool_)
    else:
        mask = libmissing.is_numeric_na(values)

    if mask.ndim != 1:
        raise TypeError("mask must be a 1D list-like")

    # infer dtype if needed
    dtype = default_dtype if dtype is None else dtype.numpy_dtype

    if is_integer_dtype(dtype) and values.dtype.kind == "f" and len(values) > 0:
        if mask.all():
            values = np.ones(values.shape, dtype=dtype)
        else:
            peak = np.nanargmax(values)
            if int(values[peak]) != original[peak]:
                # We have ints that lost precision during the cast.
                inferred_type = lib.infer_dtype(original, skipna=True)
                pure_ints = (
                    inferred_type not in ["floating", "mixed-integer-float"]
                    and not mask.any()
                )
                values = np.asarray(original, dtype=dtype if pure_ints else "object")

    # we copy as need to coerce here
    if mask.any():
        values = values.copy()
        values[mask] = array_cls._internal_fill_value

    if inferred_type in ("string", "unicode"):
        # casts from str are always safe since they raise
        # a ValueError if the str cannot be parsed into a float
        values = values.astype(dtype, copy=copy)
    else:
        values = dtype_cls._safe_cast(values, dtype, copy=False)

    return values, mask, dtype, inferred_type
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
class NumericArray(BaseMaskedArray):
    """
    Base class for IntegerArray and FloatingArray.
    """

    # Dtype class for this array family; supplies ``_checker``,
    # ``_default_np_dtype`` and ``_get_dtype_mapping``.
    _dtype_cls: type[NumericDtype]

    def __init__(
        self, values: np.ndarray, mask: npt.NDArray[np.bool_], copy: bool = False
    ) -> None:
        """
        Validate ``values`` and hand off to the masked-array base initializer.

        Parameters
        ----------
        values : np.ndarray
            Data whose dtype must satisfy ``_dtype_cls._checker``.
        mask : np.ndarray of bool
            Forwarded unchanged to the base initializer.
        copy : bool, default False
            Forwarded unchanged to the base initializer.

        Raises
        ------
        TypeError
            If ``values`` is not an ndarray accepted by the checker, or if its
            dtype is ``np.float16``.
        """
        checker = self._dtype_cls._checker
        if not (isinstance(values, np.ndarray) and checker(values.dtype)):
            # ``kind`` is used as an instance attribute elsewhere in this file
            # (``self.kind``); looked up on the dtype *class* it does not give
            # a string, so comparing it with "f" always chose "integer". Read
            # the kind from the class-level default numpy dtype instead.
            is_floating = self._dtype_cls._default_np_dtype.kind == "f"
            descr = "floating" if is_floating else "integer"
            raise TypeError(
                f"values should be {descr} numpy array. Use "
                "the 'pd.array' function instead"
            )
        if values.dtype == np.float16:
            # If we don't raise here, then accessing self.dtype would raise
            raise TypeError("FloatingArray does not support np.float16 dtype.")

        super().__init__(values, mask, copy=copy)

    @cache_readonly
    def dtype(self) -> NumericDtype:
        """The NumericDtype mapped from the dtype of the underlying data."""
        mapping = self._dtype_cls._get_dtype_mapping()
        return mapping[self._data.dtype]

    @classmethod
    def _coerce_to_array(
        cls, value, *, dtype: DtypeObj, copy: bool = False
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Coerce ``value`` to a ``(data, mask)`` pair.

        Delegates to ``_coerce_to_data_and_mask`` with this class's dtype
        class and its default numpy dtype, discarding the dtype and inferred
        type that the helper also returns.
        """
        dtype_cls = cls._dtype_cls
        default_dtype = dtype_cls._default_np_dtype
        values, mask, _, _ = _coerce_to_data_and_mask(
            value, dtype, copy, dtype_cls, default_dtype
        )
        return values, mask

    @classmethod
    def _from_sequence_of_strings(
        cls, strings, *, dtype: Dtype | None = None, copy: bool = False
    ) -> Self:
        """
        Build an array from strings by parsing them with ``to_numeric``.

        Parsing uses ``errors="raise"`` and the ``numpy_nullable`` backend;
        the parsed scalars are then passed to ``_from_sequence``.
        """
        # Imported locally, as in the original code.
        from pandas.core.tools.numeric import to_numeric

        scalars = to_numeric(strings, errors="raise", dtype_backend="numpy_nullable")
        return cls._from_sequence(scalars, dtype=dtype, copy=copy)

    _HANDLED_TYPES = (np.ndarray, numbers.Number)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/numpy_.py
ADDED
|
@@ -0,0 +1,563 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import (
|
| 4 |
+
TYPE_CHECKING,
|
| 5 |
+
Literal,
|
| 6 |
+
)
|
| 7 |
+
|
| 8 |
+
import numpy as np
|
| 9 |
+
|
| 10 |
+
from pandas._libs import lib
|
| 11 |
+
from pandas._libs.tslibs import is_supported_dtype
|
| 12 |
+
from pandas.compat.numpy import function as nv
|
| 13 |
+
|
| 14 |
+
from pandas.core.dtypes.astype import astype_array
|
| 15 |
+
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
| 16 |
+
from pandas.core.dtypes.common import pandas_dtype
|
| 17 |
+
from pandas.core.dtypes.dtypes import NumpyEADtype
|
| 18 |
+
from pandas.core.dtypes.missing import isna
|
| 19 |
+
|
| 20 |
+
from pandas.core import (
|
| 21 |
+
arraylike,
|
| 22 |
+
missing,
|
| 23 |
+
nanops,
|
| 24 |
+
ops,
|
| 25 |
+
)
|
| 26 |
+
from pandas.core.arraylike import OpsMixin
|
| 27 |
+
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
|
| 28 |
+
from pandas.core.construction import ensure_wrapped_if_datetimelike
|
| 29 |
+
from pandas.core.strings.object_array import ObjectStringArrayMixin
|
| 30 |
+
|
| 31 |
+
if TYPE_CHECKING:
|
| 32 |
+
from pandas._typing import (
|
| 33 |
+
AxisInt,
|
| 34 |
+
Dtype,
|
| 35 |
+
FillnaOptions,
|
| 36 |
+
InterpolateOptions,
|
| 37 |
+
NpDtype,
|
| 38 |
+
Scalar,
|
| 39 |
+
Self,
|
| 40 |
+
npt,
|
| 41 |
+
)
|
| 42 |
+
|
| 43 |
+
from pandas import Index
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is
|
| 47 |
+
# incompatible with definition in base class "ExtensionArray"
|
| 48 |
+
class NumpyExtensionArray( # type: ignore[misc]
|
| 49 |
+
OpsMixin,
|
| 50 |
+
NDArrayBackedExtensionArray,
|
| 51 |
+
ObjectStringArrayMixin,
|
| 52 |
+
):
|
| 53 |
+
"""
|
| 54 |
+
A pandas ExtensionArray for NumPy data.
|
| 55 |
+
|
| 56 |
+
This is mostly for internal compatibility, and is not especially
|
| 57 |
+
useful on its own.
|
| 58 |
+
|
| 59 |
+
Parameters
|
| 60 |
+
----------
|
| 61 |
+
values : ndarray
|
| 62 |
+
The NumPy ndarray to wrap. Must be 1-dimensional.
|
| 63 |
+
copy : bool, default False
|
| 64 |
+
Whether to copy `values`.
|
| 65 |
+
|
| 66 |
+
Attributes
|
| 67 |
+
----------
|
| 68 |
+
None
|
| 69 |
+
|
| 70 |
+
Methods
|
| 71 |
+
-------
|
| 72 |
+
None
|
| 73 |
+
|
| 74 |
+
Examples
|
| 75 |
+
--------
|
| 76 |
+
>>> pd.arrays.NumpyExtensionArray(np.array([0, 1, 2, 3]))
|
| 77 |
+
<NumpyExtensionArray>
|
| 78 |
+
[0, 1, 2, 3]
|
| 79 |
+
Length: 4, dtype: int64
|
| 80 |
+
"""
|
| 81 |
+
|
| 82 |
+
# If you're wondering why pd.Series(cls) doesn't put the array in an
|
| 83 |
+
# ExtensionBlock, search for `ABCNumpyExtensionArray`. We check for
|
| 84 |
+
# that _typ to ensure that users don't unnecessarily use EAs inside
|
| 85 |
+
# pandas internals, which turns off things like block consolidation.
|
| 86 |
+
_typ = "npy_extension"
|
| 87 |
+
__array_priority__ = 1000
|
| 88 |
+
_ndarray: np.ndarray
|
| 89 |
+
_dtype: NumpyEADtype
|
| 90 |
+
_internal_fill_value = np.nan
|
| 91 |
+
|
| 92 |
+
# ------------------------------------------------------------------------
|
| 93 |
+
# Constructors
|
| 94 |
+
|
| 95 |
+
def __init__(
|
| 96 |
+
self, values: np.ndarray | NumpyExtensionArray, copy: bool = False
|
| 97 |
+
) -> None:
|
| 98 |
+
if isinstance(values, type(self)):
|
| 99 |
+
values = values._ndarray
|
| 100 |
+
if not isinstance(values, np.ndarray):
|
| 101 |
+
raise ValueError(
|
| 102 |
+
f"'values' must be a NumPy array, not {type(values).__name__}"
|
| 103 |
+
)
|
| 104 |
+
|
| 105 |
+
if values.ndim == 0:
|
| 106 |
+
# Technically we support 2, but do not advertise that fact.
|
| 107 |
+
raise ValueError("NumpyExtensionArray must be 1-dimensional.")
|
| 108 |
+
|
| 109 |
+
if copy:
|
| 110 |
+
values = values.copy()
|
| 111 |
+
|
| 112 |
+
dtype = NumpyEADtype(values.dtype)
|
| 113 |
+
super().__init__(values, dtype)
|
| 114 |
+
|
| 115 |
+
@classmethod
|
| 116 |
+
def _from_sequence(
|
| 117 |
+
cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
|
| 118 |
+
) -> NumpyExtensionArray:
|
| 119 |
+
if isinstance(dtype, NumpyEADtype):
|
| 120 |
+
dtype = dtype._dtype
|
| 121 |
+
|
| 122 |
+
# error: Argument "dtype" to "asarray" has incompatible type
|
| 123 |
+
# "Union[ExtensionDtype, str, dtype[Any], dtype[floating[_64Bit]], Type[object],
|
| 124 |
+
# None]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
|
| 125 |
+
# Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
|
| 126 |
+
# _DTypeDict, Tuple[Any, Any]]]"
|
| 127 |
+
result = np.asarray(scalars, dtype=dtype) # type: ignore[arg-type]
|
| 128 |
+
if (
|
| 129 |
+
result.ndim > 1
|
| 130 |
+
and not hasattr(scalars, "dtype")
|
| 131 |
+
and (dtype is None or dtype == object)
|
| 132 |
+
):
|
| 133 |
+
# e.g. list-of-tuples
|
| 134 |
+
result = construct_1d_object_array_from_listlike(scalars)
|
| 135 |
+
|
| 136 |
+
if copy and result is scalars:
|
| 137 |
+
result = result.copy()
|
| 138 |
+
return cls(result)
|
| 139 |
+
|
| 140 |
+
def _from_backing_data(self, arr: np.ndarray) -> NumpyExtensionArray:
|
| 141 |
+
return type(self)(arr)
|
| 142 |
+
|
| 143 |
+
# ------------------------------------------------------------------------
|
| 144 |
+
# Data
|
| 145 |
+
|
| 146 |
+
@property
|
| 147 |
+
def dtype(self) -> NumpyEADtype:
|
| 148 |
+
return self._dtype
|
| 149 |
+
|
| 150 |
+
# ------------------------------------------------------------------------
|
| 151 |
+
# NumPy Array Interface
|
| 152 |
+
|
| 153 |
+
def __array__(
|
| 154 |
+
self, dtype: NpDtype | None = None, copy: bool | None = None
|
| 155 |
+
) -> np.ndarray:
|
| 156 |
+
return np.asarray(self._ndarray, dtype=dtype)
|
| 157 |
+
|
| 158 |
+
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
|
| 159 |
+
# Lightly modified version of
|
| 160 |
+
# https://numpy.org/doc/stable/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html
|
| 161 |
+
# The primary modification is not boxing scalar return values
|
| 162 |
+
# in NumpyExtensionArray, since pandas' ExtensionArrays are 1-d.
|
| 163 |
+
out = kwargs.get("out", ())
|
| 164 |
+
|
| 165 |
+
result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
|
| 166 |
+
self, ufunc, method, *inputs, **kwargs
|
| 167 |
+
)
|
| 168 |
+
if result is not NotImplemented:
|
| 169 |
+
return result
|
| 170 |
+
|
| 171 |
+
if "out" in kwargs:
|
| 172 |
+
# e.g. test_ufunc_unary
|
| 173 |
+
return arraylike.dispatch_ufunc_with_out(
|
| 174 |
+
self, ufunc, method, *inputs, **kwargs
|
| 175 |
+
)
|
| 176 |
+
|
| 177 |
+
if method == "reduce":
|
| 178 |
+
result = arraylike.dispatch_reduction_ufunc(
|
| 179 |
+
self, ufunc, method, *inputs, **kwargs
|
| 180 |
+
)
|
| 181 |
+
if result is not NotImplemented:
|
| 182 |
+
# e.g. tests.series.test_ufunc.TestNumpyReductions
|
| 183 |
+
return result
|
| 184 |
+
|
| 185 |
+
# Defer to the implementation of the ufunc on unwrapped values.
|
| 186 |
+
inputs = tuple(
|
| 187 |
+
x._ndarray if isinstance(x, NumpyExtensionArray) else x for x in inputs
|
| 188 |
+
)
|
| 189 |
+
if out:
|
| 190 |
+
kwargs["out"] = tuple(
|
| 191 |
+
x._ndarray if isinstance(x, NumpyExtensionArray) else x for x in out
|
| 192 |
+
)
|
| 193 |
+
result = getattr(ufunc, method)(*inputs, **kwargs)
|
| 194 |
+
|
| 195 |
+
if ufunc.nout > 1:
|
| 196 |
+
# multiple return values; re-box array-like results
|
| 197 |
+
return tuple(type(self)(x) for x in result)
|
| 198 |
+
elif method == "at":
|
| 199 |
+
# no return value
|
| 200 |
+
return None
|
| 201 |
+
elif method == "reduce":
|
| 202 |
+
if isinstance(result, np.ndarray):
|
| 203 |
+
# e.g. test_np_reduce_2d
|
| 204 |
+
return type(self)(result)
|
| 205 |
+
|
| 206 |
+
# e.g. test_np_max_nested_tuples
|
| 207 |
+
return result
|
| 208 |
+
else:
|
| 209 |
+
# one return value; re-box array-like results
|
| 210 |
+
return type(self)(result)
|
| 211 |
+
|
| 212 |
+
# ------------------------------------------------------------------------
|
| 213 |
+
# Pandas ExtensionArray Interface
|
| 214 |
+
|
| 215 |
+
def astype(self, dtype, copy: bool = True):
|
| 216 |
+
dtype = pandas_dtype(dtype)
|
| 217 |
+
|
| 218 |
+
if dtype == self.dtype:
|
| 219 |
+
if copy:
|
| 220 |
+
return self.copy()
|
| 221 |
+
return self
|
| 222 |
+
|
| 223 |
+
result = astype_array(self._ndarray, dtype=dtype, copy=copy)
|
| 224 |
+
return result
|
| 225 |
+
|
| 226 |
+
def isna(self) -> np.ndarray:
|
| 227 |
+
return isna(self._ndarray)
|
| 228 |
+
|
| 229 |
+
def _validate_scalar(self, fill_value):
|
| 230 |
+
if fill_value is None:
|
| 231 |
+
# Primarily for subclasses
|
| 232 |
+
fill_value = self.dtype.na_value
|
| 233 |
+
return fill_value
|
| 234 |
+
|
| 235 |
+
def _values_for_factorize(self) -> tuple[np.ndarray, float | None]:
|
| 236 |
+
if self.dtype.kind in "iub":
|
| 237 |
+
fv = None
|
| 238 |
+
else:
|
| 239 |
+
fv = np.nan
|
| 240 |
+
return self._ndarray, fv
|
| 241 |
+
|
| 242 |
+
# Base EA class (and all other EA classes) don't have limit_area keyword
|
| 243 |
+
# This can be removed here as well when the interpolate ffill/bfill method
|
| 244 |
+
# deprecation is enforced
|
| 245 |
+
def _pad_or_backfill(
|
| 246 |
+
self,
|
| 247 |
+
*,
|
| 248 |
+
method: FillnaOptions,
|
| 249 |
+
limit: int | None = None,
|
| 250 |
+
limit_area: Literal["inside", "outside"] | None = None,
|
| 251 |
+
copy: bool = True,
|
| 252 |
+
) -> Self:
|
| 253 |
+
"""
|
| 254 |
+
ffill or bfill along axis=0.
|
| 255 |
+
"""
|
| 256 |
+
if copy:
|
| 257 |
+
out_data = self._ndarray.copy()
|
| 258 |
+
else:
|
| 259 |
+
out_data = self._ndarray
|
| 260 |
+
|
| 261 |
+
meth = missing.clean_fill_method(method)
|
| 262 |
+
missing.pad_or_backfill_inplace(
|
| 263 |
+
out_data.T,
|
| 264 |
+
method=meth,
|
| 265 |
+
axis=0,
|
| 266 |
+
limit=limit,
|
| 267 |
+
limit_area=limit_area,
|
| 268 |
+
)
|
| 269 |
+
|
| 270 |
+
if not copy:
|
| 271 |
+
return self
|
| 272 |
+
return type(self)._simple_new(out_data, dtype=self.dtype)
|
| 273 |
+
|
| 274 |
+
def interpolate(
|
| 275 |
+
self,
|
| 276 |
+
*,
|
| 277 |
+
method: InterpolateOptions,
|
| 278 |
+
axis: int,
|
| 279 |
+
index: Index,
|
| 280 |
+
limit,
|
| 281 |
+
limit_direction,
|
| 282 |
+
limit_area,
|
| 283 |
+
copy: bool,
|
| 284 |
+
**kwargs,
|
| 285 |
+
) -> Self:
|
| 286 |
+
"""
|
| 287 |
+
See NDFrame.interpolate.__doc__.
|
| 288 |
+
"""
|
| 289 |
+
# NB: we return type(self) even if copy=False
|
| 290 |
+
if not copy:
|
| 291 |
+
out_data = self._ndarray
|
| 292 |
+
else:
|
| 293 |
+
out_data = self._ndarray.copy()
|
| 294 |
+
|
| 295 |
+
# TODO: assert we have floating dtype?
|
| 296 |
+
missing.interpolate_2d_inplace(
|
| 297 |
+
out_data,
|
| 298 |
+
method=method,
|
| 299 |
+
axis=axis,
|
| 300 |
+
index=index,
|
| 301 |
+
limit=limit,
|
| 302 |
+
limit_direction=limit_direction,
|
| 303 |
+
limit_area=limit_area,
|
| 304 |
+
**kwargs,
|
| 305 |
+
)
|
| 306 |
+
if not copy:
|
| 307 |
+
return self
|
| 308 |
+
return type(self)._simple_new(out_data, dtype=self.dtype)
|
| 309 |
+
|
| 310 |
+
# ------------------------------------------------------------------------
|
| 311 |
+
# Reductions
|
| 312 |
+
|
| 313 |
+
def any(
|
| 314 |
+
self,
|
| 315 |
+
*,
|
| 316 |
+
axis: AxisInt | None = None,
|
| 317 |
+
out=None,
|
| 318 |
+
keepdims: bool = False,
|
| 319 |
+
skipna: bool = True,
|
| 320 |
+
):
|
| 321 |
+
nv.validate_any((), {"out": out, "keepdims": keepdims})
|
| 322 |
+
result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna)
|
| 323 |
+
return self._wrap_reduction_result(axis, result)
|
| 324 |
+
|
| 325 |
+
def all(
|
| 326 |
+
self,
|
| 327 |
+
*,
|
| 328 |
+
axis: AxisInt | None = None,
|
| 329 |
+
out=None,
|
| 330 |
+
keepdims: bool = False,
|
| 331 |
+
skipna: bool = True,
|
| 332 |
+
):
|
| 333 |
+
nv.validate_all((), {"out": out, "keepdims": keepdims})
|
| 334 |
+
result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna)
|
| 335 |
+
return self._wrap_reduction_result(axis, result)
|
| 336 |
+
|
| 337 |
+
def min(
|
| 338 |
+
self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs
|
| 339 |
+
) -> Scalar:
|
| 340 |
+
nv.validate_min((), kwargs)
|
| 341 |
+
result = nanops.nanmin(
|
| 342 |
+
values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna
|
| 343 |
+
)
|
| 344 |
+
return self._wrap_reduction_result(axis, result)
|
| 345 |
+
|
| 346 |
+
def max(
|
| 347 |
+
self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs
|
| 348 |
+
) -> Scalar:
|
| 349 |
+
nv.validate_max((), kwargs)
|
| 350 |
+
result = nanops.nanmax(
|
| 351 |
+
values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna
|
| 352 |
+
)
|
| 353 |
+
return self._wrap_reduction_result(axis, result)
|
| 354 |
+
|
| 355 |
+
def sum(
|
| 356 |
+
self,
|
| 357 |
+
*,
|
| 358 |
+
axis: AxisInt | None = None,
|
| 359 |
+
skipna: bool = True,
|
| 360 |
+
min_count: int = 0,
|
| 361 |
+
**kwargs,
|
| 362 |
+
) -> Scalar:
|
| 363 |
+
nv.validate_sum((), kwargs)
|
| 364 |
+
result = nanops.nansum(
|
| 365 |
+
self._ndarray, axis=axis, skipna=skipna, min_count=min_count
|
| 366 |
+
)
|
| 367 |
+
return self._wrap_reduction_result(axis, result)
|
| 368 |
+
|
| 369 |
+
def prod(
|
| 370 |
+
self,
|
| 371 |
+
*,
|
| 372 |
+
axis: AxisInt | None = None,
|
| 373 |
+
skipna: bool = True,
|
| 374 |
+
min_count: int = 0,
|
| 375 |
+
**kwargs,
|
| 376 |
+
) -> Scalar:
|
| 377 |
+
nv.validate_prod((), kwargs)
|
| 378 |
+
result = nanops.nanprod(
|
| 379 |
+
self._ndarray, axis=axis, skipna=skipna, min_count=min_count
|
| 380 |
+
)
|
| 381 |
+
return self._wrap_reduction_result(axis, result)
|
| 382 |
+
|
| 383 |
+
def mean(
|
| 384 |
+
self,
|
| 385 |
+
*,
|
| 386 |
+
axis: AxisInt | None = None,
|
| 387 |
+
dtype: NpDtype | None = None,
|
| 388 |
+
out=None,
|
| 389 |
+
keepdims: bool = False,
|
| 390 |
+
skipna: bool = True,
|
| 391 |
+
):
|
| 392 |
+
nv.validate_mean((), {"dtype": dtype, "out": out, "keepdims": keepdims})
|
| 393 |
+
result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna)
|
| 394 |
+
return self._wrap_reduction_result(axis, result)
|
| 395 |
+
|
| 396 |
+
def median(
|
| 397 |
+
self,
|
| 398 |
+
*,
|
| 399 |
+
axis: AxisInt | None = None,
|
| 400 |
+
out=None,
|
| 401 |
+
overwrite_input: bool = False,
|
| 402 |
+
keepdims: bool = False,
|
| 403 |
+
skipna: bool = True,
|
| 404 |
+
):
|
| 405 |
+
nv.validate_median(
|
| 406 |
+
(), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims}
|
| 407 |
+
)
|
| 408 |
+
result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
|
| 409 |
+
return self._wrap_reduction_result(axis, result)
|
| 410 |
+
|
| 411 |
+
def std(
|
| 412 |
+
self,
|
| 413 |
+
*,
|
| 414 |
+
axis: AxisInt | None = None,
|
| 415 |
+
dtype: NpDtype | None = None,
|
| 416 |
+
out=None,
|
| 417 |
+
ddof: int = 1,
|
| 418 |
+
keepdims: bool = False,
|
| 419 |
+
skipna: bool = True,
|
| 420 |
+
):
|
| 421 |
+
nv.validate_stat_ddof_func(
|
| 422 |
+
(), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std"
|
| 423 |
+
)
|
| 424 |
+
result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
|
| 425 |
+
return self._wrap_reduction_result(axis, result)
|
| 426 |
+
|
| 427 |
+
def var(
|
| 428 |
+
self,
|
| 429 |
+
*,
|
| 430 |
+
axis: AxisInt | None = None,
|
| 431 |
+
dtype: NpDtype | None = None,
|
| 432 |
+
out=None,
|
| 433 |
+
ddof: int = 1,
|
| 434 |
+
keepdims: bool = False,
|
| 435 |
+
skipna: bool = True,
|
| 436 |
+
):
|
| 437 |
+
nv.validate_stat_ddof_func(
|
| 438 |
+
(), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="var"
|
| 439 |
+
)
|
| 440 |
+
result = nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
|
| 441 |
+
return self._wrap_reduction_result(axis, result)
|
| 442 |
+
|
| 443 |
+
def sem(
|
| 444 |
+
self,
|
| 445 |
+
*,
|
| 446 |
+
axis: AxisInt | None = None,
|
| 447 |
+
dtype: NpDtype | None = None,
|
| 448 |
+
out=None,
|
| 449 |
+
ddof: int = 1,
|
| 450 |
+
keepdims: bool = False,
|
| 451 |
+
skipna: bool = True,
|
| 452 |
+
):
|
| 453 |
+
nv.validate_stat_ddof_func(
|
| 454 |
+
(), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="sem"
|
| 455 |
+
)
|
| 456 |
+
result = nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
|
| 457 |
+
return self._wrap_reduction_result(axis, result)
|
| 458 |
+
|
| 459 |
+
def kurt(
|
| 460 |
+
self,
|
| 461 |
+
*,
|
| 462 |
+
axis: AxisInt | None = None,
|
| 463 |
+
dtype: NpDtype | None = None,
|
| 464 |
+
out=None,
|
| 465 |
+
keepdims: bool = False,
|
| 466 |
+
skipna: bool = True,
|
| 467 |
+
):
|
| 468 |
+
nv.validate_stat_ddof_func(
|
| 469 |
+
(), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="kurt"
|
| 470 |
+
)
|
| 471 |
+
result = nanops.nankurt(self._ndarray, axis=axis, skipna=skipna)
|
| 472 |
+
return self._wrap_reduction_result(axis, result)
|
| 473 |
+
|
| 474 |
+
def skew(
|
| 475 |
+
self,
|
| 476 |
+
*,
|
| 477 |
+
axis: AxisInt | None = None,
|
| 478 |
+
dtype: NpDtype | None = None,
|
| 479 |
+
out=None,
|
| 480 |
+
keepdims: bool = False,
|
| 481 |
+
skipna: bool = True,
|
| 482 |
+
):
|
| 483 |
+
nv.validate_stat_ddof_func(
|
| 484 |
+
(), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="skew"
|
| 485 |
+
)
|
| 486 |
+
result = nanops.nanskew(self._ndarray, axis=axis, skipna=skipna)
|
| 487 |
+
return self._wrap_reduction_result(axis, result)
|
| 488 |
+
|
| 489 |
+
# ------------------------------------------------------------------------
|
| 490 |
+
# Additional Methods
|
| 491 |
+
|
| 492 |
+
def to_numpy(
|
| 493 |
+
self,
|
| 494 |
+
dtype: npt.DTypeLike | None = None,
|
| 495 |
+
copy: bool = False,
|
| 496 |
+
na_value: object = lib.no_default,
|
| 497 |
+
) -> np.ndarray:
|
| 498 |
+
mask = self.isna()
|
| 499 |
+
if na_value is not lib.no_default and mask.any():
|
| 500 |
+
result = self._ndarray.copy()
|
| 501 |
+
result[mask] = na_value
|
| 502 |
+
else:
|
| 503 |
+
result = self._ndarray
|
| 504 |
+
|
| 505 |
+
result = np.asarray(result, dtype=dtype)
|
| 506 |
+
|
| 507 |
+
if copy and result is self._ndarray:
|
| 508 |
+
result = result.copy()
|
| 509 |
+
|
| 510 |
+
return result
|
| 511 |
+
|
| 512 |
+
# ------------------------------------------------------------------------
|
| 513 |
+
# Ops
|
| 514 |
+
|
| 515 |
+
def __invert__(self) -> NumpyExtensionArray:
|
| 516 |
+
return type(self)(~self._ndarray)
|
| 517 |
+
|
| 518 |
+
def __neg__(self) -> NumpyExtensionArray:
|
| 519 |
+
return type(self)(-self._ndarray)
|
| 520 |
+
|
| 521 |
+
def __pos__(self) -> NumpyExtensionArray:
|
| 522 |
+
return type(self)(+self._ndarray)
|
| 523 |
+
|
| 524 |
+
def __abs__(self) -> NumpyExtensionArray:
|
| 525 |
+
return type(self)(abs(self._ndarray))
|
| 526 |
+
|
| 527 |
+
def _cmp_method(self, other, op):
|
| 528 |
+
if isinstance(other, NumpyExtensionArray):
|
| 529 |
+
other = other._ndarray
|
| 530 |
+
|
| 531 |
+
other = ops.maybe_prepare_scalar_for_op(other, (len(self),))
|
| 532 |
+
pd_op = ops.get_array_op(op)
|
| 533 |
+
other = ensure_wrapped_if_datetimelike(other)
|
| 534 |
+
result = pd_op(self._ndarray, other)
|
| 535 |
+
|
| 536 |
+
if op is divmod or op is ops.rdivmod:
|
| 537 |
+
a, b = result
|
| 538 |
+
if isinstance(a, np.ndarray):
|
| 539 |
+
# for e.g. op vs TimedeltaArray, we may already
|
| 540 |
+
# have an ExtensionArray, in which case we do not wrap
|
| 541 |
+
return self._wrap_ndarray_result(a), self._wrap_ndarray_result(b)
|
| 542 |
+
return a, b
|
| 543 |
+
|
| 544 |
+
if isinstance(result, np.ndarray):
|
| 545 |
+
# for e.g. multiplication vs TimedeltaArray, we may already
|
| 546 |
+
# have an ExtensionArray, in which case we do not wrap
|
| 547 |
+
return self._wrap_ndarray_result(result)
|
| 548 |
+
return result
|
| 549 |
+
|
| 550 |
+
_arith_method = _cmp_method
|
| 551 |
+
|
| 552 |
+
def _wrap_ndarray_result(self, result: np.ndarray):
|
| 553 |
+
# If we have timedelta64[ns] result, return a TimedeltaArray instead
|
| 554 |
+
# of a NumpyExtensionArray
|
| 555 |
+
if result.dtype.kind == "m" and is_supported_dtype(result.dtype):
|
| 556 |
+
from pandas.core.arrays import TimedeltaArray
|
| 557 |
+
|
| 558 |
+
return TimedeltaArray._simple_new(result, dtype=result.dtype)
|
| 559 |
+
return type(self)(result)
|
| 560 |
+
|
| 561 |
+
# ------------------------------------------------------------------------
|
| 562 |
+
# String methods interface
|
| 563 |
+
_str_na_value = np.nan
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/period.py
ADDED
|
@@ -0,0 +1,1313 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from datetime import timedelta
|
| 4 |
+
import operator
|
| 5 |
+
from typing import (
|
| 6 |
+
TYPE_CHECKING,
|
| 7 |
+
Any,
|
| 8 |
+
Callable,
|
| 9 |
+
Literal,
|
| 10 |
+
TypeVar,
|
| 11 |
+
cast,
|
| 12 |
+
overload,
|
| 13 |
+
)
|
| 14 |
+
import warnings
|
| 15 |
+
|
| 16 |
+
import numpy as np
|
| 17 |
+
|
| 18 |
+
from pandas._libs import (
|
| 19 |
+
algos as libalgos,
|
| 20 |
+
lib,
|
| 21 |
+
)
|
| 22 |
+
from pandas._libs.arrays import NDArrayBacked
|
| 23 |
+
from pandas._libs.tslibs import (
|
| 24 |
+
BaseOffset,
|
| 25 |
+
NaT,
|
| 26 |
+
NaTType,
|
| 27 |
+
Timedelta,
|
| 28 |
+
add_overflowsafe,
|
| 29 |
+
astype_overflowsafe,
|
| 30 |
+
dt64arr_to_periodarr as c_dt64arr_to_periodarr,
|
| 31 |
+
get_unit_from_dtype,
|
| 32 |
+
iNaT,
|
| 33 |
+
parsing,
|
| 34 |
+
period as libperiod,
|
| 35 |
+
to_offset,
|
| 36 |
+
)
|
| 37 |
+
from pandas._libs.tslibs.dtypes import (
|
| 38 |
+
FreqGroup,
|
| 39 |
+
PeriodDtypeBase,
|
| 40 |
+
freq_to_period_freqstr,
|
| 41 |
+
)
|
| 42 |
+
from pandas._libs.tslibs.fields import isleapyear_arr
|
| 43 |
+
from pandas._libs.tslibs.offsets import (
|
| 44 |
+
Tick,
|
| 45 |
+
delta_to_tick,
|
| 46 |
+
)
|
| 47 |
+
from pandas._libs.tslibs.period import (
|
| 48 |
+
DIFFERENT_FREQ,
|
| 49 |
+
IncompatibleFrequency,
|
| 50 |
+
Period,
|
| 51 |
+
get_period_field_arr,
|
| 52 |
+
period_asfreq_arr,
|
| 53 |
+
)
|
| 54 |
+
from pandas.util._decorators import (
|
| 55 |
+
cache_readonly,
|
| 56 |
+
doc,
|
| 57 |
+
)
|
| 58 |
+
from pandas.util._exceptions import find_stack_level
|
| 59 |
+
|
| 60 |
+
from pandas.core.dtypes.common import (
|
| 61 |
+
ensure_object,
|
| 62 |
+
pandas_dtype,
|
| 63 |
+
)
|
| 64 |
+
from pandas.core.dtypes.dtypes import (
|
| 65 |
+
DatetimeTZDtype,
|
| 66 |
+
PeriodDtype,
|
| 67 |
+
)
|
| 68 |
+
from pandas.core.dtypes.generic import (
|
| 69 |
+
ABCIndex,
|
| 70 |
+
ABCPeriodIndex,
|
| 71 |
+
ABCSeries,
|
| 72 |
+
ABCTimedeltaArray,
|
| 73 |
+
)
|
| 74 |
+
from pandas.core.dtypes.missing import isna
|
| 75 |
+
|
| 76 |
+
from pandas.core.arrays import datetimelike as dtl
|
| 77 |
+
import pandas.core.common as com
|
| 78 |
+
|
| 79 |
+
if TYPE_CHECKING:
|
| 80 |
+
from collections.abc import Sequence
|
| 81 |
+
|
| 82 |
+
from pandas._typing import (
|
| 83 |
+
AnyArrayLike,
|
| 84 |
+
Dtype,
|
| 85 |
+
FillnaOptions,
|
| 86 |
+
NpDtype,
|
| 87 |
+
NumpySorter,
|
| 88 |
+
NumpyValueArrayLike,
|
| 89 |
+
Self,
|
| 90 |
+
npt,
|
| 91 |
+
)
|
| 92 |
+
|
| 93 |
+
from pandas.core.arrays import (
|
| 94 |
+
DatetimeArray,
|
| 95 |
+
TimedeltaArray,
|
| 96 |
+
)
|
| 97 |
+
from pandas.core.arrays.base import ExtensionArray
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
BaseOffsetT = TypeVar("BaseOffsetT", bound=BaseOffset)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
_shared_doc_kwargs = {
|
| 104 |
+
"klass": "PeriodArray",
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def _field_accessor(name: str, docstring: str | None = None):
    """
    Build a read-only property that extracts one period field.

    The property getter calls ``get_period_field_arr`` with ``name``, the
    array's ``asi8`` values and ``dtype._dtype_code``. The getter's
    ``__name__`` and ``__doc__`` are set from ``name`` and ``docstring``.
    """

    def getter(self):
        dtype_code = self.dtype._dtype_code
        return get_period_field_arr(name, self.asi8, dtype_code)

    getter.__name__ = name
    getter.__doc__ = docstring
    return property(getter)
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is
|
| 120 |
+
# incompatible with definition in base class "ExtensionArray"
|
| 121 |
+
class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin): # type: ignore[misc]
|
| 122 |
+
"""
|
| 123 |
+
Pandas ExtensionArray for storing Period data.
|
| 124 |
+
|
| 125 |
+
Users should use :func:`~pandas.array` to create new instances.
|
| 126 |
+
|
| 127 |
+
Parameters
|
| 128 |
+
----------
|
| 129 |
+
values : Union[PeriodArray, Series[period], ndarray[int], PeriodIndex]
|
| 130 |
+
The data to store. These should be arrays that can be directly
|
| 131 |
+
converted to ordinals without inference or copy (PeriodArray,
|
| 132 |
+
ndarray[int64]), or a box around such an array (Series[period],
|
| 133 |
+
PeriodIndex).
|
| 134 |
+
dtype : PeriodDtype, optional
|
| 135 |
+
A PeriodDtype instance from which to extract a `freq`. If both
|
| 136 |
+
`freq` and `dtype` are specified, then the frequencies must match.
|
| 137 |
+
freq : str or DateOffset
|
| 138 |
+
The `freq` to use for the array. Mostly applicable when `values`
|
| 139 |
+
is an ndarray of integers, when `freq` is required. When `values`
|
| 140 |
+
is a PeriodArray (or box around), it's checked that ``values.freq``
|
| 141 |
+
matches `freq`.
|
| 142 |
+
copy : bool, default False
|
| 143 |
+
Whether to copy the ordinals before storing.
|
| 144 |
+
|
| 145 |
+
Attributes
|
| 146 |
+
----------
|
| 147 |
+
None
|
| 148 |
+
|
| 149 |
+
Methods
|
| 150 |
+
-------
|
| 151 |
+
None
|
| 152 |
+
|
| 153 |
+
See Also
|
| 154 |
+
--------
|
| 155 |
+
Period: Represents a period of time.
|
| 156 |
+
PeriodIndex : Immutable Index for period data.
|
| 157 |
+
period_range: Create a fixed-frequency PeriodArray.
|
| 158 |
+
array: Construct a pandas array.
|
| 159 |
+
|
| 160 |
+
Notes
|
| 161 |
+
-----
|
| 162 |
+
There are two components to a PeriodArray
|
| 163 |
+
|
| 164 |
+
- ordinals : integer ndarray
|
| 165 |
+
- freq : pd.tseries.offsets.Offset
|
| 166 |
+
|
| 167 |
+
The values are physically stored as a 1-D ndarray of integers. These are
|
| 168 |
+
called "ordinals" and represent some kind of offset from a base.
|
| 169 |
+
|
| 170 |
+
The `freq` indicates the span covered by each element of the array.
|
| 171 |
+
All elements in the PeriodArray have the same `freq`.
|
| 172 |
+
|
| 173 |
+
Examples
|
| 174 |
+
--------
|
| 175 |
+
>>> pd.arrays.PeriodArray(pd.PeriodIndex(['2023-01-01',
|
| 176 |
+
... '2023-01-02'], freq='D'))
|
| 177 |
+
<PeriodArray>
|
| 178 |
+
['2023-01-01', '2023-01-02']
|
| 179 |
+
Length: 2, dtype: period[D]
|
| 180 |
+
"""
|
| 181 |
+
|
| 182 |
+
# array priority higher than numpy scalars
|
| 183 |
+
__array_priority__ = 1000
|
| 184 |
+
_typ = "periodarray" # ABCPeriodArray
|
| 185 |
+
_internal_fill_value = np.int64(iNaT)
|
| 186 |
+
_recognized_scalars = (Period,)
|
| 187 |
+
_is_recognized_dtype = lambda x: isinstance(
|
| 188 |
+
x, PeriodDtype
|
| 189 |
+
) # check_compatible_with checks freq match
|
| 190 |
+
_infer_matches = ("period",)
|
| 191 |
+
|
| 192 |
+
@property
def _scalar_type(self) -> type[Period]:
    """Scalar class associated with this array: always ``Period``."""
    return Period
|
| 195 |
+
|
| 196 |
+
# Names others delegate to us
|
| 197 |
+
_other_ops: list[str] = []
|
| 198 |
+
_bool_ops: list[str] = ["is_leap_year"]
|
| 199 |
+
_object_ops: list[str] = ["start_time", "end_time", "freq"]
|
| 200 |
+
_field_ops: list[str] = [
|
| 201 |
+
"year",
|
| 202 |
+
"month",
|
| 203 |
+
"day",
|
| 204 |
+
"hour",
|
| 205 |
+
"minute",
|
| 206 |
+
"second",
|
| 207 |
+
"weekofyear",
|
| 208 |
+
"weekday",
|
| 209 |
+
"week",
|
| 210 |
+
"dayofweek",
|
| 211 |
+
"day_of_week",
|
| 212 |
+
"dayofyear",
|
| 213 |
+
"day_of_year",
|
| 214 |
+
"quarter",
|
| 215 |
+
"qyear",
|
| 216 |
+
"days_in_month",
|
| 217 |
+
"daysinmonth",
|
| 218 |
+
]
|
| 219 |
+
_datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops
|
| 220 |
+
_datetimelike_methods: list[str] = ["strftime", "to_timestamp", "asfreq"]
|
| 221 |
+
|
| 222 |
+
_dtype: PeriodDtype
|
| 223 |
+
|
| 224 |
+
# --------------------------------------------------------------------
|
| 225 |
+
# Constructors
|
| 226 |
+
|
| 227 |
+
def __init__(
    self, values, dtype: Dtype | None = None, freq=None, copy: bool = False
) -> None:
    """
    Initialise the array from ordinals or from an existing period container.

    Parameters
    ----------
    values : PeriodArray, Series, PeriodIndex or int64-convertible array-like
        A Series or PeriodIndex is unwrapped via ``_values``; a Series that
        does not hold an instance of this class raises ``TypeError``.
    dtype : PeriodDtype-like, optional
        Must resolve to a ``PeriodDtype`` and must equal ``values.dtype``
        when ``values`` is already an instance of this class.
    freq : optional
        Deprecated (GH#52462); emits a ``FutureWarning`` and is folded into
        ``dtype``.
    copy : bool, default False
        Whether to copy the ordinals before storing them.

    Raises
    ------
    ValueError
        If ``dtype`` is not a ``PeriodDtype`` or cannot be determined.
    TypeError
        If a Series is passed whose values are not of this class.
    """
    if freq is not None:
        # GH#52462
        warnings.warn(
            "The 'freq' keyword in the PeriodArray constructor is deprecated "
            "and will be removed in a future version. Pass 'dtype' instead",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        freq = validate_dtype_freq(dtype, freq)
        dtype = PeriodDtype(freq)

    if dtype is not None:
        dtype = pandas_dtype(dtype)
        if not isinstance(dtype, PeriodDtype):
            raise ValueError(f"Invalid dtype {dtype} for PeriodArray")

    own_class = type(self)

    # Unwrap boxes around a period array.
    if isinstance(values, ABCSeries):
        values = values._values
        if not isinstance(values, own_class):
            raise TypeError("Incorrect dtype")
    elif isinstance(values, ABCPeriodIndex):
        values = values._values

    if isinstance(values, own_class):
        if dtype is not None and dtype != values.dtype:
            raise raise_on_incompatible(values, dtype.freq)
        values, dtype = values._ndarray, values.dtype

    if copy:
        values = np.array(values, dtype="int64", copy=copy)
    else:
        values = np.asarray(values, dtype="int64")

    if dtype is None:
        raise ValueError("dtype is not specified and cannot be inferred")
    dtype = cast(PeriodDtype, dtype)
    NDArrayBacked.__init__(self, values, dtype)
|
| 267 |
+
|
| 268 |
+
# error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
|
| 269 |
+
@classmethod
def _simple_new(  # type: ignore[override]
    cls,
    values: npt.NDArray[np.int64],
    dtype: PeriodDtype,
) -> Self:
    """
    Construct an instance from an int64 ndarray and a dtype.

    Delegates to ``cls(values, dtype=dtype)`` after asserting that
    ``values`` is a NumPy array with dtype ``i8``.
    """
    assert (
        isinstance(values, np.ndarray) and values.dtype == "i8"
    ), "Should be numpy array of type i8"
    return cls(values, dtype=dtype)
|
| 279 |
+
|
| 280 |
+
@classmethod
def _from_sequence(
    cls,
    scalars,
    *,
    dtype: Dtype | None = None,
    copy: bool = False,
) -> Self:
    """
    Construct an array from a sequence of period-like scalars.

    If ``scalars`` is already an instance of ``cls`` its frequency is
    validated against ``dtype`` and it is returned (copied when ``copy`` is
    true). Otherwise the input is converted to an object ndarray, the
    frequency is taken from ``dtype`` or extracted from the data, and the
    ordinals are extracted with that frequency.
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    freq = dtype.freq if (dtype and isinstance(dtype, PeriodDtype)) else None

    if isinstance(scalars, cls):
        validate_dtype_freq(scalars.dtype, freq)
        return scalars.copy() if copy else scalars

    periods = np.asarray(scalars, dtype=object)

    freq = freq or libperiod.extract_freq(periods)
    ordinals = libperiod.extract_ordinals(periods, freq)
    return cls(ordinals, dtype=PeriodDtype(freq))
|
| 307 |
+
|
| 308 |
+
@classmethod
def _from_sequence_of_strings(
    cls, strings, *, dtype: Dtype | None = None, copy: bool = False
) -> Self:
    """Construct an array from strings by delegating to ``_from_sequence``."""
    built = cls._from_sequence(strings, dtype=dtype, copy=copy)
    return built
|
| 313 |
+
|
| 314 |
+
@classmethod
def _from_datetime64(cls, data, freq, tz=None) -> Self:
    """
    Construct a PeriodArray from a datetime64 array

    Parameters
    ----------
    data : ndarray[datetime64[ns], datetime64[ns, tz]]
    freq : str or Tick
    tz : tzinfo, optional

    Returns
    -------
    PeriodArray[freq]
    """
    if isinstance(freq, BaseOffset):
        # Offsets are translated to their period frequency string first.
        freq = freq_to_period_freqstr(freq.n, freq.name)

    ordinals, period_freq = dt64arr_to_periodarr(data, freq, tz)
    period_dtype = PeriodDtype(period_freq)
    return cls(ordinals, dtype=period_dtype)
|
| 334 |
+
|
| 335 |
+
@classmethod
def _generate_range(cls, start, end, periods, freq):
    """
    Compute the ordinals and frequency for a period range.

    Returns
    -------
    tuple
        The ``(ordinals, freq)`` pair produced by ``_get_ordinal_range``.

    Raises
    ------
    ValueError
        If neither ``start`` nor ``end`` is given.
    """
    periods = dtl.validate_periods(periods)

    if freq is not None:
        freq = Period._maybe_convert_freq(freq)

    if start is None and end is None:
        raise ValueError("Not enough parameters to construct Period range")

    ordinals, freq = _get_ordinal_range(start, end, periods, freq)
    return ordinals, freq
|
| 348 |
+
|
| 349 |
+
@classmethod
def _from_fields(cls, *, fields: dict, freq) -> Self:
    """
    Construct an array from a mapping of field names to values.

    ``fields`` is expanded as keyword arguments to ``_range_from_fields``;
    the returned ordinals and frequency are used to build the array.
    """
    ordinals, resolved_freq = _range_from_fields(freq=freq, **fields)
    return cls._simple_new(ordinals, dtype=PeriodDtype(resolved_freq))
|
| 354 |
+
|
| 355 |
+
# -----------------------------------------------------------------
|
| 356 |
+
# DatetimeLike Interface
|
| 357 |
+
|
| 358 |
+
# error: Argument 1 of "_unbox_scalar" is incompatible with supertype
|
| 359 |
+
# "DatetimeLikeArrayMixin"; supertype defines the argument type as
|
| 360 |
+
# "Union[Union[Period, Any, Timedelta], NaTType]"
|
| 361 |
+
def _unbox_scalar(  # type: ignore[override]
    self,
    value: Period | NaTType,
) -> np.int64:
    """
    Convert a Period or NaT into its int64 representation.

    Raises
    ------
    ValueError
        If ``value`` is neither NaT nor an instance of ``self._scalar_type``.
    """
    if value is NaT:
        # error: Item "Period" of "Union[Period, NaTType]" has no attribute "value"
        return np.int64(value._value)  # type: ignore[union-attr]

    if not isinstance(value, self._scalar_type):
        raise ValueError(f"'value' should be a Period. Got '{value}' instead.")

    self._check_compatible_with(value)
    return np.int64(value.ordinal)
|
| 373 |
+
|
| 374 |
+
def _scalar_from_string(self, value: str) -> Period:
    """Parse ``value`` into a Period using this array's ``freq``."""
    parsed = Period(value, freq=self.freq)
    return parsed
|
| 376 |
+
|
| 377 |
+
# error: Argument 1 of "_check_compatible_with" is incompatible with
|
| 378 |
+
# supertype "DatetimeLikeArrayMixin"; supertype defines the argument type
|
| 379 |
+
# as "Period | Timestamp | Timedelta | NaTType"
|
| 380 |
+
def _check_compatible_with(self, other: Period | NaTType | PeriodArray) -> None:  # type: ignore[override]
    """
    Check that ``other`` has a frequency matching this array's.

    NaT is always accepted; anything else has its ``freq`` passed to
    ``self._require_matching_freq``.
    """
    if other is not NaT:
        # error: Item "NaTType" of "Period | NaTType | PeriodArray" has no
        # attribute "freq"
        self._require_matching_freq(other.freq)  # type: ignore[union-attr]
|
| 386 |
+
|
| 387 |
+
# --------------------------------------------------------------------
|
| 388 |
+
# Data / Attributes
|
| 389 |
+
|
| 390 |
+
@cache_readonly
def dtype(self) -> PeriodDtype:
    """Return the dtype stored on this array as ``_dtype``."""
    return self._dtype
|
| 393 |
+
|
| 394 |
+
# error: Cannot override writeable attribute with read-only property
|
| 395 |
+
@property  # type: ignore[override]
def freq(self) -> BaseOffset:
    """
    Return the frequency object for this PeriodArray.
    """
    period_dtype = self.dtype
    return period_dtype.freq
|
| 401 |
+
|
| 402 |
+
@property
|
| 403 |
+
def freqstr(self) -> str:
|
| 404 |
+
return freq_to_period_freqstr(self.freq.n, self.freq.name)
|
| 405 |
+
|
| 406 |
+
def __array__(
    self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
    """
    NumPy array interface.

    Parameters
    ----------
    dtype : dtype-like, optional
        ``"i8"`` returns the integer ordinals, ``bool`` returns the
        negation of the NaT mask, anything else builds an object array
        from the elements of ``self``.
    copy : bool, optional
        When true and ``dtype`` is ``"i8"``, an independent copy of the
        ordinals is returned instead of ``self.asi8`` itself. The other
        branches always create a new array.

    Returns
    -------
    numpy.ndarray
    """
    if dtype == "i8":
        ordinals = self.asi8
        # Previously ``copy`` was ignored here, so a caller asking for a
        # copy received ``self.asi8`` itself and could mutate the stored
        # ordinals through the result.
        return ordinals.copy() if copy else ordinals
    elif dtype == bool:
        return ~self._isnan

    # This will raise TypeError for non-object dtypes
    return np.array(list(self), dtype=object)
|
| 416 |
+
|
| 417 |
+
def __arrow_array__(self, type=None):
    """
    Convert myself into a pyarrow Array.

    An integer ``type`` yields a plain pyarrow integer array built from the
    backing ndarray with missing entries masked. An ``ArrowPeriodType`` must
    carry the same ``freq`` as ``self.freqstr``. Any other requested type
    raises ``TypeError``. With no type, or a matching ``ArrowPeriodType``,
    an extension array over int64 storage is returned.
    """
    import pyarrow

    from pandas.core.arrays.arrow.extension_types import ArrowPeriodType

    if type is not None:
        if pyarrow.types.is_integer(type):
            return pyarrow.array(self._ndarray, mask=self.isna(), type=type)
        if not isinstance(type, ArrowPeriodType):
            raise TypeError(
                f"Not supported to convert PeriodArray to '{type}' type"
            )
        # ensure we have the same freq
        if self.freqstr != type.freq:
            raise TypeError(
                "Not supported to convert PeriodArray to array with different "
                f"'freq' ({self.freqstr} vs {type.freq})"
            )

    period_type = ArrowPeriodType(self.freqstr)
    storage = pyarrow.array(self._ndarray, mask=self.isna(), type="int64")
    return pyarrow.ExtensionArray.from_storage(period_type, storage)
|
| 443 |
+
|
| 444 |
+
# --------------------------------------------------------------------
|
| 445 |
+
# Vectorized analogues of Period properties
|
| 446 |
+
|
| 447 |
+
year = _field_accessor(
|
| 448 |
+
"year",
|
| 449 |
+
"""
|
| 450 |
+
The year of the period.
|
| 451 |
+
|
| 452 |
+
Examples
|
| 453 |
+
--------
|
| 454 |
+
>>> idx = pd.PeriodIndex(["2023", "2024", "2025"], freq="Y")
|
| 455 |
+
>>> idx.year
|
| 456 |
+
Index([2023, 2024, 2025], dtype='int64')
|
| 457 |
+
""",
|
| 458 |
+
)
|
| 459 |
+
month = _field_accessor(
|
| 460 |
+
"month",
|
| 461 |
+
"""
|
| 462 |
+
The month as January=1, December=12.
|
| 463 |
+
|
| 464 |
+
Examples
|
| 465 |
+
--------
|
| 466 |
+
>>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
|
| 467 |
+
>>> idx.month
|
| 468 |
+
Index([1, 2, 3], dtype='int64')
|
| 469 |
+
""",
|
| 470 |
+
)
|
| 471 |
+
day = _field_accessor(
|
| 472 |
+
"day",
|
| 473 |
+
"""
|
| 474 |
+
The days of the period.
|
| 475 |
+
|
| 476 |
+
Examples
|
| 477 |
+
--------
|
| 478 |
+
>>> idx = pd.PeriodIndex(['2020-01-31', '2020-02-28'], freq='D')
|
| 479 |
+
>>> idx.day
|
| 480 |
+
Index([31, 28], dtype='int64')
|
| 481 |
+
""",
|
| 482 |
+
)
|
| 483 |
+
hour = _field_accessor(
|
| 484 |
+
"hour",
|
| 485 |
+
"""
|
| 486 |
+
The hour of the period.
|
| 487 |
+
|
| 488 |
+
Examples
|
| 489 |
+
--------
|
| 490 |
+
>>> idx = pd.PeriodIndex(["2023-01-01 10:00", "2023-01-01 11:00"], freq='h')
|
| 491 |
+
>>> idx.hour
|
| 492 |
+
Index([10, 11], dtype='int64')
|
| 493 |
+
""",
|
| 494 |
+
)
|
| 495 |
+
minute = _field_accessor(
|
| 496 |
+
"minute",
|
| 497 |
+
"""
|
| 498 |
+
The minute of the period.
|
| 499 |
+
|
| 500 |
+
Examples
|
| 501 |
+
--------
|
| 502 |
+
>>> idx = pd.PeriodIndex(["2023-01-01 10:30:00",
|
| 503 |
+
... "2023-01-01 11:50:00"], freq='min')
|
| 504 |
+
>>> idx.minute
|
| 505 |
+
Index([30, 50], dtype='int64')
|
| 506 |
+
""",
|
| 507 |
+
)
|
| 508 |
+
second = _field_accessor(
|
| 509 |
+
"second",
|
| 510 |
+
"""
|
| 511 |
+
The second of the period.
|
| 512 |
+
|
| 513 |
+
Examples
|
| 514 |
+
--------
|
| 515 |
+
>>> idx = pd.PeriodIndex(["2023-01-01 10:00:30",
|
| 516 |
+
... "2023-01-01 10:00:31"], freq='s')
|
| 517 |
+
>>> idx.second
|
| 518 |
+
Index([30, 31], dtype='int64')
|
| 519 |
+
""",
|
| 520 |
+
)
|
| 521 |
+
weekofyear = _field_accessor(
|
| 522 |
+
"week",
|
| 523 |
+
"""
|
| 524 |
+
The week ordinal of the year.
|
| 525 |
+
|
| 526 |
+
Examples
|
| 527 |
+
--------
|
| 528 |
+
>>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
|
| 529 |
+
>>> idx.week # It can be written `weekofyear`
|
| 530 |
+
Index([5, 9, 13], dtype='int64')
|
| 531 |
+
""",
|
| 532 |
+
)
|
| 533 |
+
week = weekofyear
|
| 534 |
+
day_of_week = _field_accessor(
|
| 535 |
+
"day_of_week",
|
| 536 |
+
"""
|
| 537 |
+
The day of the week with Monday=0, Sunday=6.
|
| 538 |
+
|
| 539 |
+
Examples
|
| 540 |
+
--------
|
| 541 |
+
>>> idx = pd.PeriodIndex(["2023-01-01", "2023-01-02", "2023-01-03"], freq="D")
|
| 542 |
+
>>> idx.weekday
|
| 543 |
+
Index([6, 0, 1], dtype='int64')
|
| 544 |
+
""",
|
| 545 |
+
)
|
| 546 |
+
dayofweek = day_of_week
|
| 547 |
+
weekday = dayofweek
|
| 548 |
+
dayofyear = day_of_year = _field_accessor(
|
| 549 |
+
"day_of_year",
|
| 550 |
+
"""
|
| 551 |
+
The ordinal day of the year.
|
| 552 |
+
|
| 553 |
+
Examples
|
| 554 |
+
--------
|
| 555 |
+
>>> idx = pd.PeriodIndex(["2023-01-10", "2023-02-01", "2023-03-01"], freq="D")
|
| 556 |
+
>>> idx.dayofyear
|
| 557 |
+
Index([10, 32, 60], dtype='int64')
|
| 558 |
+
|
| 559 |
+
>>> idx = pd.PeriodIndex(["2023", "2024", "2025"], freq="Y")
|
| 560 |
+
>>> idx
|
| 561 |
+
PeriodIndex(['2023', '2024', '2025'], dtype='period[Y-DEC]')
|
| 562 |
+
>>> idx.dayofyear
|
| 563 |
+
Index([365, 366, 365], dtype='int64')
|
| 564 |
+
""",
|
| 565 |
+
)
|
| 566 |
+
quarter = _field_accessor(
|
| 567 |
+
"quarter",
|
| 568 |
+
"""
|
| 569 |
+
The quarter of the date.
|
| 570 |
+
|
| 571 |
+
Examples
|
| 572 |
+
--------
|
| 573 |
+
>>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
|
| 574 |
+
>>> idx.quarter
|
| 575 |
+
Index([1, 1, 1], dtype='int64')
|
| 576 |
+
""",
|
| 577 |
+
)
|
| 578 |
+
qyear = _field_accessor("qyear")
|
| 579 |
+
days_in_month = _field_accessor(
|
| 580 |
+
"days_in_month",
|
| 581 |
+
"""
|
| 582 |
+
The number of days in the month.
|
| 583 |
+
|
| 584 |
+
Examples
|
| 585 |
+
--------
|
| 586 |
+
For Series:
|
| 587 |
+
|
| 588 |
+
>>> period = pd.period_range('2020-1-1 00:00', '2020-3-1 00:00', freq='M')
|
| 589 |
+
>>> s = pd.Series(period)
|
| 590 |
+
>>> s
|
| 591 |
+
0 2020-01
|
| 592 |
+
1 2020-02
|
| 593 |
+
2 2020-03
|
| 594 |
+
dtype: period[M]
|
| 595 |
+
>>> s.dt.days_in_month
|
| 596 |
+
0 31
|
| 597 |
+
1 29
|
| 598 |
+
2 31
|
| 599 |
+
dtype: int64
|
| 600 |
+
|
| 601 |
+
For PeriodIndex:
|
| 602 |
+
|
| 603 |
+
>>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
|
| 604 |
+
>>> idx.days_in_month # It can be also entered as `daysinmonth`
|
| 605 |
+
Index([31, 28, 31], dtype='int64')
|
| 606 |
+
""",
|
| 607 |
+
)
|
| 608 |
+
daysinmonth = days_in_month
|
| 609 |
+
|
| 610 |
+
@property
def is_leap_year(self) -> npt.NDArray[np.bool_]:
    """
    Logical indicating if the date belongs to a leap year.

    Examples
    --------
    >>> idx = pd.PeriodIndex(["2023", "2024", "2025"], freq="Y")
    >>> idx.is_leap_year
    array([False,  True, False])
    """
    years = np.asarray(self.year)
    return isleapyear_arr(years)
|
| 622 |
+
|
| 623 |
+
def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray:
|
| 624 |
+
"""
|
| 625 |
+
Cast to DatetimeArray/Index.
|
| 626 |
+
|
| 627 |
+
Parameters
|
| 628 |
+
----------
|
| 629 |
+
freq : str or DateOffset, optional
|
| 630 |
+
Target frequency. The default is 'D' for week or longer,
|
| 631 |
+
's' otherwise.
|
| 632 |
+
how : {'s', 'e', 'start', 'end'}
|
| 633 |
+
Whether to use the start or end of the time period being converted.
|
| 634 |
+
|
| 635 |
+
Returns
|
| 636 |
+
-------
|
| 637 |
+
DatetimeArray/Index
|
| 638 |
+
|
| 639 |
+
Examples
|
| 640 |
+
--------
|
| 641 |
+
>>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
|
| 642 |
+
>>> idx.to_timestamp()
|
| 643 |
+
DatetimeIndex(['2023-01-01', '2023-02-01', '2023-03-01'],
|
| 644 |
+
dtype='datetime64[ns]', freq='MS')
|
| 645 |
+
"""
|
| 646 |
+
from pandas.core.arrays import DatetimeArray
|
| 647 |
+
|
| 648 |
+
how = libperiod.validate_end_alias(how)
|
| 649 |
+
|
| 650 |
+
end = how == "E"
|
| 651 |
+
if end:
|
| 652 |
+
if freq == "B" or self.freq == "B":
|
| 653 |
+
# roll forward to ensure we land on B date
|
| 654 |
+
adjust = Timedelta(1, "D") - Timedelta(1, "ns")
|
| 655 |
+
return self.to_timestamp(how="start") + adjust
|
| 656 |
+
else:
|
| 657 |
+
adjust = Timedelta(1, "ns")
|
| 658 |
+
return (self + self.freq).to_timestamp(how="start") - adjust
|
| 659 |
+
|
| 660 |
+
if freq is None:
|
| 661 |
+
freq_code = self._dtype._get_to_timestamp_base()
|
| 662 |
+
dtype = PeriodDtypeBase(freq_code, 1)
|
| 663 |
+
freq = dtype._freqstr
|
| 664 |
+
base = freq_code
|
| 665 |
+
else:
|
| 666 |
+
freq = Period._maybe_convert_freq(freq)
|
| 667 |
+
base = freq._period_dtype_code
|
| 668 |
+
|
| 669 |
+
new_parr = self.asfreq(freq, how=how)
|
| 670 |
+
|
| 671 |
+
new_data = libperiod.periodarr_to_dt64arr(new_parr.asi8, base)
|
| 672 |
+
dta = DatetimeArray._from_sequence(new_data)
|
| 673 |
+
|
| 674 |
+
if self.freq.name == "B":
|
| 675 |
+
# See if we can retain BDay instead of Day in cases where
|
| 676 |
+
# len(self) is too small for infer_freq to distinguish between them
|
| 677 |
+
diffs = libalgos.unique_deltas(self.asi8)
|
| 678 |
+
if len(diffs) == 1:
|
| 679 |
+
diff = diffs[0]
|
| 680 |
+
if diff == self.dtype._n:
|
| 681 |
+
dta._freq = self.freq
|
| 682 |
+
elif diff == 1:
|
| 683 |
+
dta._freq = self.freq.base
|
| 684 |
+
# TODO: other cases?
|
| 685 |
+
return dta
|
| 686 |
+
else:
|
| 687 |
+
return dta._with_freq("infer")
|
| 688 |
+
|
| 689 |
+
# --------------------------------------------------------------------
|
| 690 |
+
|
| 691 |
+
def _box_func(self, x) -> Period | NaTType:
|
| 692 |
+
return Period._from_ordinal(ordinal=x, freq=self.freq)
|
| 693 |
+
|
| 694 |
+
@doc(**_shared_doc_kwargs, other="PeriodIndex", other_name="PeriodIndex")
def asfreq(self, freq=None, how: str = "E") -> Self:
    """
    Convert the {klass} to the specified frequency `freq`.

    Equivalent to applying :meth:`pandas.Period.asfreq` with the given arguments
    to each :class:`~pandas.Period` in this {klass}.

    Parameters
    ----------
    freq : str
        A frequency.
    how : str {{'E', 'S'}}, default 'E'
        Whether the elements should be aligned to the end
        or start within a period.

        * 'E', 'END', or 'FINISH' for end,
        * 'S', 'START', or 'BEGIN' for start.

        January 31st ('END') vs. January 1st ('START') for example.

    Returns
    -------
    {klass}
        The transformed {klass} with the new frequency.

    See Also
    --------
    {other}.asfreq: Convert each Period in a {other_name} to the given frequency.
    Period.asfreq : Convert a :class:`~pandas.Period` object to the given frequency.

    Examples
    --------
    >>> pidx = pd.period_range('2010-01-01', '2015-01-01', freq='Y')
    >>> pidx
    PeriodIndex(['2010', '2011', '2012', '2013', '2014', '2015'],
    dtype='period[Y-DEC]')

    >>> pidx.asfreq('M')
    PeriodIndex(['2010-12', '2011-12', '2012-12', '2013-12', '2014-12',
    '2015-12'], dtype='period[M]')

    >>> pidx.asfreq('M', how='S')
    PeriodIndex(['2010-01', '2011-01', '2012-01', '2013-01', '2014-01',
    '2015-01'], dtype='period[M]')
    """
    how = libperiod.validate_end_alias(how)

    # Offsets that carry a period dtype code are first reduced to their
    # period frequency string before the generic conversion.
    if isinstance(freq, BaseOffset) and hasattr(freq, "_period_dtype_code"):
        freq = PeriodDtype(freq)._freqstr
    freq = Period._maybe_convert_freq(freq)

    source_code = self._dtype._dtype_code
    target_code = freq._period_dtype_code

    align_end = how == "E"
    ordinals = self.asi8
    if align_end:
        # self.freq.n can't be negative or 0
        ordinals = ordinals + self.dtype._n - 1

    converted = period_asfreq_arr(ordinals, source_code, target_code, align_end)

    if self._hasna:
        # Missing entries stay missing after conversion.
        converted[self._isnan] = iNaT

    return type(self)(converted, dtype=PeriodDtype(freq))
|
| 763 |
+
|
| 764 |
+
# ------------------------------------------------------------------
|
| 765 |
+
# Rendering Methods
|
| 766 |
+
|
| 767 |
+
def _formatter(self, boxed: bool = False):
|
| 768 |
+
if boxed:
|
| 769 |
+
return str
|
| 770 |
+
return "'{}'".format
|
| 771 |
+
|
| 772 |
+
def _format_native_types(
|
| 773 |
+
self, *, na_rep: str | float = "NaT", date_format=None, **kwargs
|
| 774 |
+
) -> npt.NDArray[np.object_]:
|
| 775 |
+
"""
|
| 776 |
+
actually format my specific types
|
| 777 |
+
"""
|
| 778 |
+
return libperiod.period_array_strftime(
|
| 779 |
+
self.asi8, self.dtype._dtype_code, na_rep, date_format
|
| 780 |
+
)
|
| 781 |
+
|
| 782 |
+
# ------------------------------------------------------------------
|
| 783 |
+
|
| 784 |
+
def astype(self, dtype, copy: bool = True):
|
| 785 |
+
# We handle Period[T] -> Period[U]
|
| 786 |
+
# Our parent handles everything else.
|
| 787 |
+
dtype = pandas_dtype(dtype)
|
| 788 |
+
if dtype == self._dtype:
|
| 789 |
+
if not copy:
|
| 790 |
+
return self
|
| 791 |
+
else:
|
| 792 |
+
return self.copy()
|
| 793 |
+
if isinstance(dtype, PeriodDtype):
|
| 794 |
+
return self.asfreq(dtype.freq)
|
| 795 |
+
|
| 796 |
+
if lib.is_np_dtype(dtype, "M") or isinstance(dtype, DatetimeTZDtype):
|
| 797 |
+
# GH#45038 match PeriodIndex behavior.
|
| 798 |
+
tz = getattr(dtype, "tz", None)
|
| 799 |
+
unit = dtl.dtype_to_unit(dtype)
|
| 800 |
+
return self.to_timestamp().tz_localize(tz).as_unit(unit)
|
| 801 |
+
|
| 802 |
+
return super().astype(dtype, copy=copy)
|
| 803 |
+
|
| 804 |
+
def searchsorted(
    self,
    value: NumpyValueArrayLike | ExtensionArray,
    side: Literal["left", "right"] = "left",
    sorter: NumpySorter | None = None,
) -> npt.NDArray[np.intp] | np.intp:
    """
    Find insertion indices for ``value``.

    Both the validated ``value`` and the backing ndarray are viewed as
    ``M8[ns]`` before delegating to ``ndarray.searchsorted``.
    """
    needle = self._validate_setitem_value(value).view("M8[ns]")

    # Cast to M8 to get datetime-like NaT placement,
    #  similar to dtl._period_dispatch
    haystack = self._ndarray.view("M8[ns]")
    return haystack.searchsorted(needle, side=side, sorter=sorter)
|
| 816 |
+
|
| 817 |
+
def _pad_or_backfill(
|
| 818 |
+
self,
|
| 819 |
+
*,
|
| 820 |
+
method: FillnaOptions,
|
| 821 |
+
limit: int | None = None,
|
| 822 |
+
limit_area: Literal["inside", "outside"] | None = None,
|
| 823 |
+
copy: bool = True,
|
| 824 |
+
) -> Self:
|
| 825 |
+
# view as dt64 so we get treated as timelike in core.missing,
|
| 826 |
+
# similar to dtl._period_dispatch
|
| 827 |
+
dta = self.view("M8[ns]")
|
| 828 |
+
result = dta._pad_or_backfill(
|
| 829 |
+
method=method, limit=limit, limit_area=limit_area, copy=copy
|
| 830 |
+
)
|
| 831 |
+
if copy:
|
| 832 |
+
return cast("Self", result.view(self.dtype))
|
| 833 |
+
else:
|
| 834 |
+
return self
|
| 835 |
+
|
| 836 |
+
def fillna(
|
| 837 |
+
self, value=None, method=None, limit: int | None = None, copy: bool = True
|
| 838 |
+
) -> Self:
|
| 839 |
+
if method is not None:
|
| 840 |
+
# view as dt64 so we get treated as timelike in core.missing,
|
| 841 |
+
# similar to dtl._period_dispatch
|
| 842 |
+
dta = self.view("M8[ns]")
|
| 843 |
+
result = dta.fillna(value=value, method=method, limit=limit, copy=copy)
|
| 844 |
+
# error: Incompatible return value type (got "Union[ExtensionArray,
|
| 845 |
+
# ndarray[Any, Any]]", expected "PeriodArray")
|
| 846 |
+
return result.view(self.dtype) # type: ignore[return-value]
|
| 847 |
+
return super().fillna(value=value, method=method, limit=limit, copy=copy)
|
| 848 |
+
|
| 849 |
+
# ------------------------------------------------------------------
|
| 850 |
+
# Arithmetic Methods
|
| 851 |
+
|
| 852 |
+
def _addsub_int_array_or_scalar(
|
| 853 |
+
self, other: np.ndarray | int, op: Callable[[Any, Any], Any]
|
| 854 |
+
) -> Self:
|
| 855 |
+
"""
|
| 856 |
+
Add or subtract array of integers.
|
| 857 |
+
|
| 858 |
+
Parameters
|
| 859 |
+
----------
|
| 860 |
+
other : np.ndarray[int64] or int
|
| 861 |
+
op : {operator.add, operator.sub}
|
| 862 |
+
|
| 863 |
+
Returns
|
| 864 |
+
-------
|
| 865 |
+
result : PeriodArray
|
| 866 |
+
"""
|
| 867 |
+
assert op in [operator.add, operator.sub]
|
| 868 |
+
if op is operator.sub:
|
| 869 |
+
other = -other
|
| 870 |
+
res_values = add_overflowsafe(self.asi8, np.asarray(other, dtype="i8"))
|
| 871 |
+
return type(self)(res_values, dtype=self.dtype)
|
| 872 |
+
|
| 873 |
+
def _add_offset(self, other: BaseOffset):
|
| 874 |
+
assert not isinstance(other, Tick)
|
| 875 |
+
|
| 876 |
+
self._require_matching_freq(other, base=True)
|
| 877 |
+
return self._addsub_int_array_or_scalar(other.n, operator.add)
|
| 878 |
+
|
| 879 |
+
# TODO: can we de-duplicate with Period._add_timedeltalike_scalar?
|
| 880 |
+
def _add_timedeltalike_scalar(self, other):
|
| 881 |
+
"""
|
| 882 |
+
Parameters
|
| 883 |
+
----------
|
| 884 |
+
other : timedelta, Tick, np.timedelta64
|
| 885 |
+
|
| 886 |
+
Returns
|
| 887 |
+
-------
|
| 888 |
+
PeriodArray
|
| 889 |
+
"""
|
| 890 |
+
if not isinstance(self.freq, Tick):
|
| 891 |
+
# We cannot add timedelta-like to non-tick PeriodArray
|
| 892 |
+
raise raise_on_incompatible(self, other)
|
| 893 |
+
|
| 894 |
+
if isna(other):
|
| 895 |
+
# i.e. np.timedelta64("NaT")
|
| 896 |
+
return super()._add_timedeltalike_scalar(other)
|
| 897 |
+
|
| 898 |
+
td = np.asarray(Timedelta(other).asm8)
|
| 899 |
+
return self._add_timedelta_arraylike(td)
|
| 900 |
+
|
| 901 |
+
def _add_timedelta_arraylike(
|
| 902 |
+
self, other: TimedeltaArray | npt.NDArray[np.timedelta64]
|
| 903 |
+
) -> Self:
|
| 904 |
+
"""
|
| 905 |
+
Parameters
|
| 906 |
+
----------
|
| 907 |
+
other : TimedeltaArray or ndarray[timedelta64]
|
| 908 |
+
|
| 909 |
+
Returns
|
| 910 |
+
-------
|
| 911 |
+
PeriodArray
|
| 912 |
+
"""
|
| 913 |
+
if not self.dtype._is_tick_like():
|
| 914 |
+
# We cannot add timedelta-like to non-tick PeriodArray
|
| 915 |
+
raise TypeError(
|
| 916 |
+
f"Cannot add or subtract timedelta64[ns] dtype from {self.dtype}"
|
| 917 |
+
)
|
| 918 |
+
|
| 919 |
+
dtype = np.dtype(f"m8[{self.dtype._td64_unit}]")
|
| 920 |
+
|
| 921 |
+
# Similar to _check_timedeltalike_freq_compat, but we raise with a
|
| 922 |
+
# more specific exception message if necessary.
|
| 923 |
+
try:
|
| 924 |
+
delta = astype_overflowsafe(
|
| 925 |
+
np.asarray(other), dtype=dtype, copy=False, round_ok=False
|
| 926 |
+
)
|
| 927 |
+
except ValueError as err:
|
| 928 |
+
# e.g. if we have minutes freq and try to add 30s
|
| 929 |
+
# "Cannot losslessly convert units"
|
| 930 |
+
raise IncompatibleFrequency(
|
| 931 |
+
"Cannot add/subtract timedelta-like from PeriodArray that is "
|
| 932 |
+
"not an integer multiple of the PeriodArray's freq."
|
| 933 |
+
) from err
|
| 934 |
+
|
| 935 |
+
res_values = add_overflowsafe(self.asi8, np.asarray(delta.view("i8")))
|
| 936 |
+
return type(self)(res_values, dtype=self.dtype)
|
| 937 |
+
|
| 938 |
+
def _check_timedeltalike_freq_compat(self, other):
|
| 939 |
+
"""
|
| 940 |
+
Arithmetic operations with timedelta-like scalars or array `other`
|
| 941 |
+
are only valid if `other` is an integer multiple of `self.freq`.
|
| 942 |
+
If the operation is valid, find that integer multiple. Otherwise,
|
| 943 |
+
raise because the operation is invalid.
|
| 944 |
+
|
| 945 |
+
Parameters
|
| 946 |
+
----------
|
| 947 |
+
other : timedelta, np.timedelta64, Tick,
|
| 948 |
+
ndarray[timedelta64], TimedeltaArray, TimedeltaIndex
|
| 949 |
+
|
| 950 |
+
Returns
|
| 951 |
+
-------
|
| 952 |
+
multiple : int or ndarray[int64]
|
| 953 |
+
|
| 954 |
+
Raises
|
| 955 |
+
------
|
| 956 |
+
IncompatibleFrequency
|
| 957 |
+
"""
|
| 958 |
+
assert self.dtype._is_tick_like() # checked by calling function
|
| 959 |
+
|
| 960 |
+
dtype = np.dtype(f"m8[{self.dtype._td64_unit}]")
|
| 961 |
+
|
| 962 |
+
if isinstance(other, (timedelta, np.timedelta64, Tick)):
|
| 963 |
+
td = np.asarray(Timedelta(other).asm8)
|
| 964 |
+
else:
|
| 965 |
+
td = np.asarray(other)
|
| 966 |
+
|
| 967 |
+
try:
|
| 968 |
+
delta = astype_overflowsafe(td, dtype=dtype, copy=False, round_ok=False)
|
| 969 |
+
except ValueError as err:
|
| 970 |
+
raise raise_on_incompatible(self, other) from err
|
| 971 |
+
|
| 972 |
+
delta = delta.view("i8")
|
| 973 |
+
return lib.item_from_zerodim(delta)
|
| 974 |
+
|
| 975 |
+
|
| 976 |
+
def raise_on_incompatible(left, right) -> IncompatibleFrequency:
|
| 977 |
+
"""
|
| 978 |
+
Helper function to render a consistent error message when raising
|
| 979 |
+
IncompatibleFrequency.
|
| 980 |
+
|
| 981 |
+
Parameters
|
| 982 |
+
----------
|
| 983 |
+
left : PeriodArray
|
| 984 |
+
right : None, DateOffset, Period, ndarray, or timedelta-like
|
| 985 |
+
|
| 986 |
+
Returns
|
| 987 |
+
-------
|
| 988 |
+
IncompatibleFrequency
|
| 989 |
+
Exception to be raised by the caller.
|
| 990 |
+
"""
|
| 991 |
+
# GH#24283 error message format depends on whether right is scalar
|
| 992 |
+
if isinstance(right, (np.ndarray, ABCTimedeltaArray)) or right is None:
|
| 993 |
+
other_freq = None
|
| 994 |
+
elif isinstance(right, BaseOffset):
|
| 995 |
+
other_freq = freq_to_period_freqstr(right.n, right.name)
|
| 996 |
+
elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period)):
|
| 997 |
+
other_freq = right.freqstr
|
| 998 |
+
else:
|
| 999 |
+
other_freq = delta_to_tick(Timedelta(right)).freqstr
|
| 1000 |
+
|
| 1001 |
+
own_freq = freq_to_period_freqstr(left.freq.n, left.freq.name)
|
| 1002 |
+
msg = DIFFERENT_FREQ.format(
|
| 1003 |
+
cls=type(left).__name__, own_freq=own_freq, other_freq=other_freq
|
| 1004 |
+
)
|
| 1005 |
+
return IncompatibleFrequency(msg)
|
| 1006 |
+
|
| 1007 |
+
|
| 1008 |
+
# -------------------------------------------------------------------
|
| 1009 |
+
# Constructor Helpers
|
| 1010 |
+
|
| 1011 |
+
|
| 1012 |
+
def period_array(
|
| 1013 |
+
data: Sequence[Period | str | None] | AnyArrayLike,
|
| 1014 |
+
freq: str | Tick | BaseOffset | None = None,
|
| 1015 |
+
copy: bool = False,
|
| 1016 |
+
) -> PeriodArray:
|
| 1017 |
+
"""
|
| 1018 |
+
Construct a new PeriodArray from a sequence of Period scalars.
|
| 1019 |
+
|
| 1020 |
+
Parameters
|
| 1021 |
+
----------
|
| 1022 |
+
data : Sequence of Period objects
|
| 1023 |
+
A sequence of Period objects. These are required to all have
|
| 1024 |
+
the same ``freq.`` Missing values can be indicated by ``None``
|
| 1025 |
+
or ``pandas.NaT``.
|
| 1026 |
+
freq : str, Tick, or Offset
|
| 1027 |
+
The frequency of every element of the array. This can be specified
|
| 1028 |
+
to avoid inferring the `freq` from `data`.
|
| 1029 |
+
copy : bool, default False
|
| 1030 |
+
Whether to ensure a copy of the data is made.
|
| 1031 |
+
|
| 1032 |
+
Returns
|
| 1033 |
+
-------
|
| 1034 |
+
PeriodArray
|
| 1035 |
+
|
| 1036 |
+
See Also
|
| 1037 |
+
--------
|
| 1038 |
+
PeriodArray
|
| 1039 |
+
pandas.PeriodIndex
|
| 1040 |
+
|
| 1041 |
+
Examples
|
| 1042 |
+
--------
|
| 1043 |
+
>>> period_array([pd.Period('2017', freq='Y'),
|
| 1044 |
+
... pd.Period('2018', freq='Y')])
|
| 1045 |
+
<PeriodArray>
|
| 1046 |
+
['2017', '2018']
|
| 1047 |
+
Length: 2, dtype: period[Y-DEC]
|
| 1048 |
+
|
| 1049 |
+
>>> period_array([pd.Period('2017', freq='Y'),
|
| 1050 |
+
... pd.Period('2018', freq='Y'),
|
| 1051 |
+
... pd.NaT])
|
| 1052 |
+
<PeriodArray>
|
| 1053 |
+
['2017', '2018', 'NaT']
|
| 1054 |
+
Length: 3, dtype: period[Y-DEC]
|
| 1055 |
+
|
| 1056 |
+
Integers that look like years are handled
|
| 1057 |
+
|
| 1058 |
+
>>> period_array([2000, 2001, 2002], freq='D')
|
| 1059 |
+
<PeriodArray>
|
| 1060 |
+
['2000-01-01', '2001-01-01', '2002-01-01']
|
| 1061 |
+
Length: 3, dtype: period[D]
|
| 1062 |
+
|
| 1063 |
+
Datetime-like strings may also be passed
|
| 1064 |
+
|
| 1065 |
+
>>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q')
|
| 1066 |
+
<PeriodArray>
|
| 1067 |
+
['2000Q1', '2000Q2', '2000Q3', '2000Q4']
|
| 1068 |
+
Length: 4, dtype: period[Q-DEC]
|
| 1069 |
+
"""
|
| 1070 |
+
data_dtype = getattr(data, "dtype", None)
|
| 1071 |
+
|
| 1072 |
+
if lib.is_np_dtype(data_dtype, "M"):
|
| 1073 |
+
return PeriodArray._from_datetime64(data, freq)
|
| 1074 |
+
if isinstance(data_dtype, PeriodDtype):
|
| 1075 |
+
out = PeriodArray(data)
|
| 1076 |
+
if freq is not None:
|
| 1077 |
+
if freq == data_dtype.freq:
|
| 1078 |
+
return out
|
| 1079 |
+
return out.asfreq(freq)
|
| 1080 |
+
return out
|
| 1081 |
+
|
| 1082 |
+
# other iterable of some kind
|
| 1083 |
+
if not isinstance(data, (np.ndarray, list, tuple, ABCSeries)):
|
| 1084 |
+
data = list(data)
|
| 1085 |
+
|
| 1086 |
+
arrdata = np.asarray(data)
|
| 1087 |
+
|
| 1088 |
+
dtype: PeriodDtype | None
|
| 1089 |
+
if freq:
|
| 1090 |
+
dtype = PeriodDtype(freq)
|
| 1091 |
+
else:
|
| 1092 |
+
dtype = None
|
| 1093 |
+
|
| 1094 |
+
if arrdata.dtype.kind == "f" and len(arrdata) > 0:
|
| 1095 |
+
raise TypeError("PeriodIndex does not allow floating point in construction")
|
| 1096 |
+
|
| 1097 |
+
if arrdata.dtype.kind in "iu":
|
| 1098 |
+
arr = arrdata.astype(np.int64, copy=False)
|
| 1099 |
+
# error: Argument 2 to "from_ordinals" has incompatible type "Union[str,
|
| 1100 |
+
# Tick, None]"; expected "Union[timedelta, BaseOffset, str]"
|
| 1101 |
+
ordinals = libperiod.from_ordinals(arr, freq) # type: ignore[arg-type]
|
| 1102 |
+
return PeriodArray(ordinals, dtype=dtype)
|
| 1103 |
+
|
| 1104 |
+
data = ensure_object(arrdata)
|
| 1105 |
+
if freq is None:
|
| 1106 |
+
freq = libperiod.extract_freq(data)
|
| 1107 |
+
dtype = PeriodDtype(freq)
|
| 1108 |
+
return PeriodArray._from_sequence(data, dtype=dtype)
|
| 1109 |
+
|
| 1110 |
+
|
| 1111 |
+
@overload
def validate_dtype_freq(dtype, freq: BaseOffsetT) -> BaseOffsetT:
    ...


@overload
def validate_dtype_freq(dtype, freq: timedelta | str | None) -> BaseOffset:
    ...


def validate_dtype_freq(
    dtype, freq: BaseOffsetT | BaseOffset | timedelta | str | None
) -> BaseOffsetT:
    """
    If both a dtype and a freq are available, ensure they match.  If only
    dtype is available, extract the implied freq.

    Parameters
    ----------
    dtype : dtype
    freq : DateOffset or None

    Returns
    -------
    freq : DateOffset

    Raises
    ------
    ValueError : non-period dtype
    IncompatibleFrequency : mismatch between dtype and freq
    """
    if freq is not None:
        freq = to_offset(freq, is_period=True)

    if dtype is None:
        # Nothing to reconcile against; hand back whatever freq we have.
        # error: Incompatible return value type (got "Union[BaseOffset, Any, None]",
        # expected "BaseOffset")
        return freq  # type: ignore[return-value]

    dtype = pandas_dtype(dtype)
    if not isinstance(dtype, PeriodDtype):
        raise ValueError("dtype must be PeriodDtype")

    if freq is None:
        freq = dtype.freq
    elif freq != dtype.freq:
        raise IncompatibleFrequency("specified freq and dtype are different")
    return freq  # type: ignore[return-value]
|
| 1156 |
+
|
| 1157 |
+
|
| 1158 |
+
def dt64arr_to_periodarr(
|
| 1159 |
+
data, freq, tz=None
|
| 1160 |
+
) -> tuple[npt.NDArray[np.int64], BaseOffset]:
|
| 1161 |
+
"""
|
| 1162 |
+
Convert an datetime-like array to values Period ordinals.
|
| 1163 |
+
|
| 1164 |
+
Parameters
|
| 1165 |
+
----------
|
| 1166 |
+
data : Union[Series[datetime64[ns]], DatetimeIndex, ndarray[datetime64ns]]
|
| 1167 |
+
freq : Optional[Union[str, Tick]]
|
| 1168 |
+
Must match the `freq` on the `data` if `data` is a DatetimeIndex
|
| 1169 |
+
or Series.
|
| 1170 |
+
tz : Optional[tzinfo]
|
| 1171 |
+
|
| 1172 |
+
Returns
|
| 1173 |
+
-------
|
| 1174 |
+
ordinals : ndarray[int64]
|
| 1175 |
+
freq : Tick
|
| 1176 |
+
The frequency extracted from the Series or DatetimeIndex if that's
|
| 1177 |
+
used.
|
| 1178 |
+
|
| 1179 |
+
"""
|
| 1180 |
+
if not isinstance(data.dtype, np.dtype) or data.dtype.kind != "M":
|
| 1181 |
+
raise ValueError(f"Wrong dtype: {data.dtype}")
|
| 1182 |
+
|
| 1183 |
+
if freq is None:
|
| 1184 |
+
if isinstance(data, ABCIndex):
|
| 1185 |
+
data, freq = data._values, data.freq
|
| 1186 |
+
elif isinstance(data, ABCSeries):
|
| 1187 |
+
data, freq = data._values, data.dt.freq
|
| 1188 |
+
|
| 1189 |
+
elif isinstance(data, (ABCIndex, ABCSeries)):
|
| 1190 |
+
data = data._values
|
| 1191 |
+
|
| 1192 |
+
reso = get_unit_from_dtype(data.dtype)
|
| 1193 |
+
freq = Period._maybe_convert_freq(freq)
|
| 1194 |
+
base = freq._period_dtype_code
|
| 1195 |
+
return c_dt64arr_to_periodarr(data.view("i8"), base, tz, reso=reso), freq
|
| 1196 |
+
|
| 1197 |
+
|
| 1198 |
+
def _get_ordinal_range(start, end, periods, freq, mult: int = 1):
|
| 1199 |
+
if com.count_not_none(start, end, periods) != 2:
|
| 1200 |
+
raise ValueError(
|
| 1201 |
+
"Of the three parameters: start, end, and periods, "
|
| 1202 |
+
"exactly two must be specified"
|
| 1203 |
+
)
|
| 1204 |
+
|
| 1205 |
+
if freq is not None:
|
| 1206 |
+
freq = to_offset(freq, is_period=True)
|
| 1207 |
+
mult = freq.n
|
| 1208 |
+
|
| 1209 |
+
if start is not None:
|
| 1210 |
+
start = Period(start, freq)
|
| 1211 |
+
if end is not None:
|
| 1212 |
+
end = Period(end, freq)
|
| 1213 |
+
|
| 1214 |
+
is_start_per = isinstance(start, Period)
|
| 1215 |
+
is_end_per = isinstance(end, Period)
|
| 1216 |
+
|
| 1217 |
+
if is_start_per and is_end_per and start.freq != end.freq:
|
| 1218 |
+
raise ValueError("start and end must have same freq")
|
| 1219 |
+
if start is NaT or end is NaT:
|
| 1220 |
+
raise ValueError("start and end must not be NaT")
|
| 1221 |
+
|
| 1222 |
+
if freq is None:
|
| 1223 |
+
if is_start_per:
|
| 1224 |
+
freq = start.freq
|
| 1225 |
+
elif is_end_per:
|
| 1226 |
+
freq = end.freq
|
| 1227 |
+
else: # pragma: no cover
|
| 1228 |
+
raise ValueError("Could not infer freq from start/end")
|
| 1229 |
+
mult = freq.n
|
| 1230 |
+
|
| 1231 |
+
if periods is not None:
|
| 1232 |
+
periods = periods * mult
|
| 1233 |
+
if start is None:
|
| 1234 |
+
data = np.arange(
|
| 1235 |
+
end.ordinal - periods + mult, end.ordinal + 1, mult, dtype=np.int64
|
| 1236 |
+
)
|
| 1237 |
+
else:
|
| 1238 |
+
data = np.arange(
|
| 1239 |
+
start.ordinal, start.ordinal + periods, mult, dtype=np.int64
|
| 1240 |
+
)
|
| 1241 |
+
else:
|
| 1242 |
+
data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64)
|
| 1243 |
+
|
| 1244 |
+
return data, freq
|
| 1245 |
+
|
| 1246 |
+
|
| 1247 |
+
def _range_from_fields(
    year=None,
    month=None,
    quarter=None,
    day=None,
    hour=None,
    minute=None,
    second=None,
    freq=None,
) -> tuple[np.ndarray, BaseOffset]:
    """
    Compute period ordinals from separate date/time field values.

    Missing ``hour``/``minute``/``second`` default to 0 and a missing
    ``day`` defaults to 1.  When ``quarter`` is given, ordinals are built
    from (year, quarter) pairs and the frequency must be quarterly.

    Returns
    -------
    tuple of (ndarray[int64], BaseOffset)

    Raises
    ------
    AssertionError
        If ``quarter`` is given together with a non-quarterly ``freq``.
    """
    hour = 0 if hour is None else hour
    minute = 0 if minute is None else minute
    second = 0 if second is None else second
    day = 1 if day is None else day

    if quarter is None:
        freq = to_offset(freq, is_period=True)
        base = libperiod.freq_to_dtype_code(freq)
        arrays = _make_field_arrays(year, month, day, hour, minute, second)
        ordinals = [
            libperiod.period_ordinal(y, mth, d, h, mn, s, 0, 0, base)
            for y, mth, d, h, mn, s in zip(*arrays)
        ]
        return np.array(ordinals, dtype=np.int64), freq

    if freq is None:
        freq = to_offset("Q", is_period=True)
        base = FreqGroup.FR_QTR.value
    else:
        freq = to_offset(freq, is_period=True)
        base = libperiod.freq_to_dtype_code(freq)
        if base != FreqGroup.FR_QTR.value:
            raise AssertionError("base must equal FR_QTR")

    freqstr = freq.freqstr
    year, quarter = _make_field_arrays(year, quarter)
    ordinals = []
    for y, q in zip(year, quarter):
        # Translate the (year, quarter) pair before computing the ordinal.
        calendar_year, calendar_month = parsing.quarter_to_myear(y, q, freqstr)
        ordinals.append(
            libperiod.period_ordinal(
                calendar_year, calendar_month, 1, 1, 1, 1, 0, 0, base
            )
        )

    return np.array(ordinals, dtype=np.int64), freq
|
| 1294 |
+
|
| 1295 |
+
|
| 1296 |
+
def _make_field_arrays(*fields) -> list[np.ndarray]:
|
| 1297 |
+
length = None
|
| 1298 |
+
for x in fields:
|
| 1299 |
+
if isinstance(x, (list, np.ndarray, ABCSeries)):
|
| 1300 |
+
if length is not None and len(x) != length:
|
| 1301 |
+
raise ValueError("Mismatched Period array lengths")
|
| 1302 |
+
if length is None:
|
| 1303 |
+
length = len(x)
|
| 1304 |
+
|
| 1305 |
+
# error: Argument 2 to "repeat" has incompatible type "Optional[int]"; expected
|
| 1306 |
+
# "Union[Union[int, integer[Any]], Union[bool, bool_], ndarray, Sequence[Union[int,
|
| 1307 |
+
# integer[Any]]], Sequence[Union[bool, bool_]], Sequence[Sequence[Any]]]"
|
| 1308 |
+
return [
|
| 1309 |
+
np.asarray(x)
|
| 1310 |
+
if isinstance(x, (np.ndarray, list, ABCSeries))
|
| 1311 |
+
else np.repeat(x, length) # type: ignore[arg-type]
|
| 1312 |
+
for x in fields
|
| 1313 |
+
]
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/string_.py
ADDED
|
@@ -0,0 +1,657 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import (
|
| 4 |
+
TYPE_CHECKING,
|
| 5 |
+
ClassVar,
|
| 6 |
+
Literal,
|
| 7 |
+
)
|
| 8 |
+
|
| 9 |
+
import numpy as np
|
| 10 |
+
|
| 11 |
+
from pandas._config import get_option
|
| 12 |
+
|
| 13 |
+
from pandas._libs import (
|
| 14 |
+
lib,
|
| 15 |
+
missing as libmissing,
|
| 16 |
+
)
|
| 17 |
+
from pandas._libs.arrays import NDArrayBacked
|
| 18 |
+
from pandas._libs.lib import ensure_string_array
|
| 19 |
+
from pandas.compat import pa_version_under10p1
|
| 20 |
+
from pandas.compat.numpy import function as nv
|
| 21 |
+
from pandas.util._decorators import doc
|
| 22 |
+
|
| 23 |
+
from pandas.core.dtypes.base import (
|
| 24 |
+
ExtensionDtype,
|
| 25 |
+
StorageExtensionDtype,
|
| 26 |
+
register_extension_dtype,
|
| 27 |
+
)
|
| 28 |
+
from pandas.core.dtypes.common import (
|
| 29 |
+
is_array_like,
|
| 30 |
+
is_bool_dtype,
|
| 31 |
+
is_integer_dtype,
|
| 32 |
+
is_object_dtype,
|
| 33 |
+
is_string_dtype,
|
| 34 |
+
pandas_dtype,
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
from pandas.core import ops
|
| 38 |
+
from pandas.core.array_algos import masked_reductions
|
| 39 |
+
from pandas.core.arrays.base import ExtensionArray
|
| 40 |
+
from pandas.core.arrays.floating import (
|
| 41 |
+
FloatingArray,
|
| 42 |
+
FloatingDtype,
|
| 43 |
+
)
|
| 44 |
+
from pandas.core.arrays.integer import (
|
| 45 |
+
IntegerArray,
|
| 46 |
+
IntegerDtype,
|
| 47 |
+
)
|
| 48 |
+
from pandas.core.arrays.numpy_ import NumpyExtensionArray
|
| 49 |
+
from pandas.core.construction import extract_array
|
| 50 |
+
from pandas.core.indexers import check_array_indexer
|
| 51 |
+
from pandas.core.missing import isna
|
| 52 |
+
|
| 53 |
+
if TYPE_CHECKING:
|
| 54 |
+
import pyarrow
|
| 55 |
+
|
| 56 |
+
from pandas._typing import (
|
| 57 |
+
AxisInt,
|
| 58 |
+
Dtype,
|
| 59 |
+
DtypeObj,
|
| 60 |
+
NumpySorter,
|
| 61 |
+
NumpyValueArrayLike,
|
| 62 |
+
Scalar,
|
| 63 |
+
Self,
|
| 64 |
+
npt,
|
| 65 |
+
type_t,
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
+
from pandas import Series
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
@register_extension_dtype
|
| 72 |
+
class StringDtype(StorageExtensionDtype):
|
| 73 |
+
"""
|
| 74 |
+
Extension dtype for string data.
|
| 75 |
+
|
| 76 |
+
.. warning::
|
| 77 |
+
|
| 78 |
+
StringDtype is considered experimental. The implementation and
|
| 79 |
+
parts of the API may change without warning.
|
| 80 |
+
|
| 81 |
+
Parameters
|
| 82 |
+
----------
|
| 83 |
+
storage : {"python", "pyarrow", "pyarrow_numpy"}, optional
|
| 84 |
+
If not given, the value of ``pd.options.mode.string_storage``.
|
| 85 |
+
|
| 86 |
+
Attributes
|
| 87 |
+
----------
|
| 88 |
+
None
|
| 89 |
+
|
| 90 |
+
Methods
|
| 91 |
+
-------
|
| 92 |
+
None
|
| 93 |
+
|
| 94 |
+
Examples
|
| 95 |
+
--------
|
| 96 |
+
>>> pd.StringDtype()
|
| 97 |
+
string[python]
|
| 98 |
+
|
| 99 |
+
>>> pd.StringDtype(storage="pyarrow")
|
| 100 |
+
string[pyarrow]
|
| 101 |
+
"""
|
| 102 |
+
|
| 103 |
+
# error: Cannot override instance variable (previously declared on
|
| 104 |
+
# base class "StorageExtensionDtype") with class variable
|
| 105 |
+
name: ClassVar[str] = "string" # type: ignore[misc]
|
| 106 |
+
|
| 107 |
+
#: StringDtype().na_value uses pandas.NA except the implementation that
|
| 108 |
+
# follows NumPy semantics, which uses nan.
|
| 109 |
+
@property
|
| 110 |
+
def na_value(self) -> libmissing.NAType | float: # type: ignore[override]
|
| 111 |
+
if self.storage == "pyarrow_numpy":
|
| 112 |
+
return np.nan
|
| 113 |
+
else:
|
| 114 |
+
return libmissing.NA
|
| 115 |
+
|
| 116 |
+
_metadata = ("storage",)
|
| 117 |
+
|
| 118 |
+
def __init__(self, storage=None) -> None:
|
| 119 |
+
if storage is None:
|
| 120 |
+
infer_string = get_option("future.infer_string")
|
| 121 |
+
if infer_string:
|
| 122 |
+
storage = "pyarrow_numpy"
|
| 123 |
+
else:
|
| 124 |
+
storage = get_option("mode.string_storage")
|
| 125 |
+
if storage not in {"python", "pyarrow", "pyarrow_numpy"}:
|
| 126 |
+
raise ValueError(
|
| 127 |
+
f"Storage must be 'python', 'pyarrow' or 'pyarrow_numpy'. "
|
| 128 |
+
f"Got {storage} instead."
|
| 129 |
+
)
|
| 130 |
+
if storage in ("pyarrow", "pyarrow_numpy") and pa_version_under10p1:
|
| 131 |
+
raise ImportError(
|
| 132 |
+
"pyarrow>=10.0.1 is required for PyArrow backed StringArray."
|
| 133 |
+
)
|
| 134 |
+
self.storage = storage
|
| 135 |
+
|
| 136 |
+
@property
|
| 137 |
+
def type(self) -> type[str]:
|
| 138 |
+
return str
|
| 139 |
+
|
| 140 |
+
@classmethod
|
| 141 |
+
def construct_from_string(cls, string) -> Self:
|
| 142 |
+
"""
|
| 143 |
+
Construct a StringDtype from a string.
|
| 144 |
+
|
| 145 |
+
Parameters
|
| 146 |
+
----------
|
| 147 |
+
string : str
|
| 148 |
+
The type of the name. The storage type will be taking from `string`.
|
| 149 |
+
Valid options and their storage types are
|
| 150 |
+
|
| 151 |
+
========================== ==============================================
|
| 152 |
+
string result storage
|
| 153 |
+
========================== ==============================================
|
| 154 |
+
``'string'`` pd.options.mode.string_storage, default python
|
| 155 |
+
``'string[python]'`` python
|
| 156 |
+
``'string[pyarrow]'`` pyarrow
|
| 157 |
+
========================== ==============================================
|
| 158 |
+
|
| 159 |
+
Returns
|
| 160 |
+
-------
|
| 161 |
+
StringDtype
|
| 162 |
+
|
| 163 |
+
Raise
|
| 164 |
+
-----
|
| 165 |
+
TypeError
|
| 166 |
+
If the string is not a valid option.
|
| 167 |
+
"""
|
| 168 |
+
if not isinstance(string, str):
|
| 169 |
+
raise TypeError(
|
| 170 |
+
f"'construct_from_string' expects a string, got {type(string)}"
|
| 171 |
+
)
|
| 172 |
+
if string == "string":
|
| 173 |
+
return cls()
|
| 174 |
+
elif string == "string[python]":
|
| 175 |
+
return cls(storage="python")
|
| 176 |
+
elif string == "string[pyarrow]":
|
| 177 |
+
return cls(storage="pyarrow")
|
| 178 |
+
elif string == "string[pyarrow_numpy]":
|
| 179 |
+
return cls(storage="pyarrow_numpy")
|
| 180 |
+
else:
|
| 181 |
+
raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")
|
| 182 |
+
|
| 183 |
+
# https://github.com/pandas-dev/pandas/issues/36126
|
| 184 |
+
# error: Signature of "construct_array_type" incompatible with supertype
|
| 185 |
+
# "ExtensionDtype"
|
| 186 |
+
def construct_array_type( # type: ignore[override]
|
| 187 |
+
self,
|
| 188 |
+
) -> type_t[BaseStringArray]:
|
| 189 |
+
"""
|
| 190 |
+
Return the array type associated with this dtype.
|
| 191 |
+
|
| 192 |
+
Returns
|
| 193 |
+
-------
|
| 194 |
+
type
|
| 195 |
+
"""
|
| 196 |
+
from pandas.core.arrays.string_arrow import (
|
| 197 |
+
ArrowStringArray,
|
| 198 |
+
ArrowStringArrayNumpySemantics,
|
| 199 |
+
)
|
| 200 |
+
|
| 201 |
+
if self.storage == "python":
|
| 202 |
+
return StringArray
|
| 203 |
+
elif self.storage == "pyarrow":
|
| 204 |
+
return ArrowStringArray
|
| 205 |
+
else:
|
| 206 |
+
return ArrowStringArrayNumpySemantics
|
| 207 |
+
|
| 208 |
+
def __from_arrow__(
|
| 209 |
+
self, array: pyarrow.Array | pyarrow.ChunkedArray
|
| 210 |
+
) -> BaseStringArray:
|
| 211 |
+
"""
|
| 212 |
+
Construct StringArray from pyarrow Array/ChunkedArray.
|
| 213 |
+
"""
|
| 214 |
+
if self.storage == "pyarrow":
|
| 215 |
+
from pandas.core.arrays.string_arrow import ArrowStringArray
|
| 216 |
+
|
| 217 |
+
return ArrowStringArray(array)
|
| 218 |
+
elif self.storage == "pyarrow_numpy":
|
| 219 |
+
from pandas.core.arrays.string_arrow import ArrowStringArrayNumpySemantics
|
| 220 |
+
|
| 221 |
+
return ArrowStringArrayNumpySemantics(array)
|
| 222 |
+
else:
|
| 223 |
+
import pyarrow
|
| 224 |
+
|
| 225 |
+
if isinstance(array, pyarrow.Array):
|
| 226 |
+
chunks = [array]
|
| 227 |
+
else:
|
| 228 |
+
# pyarrow.ChunkedArray
|
| 229 |
+
chunks = array.chunks
|
| 230 |
+
|
| 231 |
+
results = []
|
| 232 |
+
for arr in chunks:
|
| 233 |
+
# convert chunk by chunk to numpy and concatenate then, to avoid
|
| 234 |
+
# overflow for large string data when concatenating the pyarrow arrays
|
| 235 |
+
arr = arr.to_numpy(zero_copy_only=False)
|
| 236 |
+
arr = ensure_string_array(arr, na_value=libmissing.NA)
|
| 237 |
+
results.append(arr)
|
| 238 |
+
|
| 239 |
+
if len(chunks) == 0:
|
| 240 |
+
arr = np.array([], dtype=object)
|
| 241 |
+
else:
|
| 242 |
+
arr = np.concatenate(results)
|
| 243 |
+
|
| 244 |
+
# Bypass validation inside StringArray constructor, see GH#47781
|
| 245 |
+
new_string_array = StringArray.__new__(StringArray)
|
| 246 |
+
NDArrayBacked.__init__(
|
| 247 |
+
new_string_array,
|
| 248 |
+
arr,
|
| 249 |
+
StringDtype(storage="python"),
|
| 250 |
+
)
|
| 251 |
+
return new_string_array
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
class BaseStringArray(ExtensionArray):
    """
    Mixin class for StringArray, ArrowStringArray.
    """

    # No docstring here on purpose: the ``doc`` decorator supplies it from
    # ExtensionArray.tolist.
    @doc(ExtensionArray.tolist)
    def tolist(self):
        if self.ndim > 1:
            return [x.tolist() for x in self]
        return list(self.to_numpy())

    @classmethod
    def _from_scalars(cls, scalars, dtype: DtypeObj) -> Self:
        """
        Build an array from ``scalars`` only if they are inferred to be strings.

        Parameters
        ----------
        scalars : sequence
            Values whose type is inferred with ``lib.infer_dtype`` (NAs skipped).
        dtype : DtypeObj
            Forwarded unchanged to ``cls._from_sequence``.

        Returns
        -------
        Self

        Raises
        ------
        ValueError
            If the inferred type is neither ``"string"`` nor ``"empty"``.
        """
        inferred = lib.infer_dtype(scalars, skipna=True)
        if inferred not in ["string", "empty"]:
            # TODO: require any NAs be valid-for-string
            raise ValueError(
                f"{cls.__name__} can only be built from string scalars; "
                f"got inferred type '{inferred}'."
            )
        return cls._from_sequence(scalars, dtype=dtype)
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is
|
| 274 |
+
# incompatible with definition in base class "ExtensionArray"
|
| 275 |
+
class StringArray(BaseStringArray, NumpyExtensionArray): # type: ignore[misc]
|
| 276 |
+
"""
|
| 277 |
+
Extension array for string data.
|
| 278 |
+
|
| 279 |
+
.. warning::
|
| 280 |
+
|
| 281 |
+
StringArray is considered experimental. The implementation and
|
| 282 |
+
parts of the API may change without warning.
|
| 283 |
+
|
| 284 |
+
Parameters
|
| 285 |
+
----------
|
| 286 |
+
values : array-like
|
| 287 |
+
The array of data.
|
| 288 |
+
|
| 289 |
+
.. warning::
|
| 290 |
+
|
| 291 |
+
Currently, this expects an object-dtype ndarray
|
| 292 |
+
where the elements are Python strings
|
| 293 |
+
or nan-likes (``None``, ``np.nan``, ``NA``).
|
| 294 |
+
This may change without warning in the future. Use
|
| 295 |
+
:meth:`pandas.array` with ``dtype="string"`` for a stable way of
|
| 296 |
+
creating a `StringArray` from any sequence.
|
| 297 |
+
|
| 298 |
+
.. versionchanged:: 1.5.0
|
| 299 |
+
|
| 300 |
+
StringArray now accepts array-likes containing
|
| 301 |
+
nan-likes(``None``, ``np.nan``) for the ``values`` parameter
|
| 302 |
+
in addition to strings and :attr:`pandas.NA`
|
| 303 |
+
|
| 304 |
+
copy : bool, default False
|
| 305 |
+
Whether to copy the array of data.
|
| 306 |
+
|
| 307 |
+
Attributes
|
| 308 |
+
----------
|
| 309 |
+
None
|
| 310 |
+
|
| 311 |
+
Methods
|
| 312 |
+
-------
|
| 313 |
+
None
|
| 314 |
+
|
| 315 |
+
See Also
|
| 316 |
+
--------
|
| 317 |
+
:func:`pandas.array`
|
| 318 |
+
The recommended function for creating a StringArray.
|
| 319 |
+
Series.str
|
| 320 |
+
The string methods are available on Series backed by
|
| 321 |
+
a StringArray.
|
| 322 |
+
|
| 323 |
+
Notes
|
| 324 |
+
-----
|
| 325 |
+
StringArray returns a BooleanArray for comparison methods.
|
| 326 |
+
|
| 327 |
+
Examples
|
| 328 |
+
--------
|
| 329 |
+
>>> pd.array(['This is', 'some text', None, 'data.'], dtype="string")
|
| 330 |
+
<StringArray>
|
| 331 |
+
['This is', 'some text', <NA>, 'data.']
|
| 332 |
+
Length: 4, dtype: string
|
| 333 |
+
|
| 334 |
+
Unlike arrays instantiated with ``dtype="object"``, ``StringArray``
|
| 335 |
+
will convert the values to strings.
|
| 336 |
+
|
| 337 |
+
>>> pd.array(['1', 1], dtype="object")
|
| 338 |
+
<NumpyExtensionArray>
|
| 339 |
+
['1', 1]
|
| 340 |
+
Length: 2, dtype: object
|
| 341 |
+
>>> pd.array(['1', 1], dtype="string")
|
| 342 |
+
<StringArray>
|
| 343 |
+
['1', '1']
|
| 344 |
+
Length: 2, dtype: string
|
| 345 |
+
|
| 346 |
+
However, instantiating StringArrays directly with non-strings will raise an error.
|
| 347 |
+
|
| 348 |
+
For comparison methods, `StringArray` returns a :class:`pandas.BooleanArray`:
|
| 349 |
+
|
| 350 |
+
>>> pd.array(["a", None, "c"], dtype="string") == "a"
|
| 351 |
+
<BooleanArray>
|
| 352 |
+
[True, <NA>, False]
|
| 353 |
+
Length: 3, dtype: boolean
|
| 354 |
+
"""
|
| 355 |
+
|
| 356 |
+
# undo the NumpyExtensionArray hack
|
| 357 |
+
_typ = "extension"
|
| 358 |
+
|
| 359 |
+
def __init__(self, values, copy: bool = False) -> None:
|
| 360 |
+
values = extract_array(values)
|
| 361 |
+
|
| 362 |
+
super().__init__(values, copy=copy)
|
| 363 |
+
if not isinstance(values, type(self)):
|
| 364 |
+
self._validate()
|
| 365 |
+
NDArrayBacked.__init__(self, self._ndarray, StringDtype(storage="python"))
|
| 366 |
+
|
| 367 |
+
def _validate(self):
|
| 368 |
+
"""Validate that we only store NA or strings."""
|
| 369 |
+
if len(self._ndarray) and not lib.is_string_array(self._ndarray, skipna=True):
|
| 370 |
+
raise ValueError("StringArray requires a sequence of strings or pandas.NA")
|
| 371 |
+
if self._ndarray.dtype != "object":
|
| 372 |
+
raise ValueError(
|
| 373 |
+
"StringArray requires a sequence of strings or pandas.NA. Got "
|
| 374 |
+
f"'{self._ndarray.dtype}' dtype instead."
|
| 375 |
+
)
|
| 376 |
+
# Check to see if need to convert Na values to pd.NA
|
| 377 |
+
if self._ndarray.ndim > 2:
|
| 378 |
+
# Ravel if ndims > 2 b/c no cythonized version available
|
| 379 |
+
lib.convert_nans_to_NA(self._ndarray.ravel("K"))
|
| 380 |
+
else:
|
| 381 |
+
lib.convert_nans_to_NA(self._ndarray)
|
| 382 |
+
|
| 383 |
+
@classmethod
|
| 384 |
+
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
|
| 385 |
+
if dtype and not (isinstance(dtype, str) and dtype == "string"):
|
| 386 |
+
dtype = pandas_dtype(dtype)
|
| 387 |
+
assert isinstance(dtype, StringDtype) and dtype.storage == "python"
|
| 388 |
+
|
| 389 |
+
from pandas.core.arrays.masked import BaseMaskedArray
|
| 390 |
+
|
| 391 |
+
if isinstance(scalars, BaseMaskedArray):
|
| 392 |
+
# avoid costly conversion to object dtype
|
| 393 |
+
na_values = scalars._mask
|
| 394 |
+
result = scalars._data
|
| 395 |
+
result = lib.ensure_string_array(result, copy=copy, convert_na_value=False)
|
| 396 |
+
result[na_values] = libmissing.NA
|
| 397 |
+
|
| 398 |
+
else:
|
| 399 |
+
if lib.is_pyarrow_array(scalars):
|
| 400 |
+
# pyarrow array; we cannot rely on the "to_numpy" check in
|
| 401 |
+
# ensure_string_array because calling scalars.to_numpy would set
|
| 402 |
+
# zero_copy_only to True which caused problems see GH#52076
|
| 403 |
+
scalars = np.array(scalars)
|
| 404 |
+
# convert non-na-likes to str, and nan-likes to StringDtype().na_value
|
| 405 |
+
result = lib.ensure_string_array(scalars, na_value=libmissing.NA, copy=copy)
|
| 406 |
+
|
| 407 |
+
# Manually creating new array avoids the validation step in the __init__, so is
|
| 408 |
+
# faster. Refactor need for validation?
|
| 409 |
+
new_string_array = cls.__new__(cls)
|
| 410 |
+
NDArrayBacked.__init__(new_string_array, result, StringDtype(storage="python"))
|
| 411 |
+
|
| 412 |
+
return new_string_array
|
| 413 |
+
|
| 414 |
+
@classmethod
|
| 415 |
+
def _from_sequence_of_strings(
|
| 416 |
+
cls, strings, *, dtype: Dtype | None = None, copy: bool = False
|
| 417 |
+
):
|
| 418 |
+
return cls._from_sequence(strings, dtype=dtype, copy=copy)
|
| 419 |
+
|
| 420 |
+
@classmethod
|
| 421 |
+
def _empty(cls, shape, dtype) -> StringArray:
|
| 422 |
+
values = np.empty(shape, dtype=object)
|
| 423 |
+
values[:] = libmissing.NA
|
| 424 |
+
return cls(values).astype(dtype, copy=False)
|
| 425 |
+
|
| 426 |
+
def __arrow_array__(self, type=None):
|
| 427 |
+
"""
|
| 428 |
+
Convert myself into a pyarrow Array.
|
| 429 |
+
"""
|
| 430 |
+
import pyarrow as pa
|
| 431 |
+
|
| 432 |
+
if type is None:
|
| 433 |
+
type = pa.string()
|
| 434 |
+
|
| 435 |
+
values = self._ndarray.copy()
|
| 436 |
+
values[self.isna()] = None
|
| 437 |
+
return pa.array(values, type=type, from_pandas=True)
|
| 438 |
+
|
| 439 |
+
def _values_for_factorize(self):
|
| 440 |
+
arr = self._ndarray.copy()
|
| 441 |
+
mask = self.isna()
|
| 442 |
+
arr[mask] = None
|
| 443 |
+
return arr, None
|
| 444 |
+
|
| 445 |
+
def __setitem__(self, key, value) -> None:
|
| 446 |
+
value = extract_array(value, extract_numpy=True)
|
| 447 |
+
if isinstance(value, type(self)):
|
| 448 |
+
# extract_array doesn't extract NumpyExtensionArray subclasses
|
| 449 |
+
value = value._ndarray
|
| 450 |
+
|
| 451 |
+
key = check_array_indexer(self, key)
|
| 452 |
+
scalar_key = lib.is_scalar(key)
|
| 453 |
+
scalar_value = lib.is_scalar(value)
|
| 454 |
+
if scalar_key and not scalar_value:
|
| 455 |
+
raise ValueError("setting an array element with a sequence.")
|
| 456 |
+
|
| 457 |
+
# validate new items
|
| 458 |
+
if scalar_value:
|
| 459 |
+
if isna(value):
|
| 460 |
+
value = libmissing.NA
|
| 461 |
+
elif not isinstance(value, str):
|
| 462 |
+
raise TypeError(
|
| 463 |
+
f"Cannot set non-string value '{value}' into a StringArray."
|
| 464 |
+
)
|
| 465 |
+
else:
|
| 466 |
+
if not is_array_like(value):
|
| 467 |
+
value = np.asarray(value, dtype=object)
|
| 468 |
+
if len(value) and not lib.is_string_array(value, skipna=True):
|
| 469 |
+
raise TypeError("Must provide strings.")
|
| 470 |
+
|
| 471 |
+
mask = isna(value)
|
| 472 |
+
if mask.any():
|
| 473 |
+
value = value.copy()
|
| 474 |
+
value[isna(value)] = libmissing.NA
|
| 475 |
+
|
| 476 |
+
super().__setitem__(key, value)
|
| 477 |
+
|
| 478 |
+
def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
|
| 479 |
+
# the super() method NDArrayBackedExtensionArray._putmask uses
|
| 480 |
+
# np.putmask which doesn't properly handle None/pd.NA, so using the
|
| 481 |
+
# base class implementation that uses __setitem__
|
| 482 |
+
ExtensionArray._putmask(self, mask, value)
|
| 483 |
+
|
| 484 |
+
def astype(self, dtype, copy: bool = True):
|
| 485 |
+
dtype = pandas_dtype(dtype)
|
| 486 |
+
|
| 487 |
+
if dtype == self.dtype:
|
| 488 |
+
if copy:
|
| 489 |
+
return self.copy()
|
| 490 |
+
return self
|
| 491 |
+
|
| 492 |
+
elif isinstance(dtype, IntegerDtype):
|
| 493 |
+
arr = self._ndarray.copy()
|
| 494 |
+
mask = self.isna()
|
| 495 |
+
arr[mask] = 0
|
| 496 |
+
values = arr.astype(dtype.numpy_dtype)
|
| 497 |
+
return IntegerArray(values, mask, copy=False)
|
| 498 |
+
elif isinstance(dtype, FloatingDtype):
|
| 499 |
+
arr = self.copy()
|
| 500 |
+
mask = self.isna()
|
| 501 |
+
arr[mask] = "0"
|
| 502 |
+
values = arr.astype(dtype.numpy_dtype)
|
| 503 |
+
return FloatingArray(values, mask, copy=False)
|
| 504 |
+
elif isinstance(dtype, ExtensionDtype):
|
| 505 |
+
# Skip the NumpyExtensionArray.astype method
|
| 506 |
+
return ExtensionArray.astype(self, dtype, copy)
|
| 507 |
+
elif np.issubdtype(dtype, np.floating):
|
| 508 |
+
arr = self._ndarray.copy()
|
| 509 |
+
mask = self.isna()
|
| 510 |
+
arr[mask] = 0
|
| 511 |
+
values = arr.astype(dtype)
|
| 512 |
+
values[mask] = np.nan
|
| 513 |
+
return values
|
| 514 |
+
|
| 515 |
+
return super().astype(dtype, copy)
|
| 516 |
+
|
| 517 |
+
def _reduce(
|
| 518 |
+
self, name: str, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs
|
| 519 |
+
):
|
| 520 |
+
if name in ["min", "max"]:
|
| 521 |
+
return getattr(self, name)(skipna=skipna, axis=axis)
|
| 522 |
+
|
| 523 |
+
raise TypeError(f"Cannot perform reduction '{name}' with string dtype")
|
| 524 |
+
|
| 525 |
+
def min(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
|
| 526 |
+
nv.validate_min((), kwargs)
|
| 527 |
+
result = masked_reductions.min(
|
| 528 |
+
values=self.to_numpy(), mask=self.isna(), skipna=skipna
|
| 529 |
+
)
|
| 530 |
+
return self._wrap_reduction_result(axis, result)
|
| 531 |
+
|
| 532 |
+
def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
|
| 533 |
+
nv.validate_max((), kwargs)
|
| 534 |
+
result = masked_reductions.max(
|
| 535 |
+
values=self.to_numpy(), mask=self.isna(), skipna=skipna
|
| 536 |
+
)
|
| 537 |
+
return self._wrap_reduction_result(axis, result)
|
| 538 |
+
|
| 539 |
+
def value_counts(self, dropna: bool = True) -> Series:
|
| 540 |
+
from pandas.core.algorithms import value_counts_internal as value_counts
|
| 541 |
+
|
| 542 |
+
result = value_counts(self._ndarray, dropna=dropna).astype("Int64")
|
| 543 |
+
result.index = result.index.astype(self.dtype)
|
| 544 |
+
return result
|
| 545 |
+
|
| 546 |
+
def memory_usage(self, deep: bool = False) -> int:
|
| 547 |
+
result = self._ndarray.nbytes
|
| 548 |
+
if deep:
|
| 549 |
+
return result + lib.memory_usage_of_objects(self._ndarray)
|
| 550 |
+
return result
|
| 551 |
+
|
| 552 |
+
@doc(ExtensionArray.searchsorted)
|
| 553 |
+
def searchsorted(
|
| 554 |
+
self,
|
| 555 |
+
value: NumpyValueArrayLike | ExtensionArray,
|
| 556 |
+
side: Literal["left", "right"] = "left",
|
| 557 |
+
sorter: NumpySorter | None = None,
|
| 558 |
+
) -> npt.NDArray[np.intp] | np.intp:
|
| 559 |
+
if self._hasna:
|
| 560 |
+
raise ValueError(
|
| 561 |
+
"searchsorted requires array to be sorted, which is impossible "
|
| 562 |
+
"with NAs present."
|
| 563 |
+
)
|
| 564 |
+
return super().searchsorted(value=value, side=side, sorter=sorter)
|
| 565 |
+
|
| 566 |
+
def _cmp_method(self, other, op):
|
| 567 |
+
from pandas.arrays import BooleanArray
|
| 568 |
+
|
| 569 |
+
if isinstance(other, StringArray):
|
| 570 |
+
other = other._ndarray
|
| 571 |
+
|
| 572 |
+
mask = isna(self) | isna(other)
|
| 573 |
+
valid = ~mask
|
| 574 |
+
|
| 575 |
+
if not lib.is_scalar(other):
|
| 576 |
+
if len(other) != len(self):
|
| 577 |
+
# prevent improper broadcasting when other is 2D
|
| 578 |
+
raise ValueError(
|
| 579 |
+
f"Lengths of operands do not match: {len(self)} != {len(other)}"
|
| 580 |
+
)
|
| 581 |
+
|
| 582 |
+
other = np.asarray(other)
|
| 583 |
+
other = other[valid]
|
| 584 |
+
|
| 585 |
+
if op.__name__ in ops.ARITHMETIC_BINOPS:
|
| 586 |
+
result = np.empty_like(self._ndarray, dtype="object")
|
| 587 |
+
result[mask] = libmissing.NA
|
| 588 |
+
result[valid] = op(self._ndarray[valid], other)
|
| 589 |
+
return StringArray(result)
|
| 590 |
+
else:
|
| 591 |
+
# logical
|
| 592 |
+
result = np.zeros(len(self._ndarray), dtype="bool")
|
| 593 |
+
result[valid] = op(self._ndarray[valid], other)
|
| 594 |
+
return BooleanArray(result, mask)
|
| 595 |
+
|
| 596 |
+
_arith_method = _cmp_method
|
| 597 |
+
|
| 598 |
+
# ------------------------------------------------------------------------
|
| 599 |
+
# String methods interface
|
| 600 |
+
# error: Incompatible types in assignment (expression has type "NAType",
|
| 601 |
+
# base class "NumpyExtensionArray" defined the type as "float")
|
| 602 |
+
_str_na_value = libmissing.NA # type: ignore[assignment]
|
| 603 |
+
|
| 604 |
+
def _str_map(
|
| 605 |
+
self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
|
| 606 |
+
):
|
| 607 |
+
from pandas.arrays import BooleanArray
|
| 608 |
+
|
| 609 |
+
if dtype is None:
|
| 610 |
+
dtype = StringDtype(storage="python")
|
| 611 |
+
if na_value is None:
|
| 612 |
+
na_value = self.dtype.na_value
|
| 613 |
+
|
| 614 |
+
mask = isna(self)
|
| 615 |
+
arr = np.asarray(self)
|
| 616 |
+
|
| 617 |
+
if is_integer_dtype(dtype) or is_bool_dtype(dtype):
|
| 618 |
+
constructor: type[IntegerArray | BooleanArray]
|
| 619 |
+
if is_integer_dtype(dtype):
|
| 620 |
+
constructor = IntegerArray
|
| 621 |
+
else:
|
| 622 |
+
constructor = BooleanArray
|
| 623 |
+
|
| 624 |
+
na_value_is_na = isna(na_value)
|
| 625 |
+
if na_value_is_na:
|
| 626 |
+
na_value = 1
|
| 627 |
+
elif dtype == np.dtype("bool"):
|
| 628 |
+
na_value = bool(na_value)
|
| 629 |
+
result = lib.map_infer_mask(
|
| 630 |
+
arr,
|
| 631 |
+
f,
|
| 632 |
+
mask.view("uint8"),
|
| 633 |
+
convert=False,
|
| 634 |
+
na_value=na_value,
|
| 635 |
+
# error: Argument 1 to "dtype" has incompatible type
|
| 636 |
+
# "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected
|
| 637 |
+
# "Type[object]"
|
| 638 |
+
dtype=np.dtype(dtype), # type: ignore[arg-type]
|
| 639 |
+
)
|
| 640 |
+
|
| 641 |
+
if not na_value_is_na:
|
| 642 |
+
mask[:] = False
|
| 643 |
+
|
| 644 |
+
return constructor(result, mask)
|
| 645 |
+
|
| 646 |
+
elif is_string_dtype(dtype) and not is_object_dtype(dtype):
|
| 647 |
+
# i.e. StringDtype
|
| 648 |
+
result = lib.map_infer_mask(
|
| 649 |
+
arr, f, mask.view("uint8"), convert=False, na_value=na_value
|
| 650 |
+
)
|
| 651 |
+
return StringArray(result)
|
| 652 |
+
else:
|
| 653 |
+
# This is when the result type is object. We reach this when
|
| 654 |
+
# -> We know the result type is truly object (e.g. .encode returns bytes
|
| 655 |
+
# or .findall returns a list).
|
| 656 |
+
# -> We don't know the result type. E.g. `.get` can return anything.
|
| 657 |
+
return lib.map_infer_mask(arr, f, mask.view("uint8"))
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/string_arrow.py
ADDED
|
@@ -0,0 +1,719 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from functools import partial
|
| 4 |
+
import operator
|
| 5 |
+
import re
|
| 6 |
+
from typing import (
|
| 7 |
+
TYPE_CHECKING,
|
| 8 |
+
Callable,
|
| 9 |
+
Union,
|
| 10 |
+
)
|
| 11 |
+
import warnings
|
| 12 |
+
|
| 13 |
+
import numpy as np
|
| 14 |
+
|
| 15 |
+
from pandas._libs import (
|
| 16 |
+
lib,
|
| 17 |
+
missing as libmissing,
|
| 18 |
+
)
|
| 19 |
+
from pandas.compat import (
|
| 20 |
+
pa_version_under10p1,
|
| 21 |
+
pa_version_under13p0,
|
| 22 |
+
)
|
| 23 |
+
from pandas.util._exceptions import find_stack_level
|
| 24 |
+
|
| 25 |
+
from pandas.core.dtypes.common import (
|
| 26 |
+
is_bool_dtype,
|
| 27 |
+
is_integer_dtype,
|
| 28 |
+
is_object_dtype,
|
| 29 |
+
is_scalar,
|
| 30 |
+
is_string_dtype,
|
| 31 |
+
pandas_dtype,
|
| 32 |
+
)
|
| 33 |
+
from pandas.core.dtypes.missing import isna
|
| 34 |
+
|
| 35 |
+
from pandas.core.arrays._arrow_string_mixins import ArrowStringArrayMixin
|
| 36 |
+
from pandas.core.arrays.arrow import ArrowExtensionArray
|
| 37 |
+
from pandas.core.arrays.boolean import BooleanDtype
|
| 38 |
+
from pandas.core.arrays.integer import Int64Dtype
|
| 39 |
+
from pandas.core.arrays.numeric import NumericDtype
|
| 40 |
+
from pandas.core.arrays.string_ import (
|
| 41 |
+
BaseStringArray,
|
| 42 |
+
StringDtype,
|
| 43 |
+
)
|
| 44 |
+
from pandas.core.ops import invalid_comparison
|
| 45 |
+
from pandas.core.strings.object_array import ObjectStringArrayMixin
|
| 46 |
+
|
| 47 |
+
if not pa_version_under10p1:
|
| 48 |
+
import pyarrow as pa
|
| 49 |
+
import pyarrow.compute as pc
|
| 50 |
+
|
| 51 |
+
from pandas.core.arrays.arrow._arrow_utils import fallback_performancewarning
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
if TYPE_CHECKING:
|
| 55 |
+
from collections.abc import Sequence
|
| 56 |
+
|
| 57 |
+
from pandas._typing import (
|
| 58 |
+
ArrayLike,
|
| 59 |
+
AxisInt,
|
| 60 |
+
Dtype,
|
| 61 |
+
Scalar,
|
| 62 |
+
npt,
|
| 63 |
+
)
|
| 64 |
+
|
| 65 |
+
from pandas import Series
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
ArrowStringScalarOrNAT = Union[str, libmissing.NAType]
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def _chk_pyarrow_available() -> None:
|
| 72 |
+
if pa_version_under10p1:
|
| 73 |
+
msg = "pyarrow>=10.0.1 is required for PyArrow backed ArrowExtensionArray."
|
| 74 |
+
raise ImportError(msg)
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
# TODO: Inherit directly from BaseStringArrayMethods. Currently we inherit from
|
| 78 |
+
# ObjectStringArrayMixin because we want to have the object-dtype based methods as
|
| 79 |
+
# fallback for the ones that pyarrow doesn't yet support
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
class ArrowStringArray(ObjectStringArrayMixin, ArrowExtensionArray, BaseStringArray):
|
| 83 |
+
"""
|
| 84 |
+
Extension array for string data in a ``pyarrow.ChunkedArray``.
|
| 85 |
+
|
| 86 |
+
.. warning::
|
| 87 |
+
|
| 88 |
+
ArrowStringArray is considered experimental. The implementation and
|
| 89 |
+
parts of the API may change without warning.
|
| 90 |
+
|
| 91 |
+
Parameters
|
| 92 |
+
----------
|
| 93 |
+
values : pyarrow.Array or pyarrow.ChunkedArray
|
| 94 |
+
The array of data.
|
| 95 |
+
|
| 96 |
+
Attributes
|
| 97 |
+
----------
|
| 98 |
+
None
|
| 99 |
+
|
| 100 |
+
Methods
|
| 101 |
+
-------
|
| 102 |
+
None
|
| 103 |
+
|
| 104 |
+
See Also
|
| 105 |
+
--------
|
| 106 |
+
:func:`pandas.array`
|
| 107 |
+
The recommended function for creating a ArrowStringArray.
|
| 108 |
+
Series.str
|
| 109 |
+
The string methods are available on Series backed by
|
| 110 |
+
a ArrowStringArray.
|
| 111 |
+
|
| 112 |
+
Notes
|
| 113 |
+
-----
|
| 114 |
+
ArrowStringArray returns a BooleanArray for comparison methods.
|
| 115 |
+
|
| 116 |
+
Examples
|
| 117 |
+
--------
|
| 118 |
+
>>> pd.array(['This is', 'some text', None, 'data.'], dtype="string[pyarrow]")
|
| 119 |
+
<ArrowStringArray>
|
| 120 |
+
['This is', 'some text', <NA>, 'data.']
|
| 121 |
+
Length: 4, dtype: string
|
| 122 |
+
"""
|
| 123 |
+
|
| 124 |
+
# error: Incompatible types in assignment (expression has type "StringDtype",
|
| 125 |
+
# base class "ArrowExtensionArray" defined the type as "ArrowDtype")
|
| 126 |
+
_dtype: StringDtype # type: ignore[assignment]
|
| 127 |
+
_storage = "pyarrow"
|
| 128 |
+
|
| 129 |
+
def __init__(self, values) -> None:
|
| 130 |
+
_chk_pyarrow_available()
|
| 131 |
+
if isinstance(values, (pa.Array, pa.ChunkedArray)) and pa.types.is_string(
|
| 132 |
+
values.type
|
| 133 |
+
):
|
| 134 |
+
values = pc.cast(values, pa.large_string())
|
| 135 |
+
|
| 136 |
+
super().__init__(values)
|
| 137 |
+
self._dtype = StringDtype(storage=self._storage)
|
| 138 |
+
|
| 139 |
+
if not pa.types.is_large_string(self._pa_array.type) and not (
|
| 140 |
+
pa.types.is_dictionary(self._pa_array.type)
|
| 141 |
+
and pa.types.is_large_string(self._pa_array.type.value_type)
|
| 142 |
+
):
|
| 143 |
+
raise ValueError(
|
| 144 |
+
"ArrowStringArray requires a PyArrow (chunked) array of "
|
| 145 |
+
"large_string type"
|
| 146 |
+
)
|
| 147 |
+
|
| 148 |
+
@classmethod
def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
    """
    Box ``value`` via the parent class, then widen ``string`` to
    ``large_string`` when the caller did not request a specific type.
    """
    boxed = super()._box_pa_scalar(value, pa_type)
    if pa_type is None and pa.types.is_string(boxed.type):
        return pc.cast(boxed, pa.large_string())
    return boxed
|
| 154 |
+
|
| 155 |
+
@classmethod
def _box_pa_array(
    cls, value, pa_type: pa.DataType | None = None, copy: bool = False
) -> pa.Array | pa.ChunkedArray:
    """
    Box ``value`` via the parent class, then widen ``string`` to
    ``large_string`` when the caller did not request a specific type.
    """
    # NOTE(review): ``copy`` is accepted but not forwarded to the parent
    # implementation -- confirm whether that is intentional.
    boxed = super()._box_pa_array(value, pa_type)
    if pa_type is None and pa.types.is_string(boxed.type):
        return pc.cast(boxed, pa.large_string())
    return boxed
|
| 163 |
+
|
| 164 |
+
def __len__(self) -> int:
    """
    Return the number of elements in the underlying pyarrow array.

    Returns
    -------
    int
    """
    pa_array = self._pa_array
    return len(pa_array)
|
| 173 |
+
|
| 174 |
+
@classmethod
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
    """
    Build an array from a sequence of scalars.

    Masked arrays and pyarrow arrays take dedicated paths; anything else is
    converted through ``lib.ensure_string_array``. The result is always
    stored as pyarrow ``large_string``.
    """
    from pandas.core.arrays.masked import BaseMaskedArray

    _chk_pyarrow_available()

    is_plain_alias = isinstance(dtype, str) and dtype == "string"
    if dtype and not is_plain_alias:
        dtype = pandas_dtype(dtype)
        assert isinstance(dtype, StringDtype) and dtype.storage in (
            "pyarrow",
            "pyarrow_numpy",
        )

    if isinstance(scalars, BaseMaskedArray):
        # avoid costly conversion to object dtype in ensure_string_array and
        # numerical issues with Float32Dtype
        missing = scalars._mask
        strings = lib.ensure_string_array(
            scalars._data, copy=copy, convert_na_value=False
        )
        return cls(pa.array(strings, mask=missing, type=pa.large_string()))

    if isinstance(scalars, (pa.Array, pa.ChunkedArray)):
        return cls(pc.cast(scalars, pa.large_string()))

    # convert non-na-likes to str
    strings = lib.ensure_string_array(scalars, copy=copy)
    return cls(pa.array(strings, type=pa.large_string(), from_pandas=True))
|
| 200 |
+
|
| 201 |
+
@classmethod
def _from_sequence_of_strings(
    cls, strings, dtype: Dtype | None = None, copy: bool = False
):
    """Build an array from strings by delegating to ``_from_sequence``."""
    array = cls._from_sequence(strings, dtype=dtype, copy=copy)
    return array
|
| 206 |
+
|
| 207 |
+
@property
def dtype(self) -> StringDtype:  # type: ignore[override]
    """
    The ``StringDtype`` instance stored on this array at construction time.
    """
    stored_dtype = self._dtype
    return stored_dtype
|
| 213 |
+
|
| 214 |
+
def insert(self, loc: int, item) -> ArrowStringArray:
    """
    Insert ``item`` at position ``loc`` via the parent implementation.

    Raises
    ------
    TypeError
        If ``item`` is neither a ``str`` nor ``libmissing.NA``.
    """
    acceptable = isinstance(item, str) or item is libmissing.NA
    if not acceptable:
        raise TypeError("Scalar must be NA or str")
    return super().insert(loc, item)
|
| 218 |
+
|
| 219 |
+
@classmethod
def _result_converter(cls, values, na=None):
    """
    Convert pyarrow ``values`` through ``BooleanDtype.__from_arrow__``.

    ``na`` is accepted for signature compatibility and is not used here.
    """
    boolean_dtype = BooleanDtype()
    return boolean_dtype.__from_arrow__(values)
|
| 222 |
+
|
| 223 |
+
def _maybe_convert_setitem_value(self, value):
    """
    Normalise a value for ``__setitem__`` so that pyarrow can accept it.

    NA-like entries become ``None``; everything else must be a ``str``.

    Raises
    ------
    TypeError
        If a scalar, or any element of a list-like, is neither NA nor ``str``.
    """
    if not is_scalar(value):
        value = np.array(value, dtype=object, copy=True)
        value[isna(value)] = None
        if any(not (elem is None or isinstance(elem, str)) for elem in value):
            raise TypeError("Scalar must be NA or str")
    elif isna(value):
        value = None
    elif not isinstance(value, str):
        raise TypeError("Scalar must be NA or str")
    return super()._maybe_convert_setitem_value(value)
|
| 237 |
+
|
| 238 |
+
def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
    """
    Return a boolean ndarray marking which elements occur in ``values``.

    Only candidates that pyarrow infers as string, large_string or null are
    kept; if none remain, the result is all ``False``.
    """
    accepted_types = (pa.string(), pa.null(), pa.large_string())
    # Convert every candidate first, then filter on the inferred type.
    boxed = [pa.scalar(candidate, from_pandas=True) for candidate in values]
    value_set = []
    for scalar in boxed:
        if scalar.type in accepted_types:
            value_set.append(scalar.as_py())

    # short-circuit to return all False array.
    if len(value_set) == 0:
        return np.zeros(len(self), dtype=bool)

    lookup = pa.array(value_set, type=self._pa_array.type)
    result = pc.is_in(self._pa_array, value_set=lookup)
    # pyarrow 2.0.0 returned nulls, so we explicitly specify dtype to convert
    # nulls to False
    return np.array(result, dtype=np.bool_)
|
| 255 |
+
|
| 256 |
+
def astype(self, dtype, copy: bool = True):
    """
    Cast to ``dtype``.

    Handles three cases locally -- same dtype, pandas ``NumericDtype`` and
    NumPy floating dtypes -- and defers everything else to the parent class.
    """
    dtype = pandas_dtype(dtype)

    if dtype == self.dtype:
        return self.copy() if copy else self

    if isinstance(dtype, NumericDtype):
        target_type = pa.from_numpy_dtype(dtype.numpy_dtype)
        return dtype.__from_arrow__(self._pa_array.cast(target_type))

    if isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.floating):
        return self.to_numpy(dtype=dtype, na_value=np.nan)

    return super().astype(dtype, copy=copy)
|
| 270 |
+
|
| 271 |
+
@property
def _data(self):
    """
    Deprecated alias for ``_pa_array``; emits a ``FutureWarning`` on access.
    """
    # dask accesses ._data directly
    message = (
        f"{type(self).__name__}._data is a deprecated and will be removed "
        "in a future version, use ._pa_array instead"
    )
    warnings.warn(message, FutureWarning, stacklevel=find_stack_level())
    return self._pa_array
|
| 281 |
+
|
| 282 |
+
# ------------------------------------------------------------------------
|
| 283 |
+
# String methods interface
|
| 284 |
+
|
| 285 |
+
# error: Incompatible types in assignment (expression has type "NAType",
|
| 286 |
+
# base class "ObjectStringArrayMixin" defined the type as "float")
|
| 287 |
+
_str_na_value = libmissing.NA # type: ignore[assignment]
|
| 288 |
+
|
| 289 |
+
def _str_map(
    self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
):
    """
    Apply ``f`` element-wise to the non-missing values.

    The container of the result depends on ``dtype``: a masked
    ``IntegerArray``/``BooleanArray`` for integer/bool dtypes, a new array of
    this type for string dtypes, and whatever ``lib.map_infer_mask`` returns
    otherwise. ``dtype`` and ``na_value`` default to this array's dtype and
    its ``na_value``.
    """
    # TODO: de-duplicate with StringArray method. This method is more or less
    # copy and paste.
    from pandas.arrays import (
        BooleanArray,
        IntegerArray,
    )

    if dtype is None:
        dtype = self.dtype
    if na_value is None:
        na_value = self.dtype.na_value

    mask = isna(self)
    arr = np.asarray(self)
    mask_u8 = mask.view("uint8")

    wants_integer = is_integer_dtype(dtype)
    if wants_integer or is_bool_dtype(dtype):
        constructor = IntegerArray if wants_integer else BooleanArray

        na_value_is_na = isna(na_value)
        if na_value_is_na:
            # placeholder written at masked positions; the mask hides it
            na_value = 1
        result = lib.map_infer_mask(
            arr,
            f,
            mask_u8,
            convert=False,
            na_value=na_value,
            # error: Argument 1 to "dtype" has incompatible type
            # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; expected
            # "Type[object]"
            dtype=np.dtype(dtype),  # type: ignore[arg-type]
        )

        if not na_value_is_na:
            # an explicit fill value was written, so nothing stays masked
            mask[:] = False

        return constructor(result, mask)

    if is_string_dtype(dtype) and not is_object_dtype(dtype):
        # i.e. StringDtype
        mapped = lib.map_infer_mask(
            arr, f, mask_u8, convert=False, na_value=na_value
        )
        as_arrow = pa.array(
            mapped, mask=mask, type=pa.large_string(), from_pandas=True
        )
        return type(self)(as_arrow)

    # This is when the result type is object. We reach this when
    # -> We know the result type is truly object (e.g. .encode returns bytes
    #    or .findall returns a list).
    # -> We don't know the result type. E.g. `.get` can return anything.
    return lib.map_infer_mask(arr, f, mask_u8)
|
| 350 |
+
|
| 351 |
+
def _str_contains(
    self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
):
    """
    Test each element for the presence of ``pat``.

    Non-zero ``flags`` use the parent implementation after issuing a
    performance warning. Otherwise the pyarrow regex or literal substring
    matcher is used, and missing results are replaced by ``bool(na)`` when
    ``na`` is not NA.
    """
    if flags:
        fallback_performancewarning()
        return super()._str_contains(pat, case, flags, na, regex)

    matcher = pc.match_substring_regex if regex else pc.match_substring
    matched = matcher(self._pa_array, pat, ignore_case=not case)
    result = self._result_converter(matched, na=na)
    if not isna(na):
        result[isna(result)] = bool(na)
    return result
|
| 366 |
+
|
| 367 |
+
def _str_startswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
    """
    Test whether each element starts with ``pat``.

    ``pat`` may be one string or a tuple of alternatives. An empty tuple
    yields ``False`` for every non-missing element. Nulls are filled with
    ``na`` when ``na`` is not NA.
    """
    if isinstance(pat, str):
        result = pc.starts_with(self._pa_array, pattern=pat)
    elif len(pat) == 0:
        # mimic existing behaviour of string extension array
        # and python string method
        result = pa.array(
            np.zeros(len(self._pa_array), dtype=bool), mask=isna(self._pa_array)
        )
    else:
        result = pc.starts_with(self._pa_array, pattern=pat[0])
        for alternative in pat[1:]:
            hit = pc.starts_with(self._pa_array, pattern=alternative)
            result = pc.or_(result, hit)

    if not isna(na):
        result = result.fill_null(na)
    return self._result_converter(result)
|
| 385 |
+
|
| 386 |
+
def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
    """
    Test whether each element ends with ``pat``.

    ``pat`` may be one string or a tuple of alternatives. An empty tuple
    yields ``False`` for every non-missing element. Nulls are filled with
    ``na`` when ``na`` is not NA.
    """
    if isinstance(pat, str):
        result = pc.ends_with(self._pa_array, pattern=pat)
    elif len(pat) == 0:
        # mimic existing behaviour of string extension array
        # and python string method
        result = pa.array(
            np.zeros(len(self._pa_array), dtype=bool), mask=isna(self._pa_array)
        )
    else:
        result = pc.ends_with(self._pa_array, pattern=pat[0])
        for alternative in pat[1:]:
            hit = pc.ends_with(self._pa_array, pattern=alternative)
            result = pc.or_(result, hit)

    if not isna(na):
        result = result.fill_null(na)
    return self._result_converter(result)
|
| 404 |
+
|
| 405 |
+
def _str_replace(
    self,
    pat: str | re.Pattern,
    repl: str | Callable,
    n: int = -1,
    case: bool = True,
    flags: int = 0,
    regex: bool = True,
):
    """
    Replace occurrences of ``pat`` with ``repl``.

    The pyarrow kernels are used only for a plain-string pattern, a
    non-callable replacement, case-sensitive matching and no flags. Every
    other combination uses the parent implementation after issuing a
    performance warning.
    """
    needs_fallback = (
        isinstance(pat, re.Pattern) or callable(repl) or not case or flags
    )
    if needs_fallback:
        fallback_performancewarning()
        return super()._str_replace(pat, repl, n, case, flags, regex)

    if regex:
        replacer = pc.replace_substring_regex
    else:
        replacer = pc.replace_substring
    replaced = replacer(
        self._pa_array, pattern=pat, replacement=repl, max_replacements=n
    )
    return type(self)(replaced)
|
| 421 |
+
|
| 422 |
+
def _str_repeat(self, repeats: int | Sequence[int]):
    """
    Repeat each string ``repeats`` times.

    A single ``int`` uses ``pc.binary_repeat``; any other input is delegated
    to the parent implementation.
    """
    if isinstance(repeats, int):
        return type(self)(pc.binary_repeat(self._pa_array, repeats))
    return super()._str_repeat(repeats)
|
| 427 |
+
|
| 428 |
+
def _str_match(
|
| 429 |
+
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
|
| 430 |
+
):
|
| 431 |
+
if not pat.startswith("^"):
|
| 432 |
+
pat = f"^{pat}"
|
| 433 |
+
return self._str_contains(pat, case, flags, na, regex=True)
|
| 434 |
+
|
| 435 |
+
def _str_fullmatch(
|
| 436 |
+
self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
|
| 437 |
+
):
|
| 438 |
+
if not pat.endswith("$") or pat.endswith("\\$"):
|
| 439 |
+
pat = f"{pat}$"
|
| 440 |
+
return self._str_match(pat, case, flags, na)
|
| 441 |
+
|
| 442 |
+
def _str_slice(
    self, start: int | None = None, stop: int | None = None, step: int | None = None
):
    """
    Slice each string as ``s[start:stop:step]``.

    Without an explicit ``stop`` the parent implementation is used;
    otherwise ``pc.utf8_slice_codeunits`` is called with ``start`` defaulting
    to 0 and ``step`` defaulting to 1.
    """
    if stop is None:
        return super()._str_slice(start, stop, step)

    start = 0 if start is None else start
    step = 1 if step is None else step
    sliced = pc.utf8_slice_codeunits(
        self._pa_array, start=start, stop=stop, step=step
    )
    return type(self)(sliced)
|
| 454 |
+
|
| 455 |
+
def _str_isalnum(self):
    """Apply ``pc.utf8_is_alnum`` element-wise and convert the boolean result."""
    return self._result_converter(pc.utf8_is_alnum(self._pa_array))
|
| 458 |
+
|
| 459 |
+
def _str_isalpha(self):
    """Apply ``pc.utf8_is_alpha`` element-wise and convert the boolean result."""
    return self._result_converter(pc.utf8_is_alpha(self._pa_array))
|
| 462 |
+
|
| 463 |
+
def _str_isdecimal(self):
    """Apply ``pc.utf8_is_decimal`` element-wise and convert the boolean result."""
    return self._result_converter(pc.utf8_is_decimal(self._pa_array))
|
| 466 |
+
|
| 467 |
+
def _str_isdigit(self):
    """Apply ``pc.utf8_is_digit`` element-wise and convert the boolean result."""
    return self._result_converter(pc.utf8_is_digit(self._pa_array))
|
| 470 |
+
|
| 471 |
+
def _str_islower(self):
    """Apply ``pc.utf8_is_lower`` element-wise and convert the boolean result."""
    return self._result_converter(pc.utf8_is_lower(self._pa_array))
|
| 474 |
+
|
| 475 |
+
def _str_isnumeric(self):
    """Apply ``pc.utf8_is_numeric`` element-wise and convert the boolean result."""
    return self._result_converter(pc.utf8_is_numeric(self._pa_array))
|
| 478 |
+
|
| 479 |
+
def _str_isspace(self):
    """Apply ``pc.utf8_is_space`` element-wise and convert the boolean result."""
    return self._result_converter(pc.utf8_is_space(self._pa_array))
|
| 482 |
+
|
| 483 |
+
def _str_istitle(self):
    """Apply ``pc.utf8_is_title`` element-wise and convert the boolean result."""
    return self._result_converter(pc.utf8_is_title(self._pa_array))
|
| 486 |
+
|
| 487 |
+
def _str_isupper(self):
    """Apply ``pc.utf8_is_upper`` element-wise and convert the boolean result."""
    return self._result_converter(pc.utf8_is_upper(self._pa_array))
|
| 490 |
+
|
| 491 |
+
def _str_len(self):
    """Compute ``pc.utf8_length`` per element and convert the integer result."""
    return self._convert_int_dtype(pc.utf8_length(self._pa_array))
|
| 494 |
+
|
| 495 |
+
def _str_lower(self):
    """Return a new array of this type holding ``pc.utf8_lower`` of the data."""
    lowered = pc.utf8_lower(self._pa_array)
    return type(self)(lowered)
|
| 497 |
+
|
| 498 |
+
def _str_upper(self):
    """Return a new array of this type holding ``pc.utf8_upper`` of the data."""
    uppered = pc.utf8_upper(self._pa_array)
    return type(self)(uppered)
|
| 500 |
+
|
| 501 |
+
def _str_strip(self, to_strip=None):
    """
    Trim both ends of each string.

    Whitespace is trimmed when ``to_strip`` is None; otherwise the characters
    in ``to_strip`` are trimmed.
    """
    data = self._pa_array
    trimmed = (
        pc.utf8_trim_whitespace(data)
        if to_strip is None
        else pc.utf8_trim(data, characters=to_strip)
    )
    return type(self)(trimmed)
|
| 507 |
+
|
| 508 |
+
def _str_lstrip(self, to_strip=None):
    """
    Trim the left end of each string.

    Whitespace is trimmed when ``to_strip`` is None; otherwise the characters
    in ``to_strip`` are trimmed.
    """
    data = self._pa_array
    trimmed = (
        pc.utf8_ltrim_whitespace(data)
        if to_strip is None
        else pc.utf8_ltrim(data, characters=to_strip)
    )
    return type(self)(trimmed)
|
| 514 |
+
|
| 515 |
+
def _str_rstrip(self, to_strip=None):
    """
    Trim the right end of each string.

    Whitespace is trimmed when ``to_strip`` is None; otherwise the characters
    in ``to_strip`` are trimmed.
    """
    data = self._pa_array
    trimmed = (
        pc.utf8_rtrim_whitespace(data)
        if to_strip is None
        else pc.utf8_rtrim(data, characters=to_strip)
    )
    return type(self)(trimmed)
|
| 521 |
+
|
| 522 |
+
def _str_removeprefix(self, prefix: str):
    """
    Drop ``prefix`` from every string that starts with it.

    The pyarrow path is taken only when ``pa_version_under13p0`` is false;
    otherwise the parent implementation is used.
    """
    if pa_version_under13p0:
        return super()._str_removeprefix(prefix)

    data = self._pa_array
    has_prefix = pc.starts_with(data, pattern=prefix)
    without_prefix = pc.utf8_slice_codeunits(data, len(prefix))
    return type(self)(pc.if_else(has_prefix, without_prefix, data))
|
| 529 |
+
|
| 530 |
+
def _str_removesuffix(self, suffix: str):
|
| 531 |
+
ends_with = pc.ends_with(self._pa_array, pattern=suffix)
|
| 532 |
+
removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
|
| 533 |
+
result = pc.if_else(ends_with, removed, self._pa_array)
|
| 534 |
+
return type(self)(result)
|
| 535 |
+
|
| 536 |
+
def _str_count(self, pat: str, flags: int = 0):
    """
    Count regex matches of ``pat`` in each string.

    Non-zero ``flags`` use the parent implementation; otherwise
    ``pc.count_substring_regex`` is used.
    """
    if flags:
        return super()._str_count(pat, flags)
    counts = pc.count_substring_regex(self._pa_array, pat)
    return self._convert_int_dtype(counts)
|
| 541 |
+
|
| 542 |
+
def _str_find(self, sub: str, start: int = 0, end: int | None = None):
    """
    Return the lowest index of ``sub`` within ``s[start:end]`` for each
    string, or -1 where it is not found.

    Paths
    -----
    * ``start == 0`` and ``end is None``: ``pc.find_substring`` on the whole
      string.
    * positive ``start``, explicit ``end`` and non-empty ``sub``: search the
      slice and shift hits back by ``start``.
    * everything else (negative ``start``, empty ``sub``, only one bound
      given): parent implementation.

    The offset of a hit in the slice is relative to ``start``, so the index
    in the original string is ``hit + start``. Negative ``start`` and empty
    ``sub`` are not handled here because a constant offset cannot reproduce
    ``str.find`` for them.
    """
    if start == 0 and end is None:
        result = pc.find_substring(self._pa_array, sub)
    elif start > 0 and end is not None and sub:
        sliced = pc.utf8_slice_codeunits(self._pa_array, start, stop=end)
        result = pc.find_substring(sliced, sub)
        not_found = pc.equal(result, -1)
        # Keep -1 for misses; translate hits back to whole-string indices.
        result = pc.if_else(not_found, result, pc.add(result, start))
    else:
        return super()._str_find(sub, start, end)
    return self._convert_int_dtype(result)
|
| 555 |
+
|
| 556 |
+
def _str_get_dummies(self, sep: str = "|"):
    """
    Compute indicator columns for strings split on ``sep``.

    Delegates to ``ArrowExtensionArray._str_get_dummies`` and returns the
    indicators as an ``int64`` ndarray together with the labels. With no
    labels, an empty ``(0, 0)`` array is returned.
    """
    base = ArrowExtensionArray(self._pa_array)
    dummies_pa, labels = base._str_get_dummies(sep)
    if len(labels) == 0:
        return np.empty(shape=(0, 0), dtype=np.int64), labels
    stacked = np.vstack(dummies_pa.to_numpy())
    return stacked.astype(np.int64, copy=False), labels
|
| 562 |
+
|
| 563 |
+
def _convert_int_dtype(self, result):
    """Convert a pyarrow integer result through ``Int64Dtype.__from_arrow__``."""
    int64_dtype = Int64Dtype()
    return int64_dtype.__from_arrow__(result)
|
| 565 |
+
|
| 566 |
+
def _reduce(
    self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
):
    """
    Run the reduction ``name`` through ``_reduce_calc`` and box the result.

    A ``pa.Array`` result becomes an integer array for ``argmin``/``argmax``
    and an array of this type otherwise. Any other result is returned as-is.
    """
    result = self._reduce_calc(name, skipna=skipna, keepdims=keepdims, **kwargs)
    if not isinstance(result, pa.Array):
        return result
    if name in ("argmin", "argmax"):
        return self._convert_int_dtype(result)
    return type(self)(result)
|
| 576 |
+
|
| 577 |
+
def _rank(
    self,
    *,
    axis: AxisInt = 0,
    method: str = "average",
    na_option: str = "keep",
    ascending: bool = True,
    pct: bool = False,
):
    """
    Rank the values; see Series.rank.__doc__ for the parameters.

    The ranks come from ``_rank_calc`` and are passed through
    ``_convert_int_dtype``.
    """
    ranks = self._rank_calc(
        axis=axis,
        method=method,
        na_option=na_option,
        ascending=ascending,
        pct=pct,
    )
    return self._convert_int_dtype(ranks)
|
| 598 |
+
|
| 599 |
+
|
| 600 |
+
class ArrowStringArrayNumpySemantics(ArrowStringArray):
    """
    ``ArrowStringArray`` variant using the ``"pyarrow_numpy"`` storage.

    Boolean and integer helper results are returned as NumPy arrays, and
    comparisons return plain ``bool`` ndarrays.
    """

    _storage = "pyarrow_numpy"

    @classmethod
    def _result_converter(cls, values, na=None):
        """
        Convert a pyarrow boolean result to NumPy, filling nulls with
        ``bool(na)`` when ``na`` is not NA and with ``np.nan`` otherwise.
        """
        if not isna(na):
            values = values.fill_null(bool(na))
        as_extension = ArrowExtensionArray(values)
        return as_extension.to_numpy(na_value=np.nan)

    def __getattribute__(self, item):
        """
        Resolve attributes defined directly on ``ArrowStringArrayMixin``
        through that mixin, bound to this instance; everything else goes
        through normal lookup.
        """
        # ArrowStringArray and we both inherit from ArrowExtensionArray, which
        # creates inheritance problems (Diamond inheritance)
        excluded = ("_pa_array", "__dict__")
        if item in ArrowStringArrayMixin.__dict__ and item not in excluded:
            mixin_attr = getattr(ArrowStringArrayMixin, item)
            return partial(mixin_attr, self)
        return super().__getattribute__(item)

    def _str_map(
        self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
    ):
        """
        Apply ``f`` element-wise to the non-missing values.

        Integer and bool dtypes yield NumPy arrays (``np.nan`` / ``False`` at
        missing positions), string dtypes yield a new array of this type, and
        anything else is whatever ``lib.map_infer_mask`` returns.
        """
        if dtype is None:
            dtype = self.dtype
        if na_value is None:
            na_value = self.dtype.na_value

        mask = isna(self)
        arr = np.asarray(self)
        mask_u8 = mask.view("uint8")

        wants_integer = is_integer_dtype(dtype)
        if wants_integer or is_bool_dtype(dtype):
            na_value = np.nan if wants_integer else False
            try:
                return lib.map_infer_mask(
                    arr,
                    f,
                    mask_u8,
                    convert=False,
                    na_value=na_value,
                    dtype=np.dtype(dtype),  # type: ignore[arg-type]
                )
            except ValueError:
                # Retry without a fixed dtype and optionally let pandas
                # infer a tighter dtype from the object result.
                result = lib.map_infer_mask(
                    arr,
                    f,
                    mask_u8,
                    convert=False,
                    na_value=na_value,
                )
                if convert and result.dtype == object:
                    result = lib.maybe_convert_objects(result)
                return result

        if is_string_dtype(dtype) and not is_object_dtype(dtype):
            # i.e. StringDtype
            mapped = lib.map_infer_mask(
                arr, f, mask_u8, convert=False, na_value=na_value
            )
            as_arrow = pa.array(
                mapped, mask=mask, type=pa.large_string(), from_pandas=True
            )
            return type(self)(as_arrow)

        # This is when the result type is object. We reach this when
        # -> We know the result type is truly object (e.g. .encode returns bytes
        #    or .findall returns a list).
        # -> We don't know the result type. E.g. `.get` can return anything.
        return lib.map_infer_mask(arr, f, mask_u8)

    def _convert_int_dtype(self, result):
        """
        Convert a pyarrow integer result to a NumPy array, widening
        ``int32`` to ``int64``.
        """
        if isinstance(result, pa.Array):
            as_numpy = result.to_numpy(zero_copy_only=False)
        else:
            as_numpy = result.to_numpy()
        if as_numpy.dtype == np.int32:
            as_numpy = as_numpy.astype(np.int64)
        return as_numpy

    def _cmp_method(self, other, op):
        """
        Compare with ``other`` and return a ``bool`` ndarray.

        Missing results become ``True`` for ``!=`` and ``False`` for every
        other operator. Comparisons pyarrow does not implement are handed to
        ``invalid_comparison``.
        """
        try:
            result = super()._cmp_method(other, op)
        except pa.ArrowNotImplementedError:
            return invalid_comparison(self, other, op)
        fill = True if op == operator.ne else False
        return result.to_numpy(np.bool_, na_value=fill)

    def value_counts(self, dropna: bool = True) -> Series:
        """
        Return the parent's value counts as a Series whose values have been
        converted with ``to_numpy()``.
        """
        from pandas import Series

        counts = super().value_counts(dropna)
        return Series(
            counts._values.to_numpy(), index=counts.index, name=counts.name, copy=False
        )

    def _reduce(
        self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
    ):
        """
        Reduce the array.

        ``any``/``all`` are evaluated on a boolean array marking non-empty
        strings; for ``all`` with ``skipna=False`` nulls are folded in with a
        Kleene ``and``. Other reductions use the parent implementation.
        """
        if name not in ["any", "all"]:
            return super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs)

        non_empty = pc.not_equal(self._pa_array, "")
        if not skipna and name == "all":
            not_null = pc.invert(pc.is_null(self._pa_array))
            truthy = pc.and_kleene(not_null, non_empty)
        else:
            truthy = non_empty
        return ArrowExtensionArray(truthy)._reduce(
            name, skipna=skipna, keepdims=keepdims, **kwargs
        )

    def insert(self, loc: int, item) -> ArrowStringArrayNumpySemantics:
        """
        Insert ``item`` at ``loc``, treating the ``np.nan`` object as
        ``libmissing.NA``.
        """
        if item is np.nan:
            item = libmissing.NA
        return super().insert(loc, item)  # type: ignore[return-value]
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/arrays/timedeltas.py
ADDED
|
@@ -0,0 +1,1185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from datetime import timedelta
|
| 4 |
+
import operator
|
| 5 |
+
from typing import (
|
| 6 |
+
TYPE_CHECKING,
|
| 7 |
+
cast,
|
| 8 |
+
)
|
| 9 |
+
|
| 10 |
+
import numpy as np
|
| 11 |
+
|
| 12 |
+
from pandas._libs import (
|
| 13 |
+
lib,
|
| 14 |
+
tslibs,
|
| 15 |
+
)
|
| 16 |
+
from pandas._libs.tslibs import (
|
| 17 |
+
NaT,
|
| 18 |
+
NaTType,
|
| 19 |
+
Tick,
|
| 20 |
+
Timedelta,
|
| 21 |
+
astype_overflowsafe,
|
| 22 |
+
get_supported_dtype,
|
| 23 |
+
iNaT,
|
| 24 |
+
is_supported_dtype,
|
| 25 |
+
periods_per_second,
|
| 26 |
+
)
|
| 27 |
+
from pandas._libs.tslibs.conversion import cast_from_unit_vectorized
|
| 28 |
+
from pandas._libs.tslibs.fields import (
|
| 29 |
+
get_timedelta_days,
|
| 30 |
+
get_timedelta_field,
|
| 31 |
+
)
|
| 32 |
+
from pandas._libs.tslibs.timedeltas import (
|
| 33 |
+
array_to_timedelta64,
|
| 34 |
+
floordiv_object_array,
|
| 35 |
+
ints_to_pytimedelta,
|
| 36 |
+
parse_timedelta_unit,
|
| 37 |
+
truediv_object_array,
|
| 38 |
+
)
|
| 39 |
+
from pandas.compat.numpy import function as nv
|
| 40 |
+
from pandas.util._validators import validate_endpoints
|
| 41 |
+
|
| 42 |
+
from pandas.core.dtypes.common import (
|
| 43 |
+
TD64NS_DTYPE,
|
| 44 |
+
is_float_dtype,
|
| 45 |
+
is_integer_dtype,
|
| 46 |
+
is_object_dtype,
|
| 47 |
+
is_scalar,
|
| 48 |
+
is_string_dtype,
|
| 49 |
+
pandas_dtype,
|
| 50 |
+
)
|
| 51 |
+
from pandas.core.dtypes.dtypes import ExtensionDtype
|
| 52 |
+
from pandas.core.dtypes.missing import isna
|
| 53 |
+
|
| 54 |
+
from pandas.core import (
|
| 55 |
+
nanops,
|
| 56 |
+
roperator,
|
| 57 |
+
)
|
| 58 |
+
from pandas.core.array_algos import datetimelike_accumulations
|
| 59 |
+
from pandas.core.arrays import datetimelike as dtl
|
| 60 |
+
from pandas.core.arrays._ranges import generate_regular_range
|
| 61 |
+
import pandas.core.common as com
|
| 62 |
+
from pandas.core.ops.common import unpack_zerodim_and_defer
|
| 63 |
+
|
| 64 |
+
if TYPE_CHECKING:
|
| 65 |
+
from collections.abc import Iterator
|
| 66 |
+
|
| 67 |
+
from pandas._typing import (
|
| 68 |
+
AxisInt,
|
| 69 |
+
DateTimeErrorChoices,
|
| 70 |
+
DtypeObj,
|
| 71 |
+
NpDtype,
|
| 72 |
+
Self,
|
| 73 |
+
npt,
|
| 74 |
+
)
|
| 75 |
+
|
| 76 |
+
from pandas import DataFrame
|
| 77 |
+
|
| 78 |
+
import textwrap
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _field_accessor(name: str, alias: str, docstring: str):
|
| 82 |
+
def f(self) -> np.ndarray:
|
| 83 |
+
values = self.asi8
|
| 84 |
+
if alias == "days":
|
| 85 |
+
result = get_timedelta_days(values, reso=self._creso)
|
| 86 |
+
else:
|
| 87 |
+
# error: Incompatible types in assignment (
|
| 88 |
+
# expression has type "ndarray[Any, dtype[signedinteger[_32Bit]]]",
|
| 89 |
+
# variable has type "ndarray[Any, dtype[signedinteger[_64Bit]]]
|
| 90 |
+
result = get_timedelta_field(values, alias, reso=self._creso) # type: ignore[assignment]
|
| 91 |
+
if self._hasna:
|
| 92 |
+
result = self._maybe_mask_results(
|
| 93 |
+
result, fill_value=None, convert="float64"
|
| 94 |
+
)
|
| 95 |
+
|
| 96 |
+
return result
|
| 97 |
+
|
| 98 |
+
f.__name__ = name
|
| 99 |
+
f.__doc__ = f"\n{docstring}\n"
|
| 100 |
+
return property(f)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
class TimedeltaArray(dtl.TimelikeOps):
|
| 104 |
+
"""
|
| 105 |
+
Pandas ExtensionArray for timedelta data.
|
| 106 |
+
|
| 107 |
+
.. warning::
|
| 108 |
+
|
| 109 |
+
TimedeltaArray is currently experimental, and its API may change
|
| 110 |
+
without warning. In particular, :attr:`TimedeltaArray.dtype` is
|
| 111 |
+
expected to change to be an instance of an ``ExtensionDtype``
|
| 112 |
+
subclass.
|
| 113 |
+
|
| 114 |
+
Parameters
|
| 115 |
+
----------
|
| 116 |
+
values : array-like
|
| 117 |
+
The timedelta data.
|
| 118 |
+
|
| 119 |
+
dtype : numpy.dtype
|
| 120 |
+
Currently, only ``numpy.dtype("timedelta64[ns]")`` is accepted.
|
| 121 |
+
freq : Offset, optional
|
| 122 |
+
copy : bool, default False
|
| 123 |
+
Whether to copy the underlying array of data.
|
| 124 |
+
|
| 125 |
+
Attributes
|
| 126 |
+
----------
|
| 127 |
+
None
|
| 128 |
+
|
| 129 |
+
Methods
|
| 130 |
+
-------
|
| 131 |
+
None
|
| 132 |
+
|
| 133 |
+
Examples
|
| 134 |
+
--------
|
| 135 |
+
>>> pd.arrays.TimedeltaArray._from_sequence(pd.TimedeltaIndex(['1h', '2h']))
|
| 136 |
+
<TimedeltaArray>
|
| 137 |
+
['0 days 01:00:00', '0 days 02:00:00']
|
| 138 |
+
Length: 2, dtype: timedelta64[ns]
|
| 139 |
+
"""
|
| 140 |
+
|
| 141 |
+
_typ = "timedeltaarray"
|
| 142 |
+
_internal_fill_value = np.timedelta64("NaT", "ns")
|
| 143 |
+
_recognized_scalars = (timedelta, np.timedelta64, Tick)
|
| 144 |
+
_is_recognized_dtype = lambda x: lib.is_np_dtype(x, "m")
|
| 145 |
+
_infer_matches = ("timedelta", "timedelta64")
|
| 146 |
+
|
| 147 |
+
@property
|
| 148 |
+
def _scalar_type(self) -> type[Timedelta]:
|
| 149 |
+
return Timedelta
|
| 150 |
+
|
| 151 |
+
__array_priority__ = 1000
|
| 152 |
+
# define my properties & methods for delegation
|
| 153 |
+
_other_ops: list[str] = []
|
| 154 |
+
_bool_ops: list[str] = []
|
| 155 |
+
_object_ops: list[str] = ["freq"]
|
| 156 |
+
_field_ops: list[str] = ["days", "seconds", "microseconds", "nanoseconds"]
|
| 157 |
+
_datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops + ["unit"]
|
| 158 |
+
_datetimelike_methods: list[str] = [
|
| 159 |
+
"to_pytimedelta",
|
| 160 |
+
"total_seconds",
|
| 161 |
+
"round",
|
| 162 |
+
"floor",
|
| 163 |
+
"ceil",
|
| 164 |
+
"as_unit",
|
| 165 |
+
]
|
| 166 |
+
|
| 167 |
+
# Note: ndim must be defined to ensure NaT.__richcmp__(TimedeltaArray)
|
| 168 |
+
# operates pointwise.
|
| 169 |
+
|
| 170 |
+
def _box_func(self, x: np.timedelta64) -> Timedelta | NaTType:
|
| 171 |
+
y = x.view("i8")
|
| 172 |
+
if y == NaT._value:
|
| 173 |
+
return NaT
|
| 174 |
+
return Timedelta._from_value_and_reso(y, reso=self._creso)
|
| 175 |
+
|
| 176 |
+
@property
|
| 177 |
+
# error: Return type "dtype" of "dtype" incompatible with return type
|
| 178 |
+
# "ExtensionDtype" in supertype "ExtensionArray"
|
| 179 |
+
def dtype(self) -> np.dtype[np.timedelta64]: # type: ignore[override]
|
| 180 |
+
"""
|
| 181 |
+
The dtype for the TimedeltaArray.
|
| 182 |
+
|
| 183 |
+
.. warning::
|
| 184 |
+
|
| 185 |
+
A future version of pandas will change dtype to be an instance
|
| 186 |
+
of a :class:`pandas.api.extensions.ExtensionDtype` subclass,
|
| 187 |
+
not a ``numpy.dtype``.
|
| 188 |
+
|
| 189 |
+
Returns
|
| 190 |
+
-------
|
| 191 |
+
numpy.dtype
|
| 192 |
+
"""
|
| 193 |
+
return self._ndarray.dtype
|
| 194 |
+
|
| 195 |
+
# ----------------------------------------------------------------
|
| 196 |
+
# Constructors
|
| 197 |
+
|
| 198 |
+
_freq = None
|
| 199 |
+
_default_dtype = TD64NS_DTYPE # used in TimeLikeOps.__init__
|
| 200 |
+
|
| 201 |
+
@classmethod
|
| 202 |
+
def _validate_dtype(cls, values, dtype):
|
| 203 |
+
# used in TimeLikeOps.__init__
|
| 204 |
+
dtype = _validate_td64_dtype(dtype)
|
| 205 |
+
_validate_td64_dtype(values.dtype)
|
| 206 |
+
if dtype != values.dtype:
|
| 207 |
+
raise ValueError("Values resolution does not match dtype.")
|
| 208 |
+
return dtype
|
| 209 |
+
|
| 210 |
+
# error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
|
| 211 |
+
@classmethod
|
| 212 |
+
def _simple_new( # type: ignore[override]
|
| 213 |
+
cls,
|
| 214 |
+
values: npt.NDArray[np.timedelta64],
|
| 215 |
+
freq: Tick | None = None,
|
| 216 |
+
dtype: np.dtype[np.timedelta64] = TD64NS_DTYPE,
|
| 217 |
+
) -> Self:
|
| 218 |
+
# Require td64 dtype, not unit-less, matching values.dtype
|
| 219 |
+
assert lib.is_np_dtype(dtype, "m")
|
| 220 |
+
assert not tslibs.is_unitless(dtype)
|
| 221 |
+
assert isinstance(values, np.ndarray), type(values)
|
| 222 |
+
assert dtype == values.dtype
|
| 223 |
+
assert freq is None or isinstance(freq, Tick)
|
| 224 |
+
|
| 225 |
+
result = super()._simple_new(values=values, dtype=dtype)
|
| 226 |
+
result._freq = freq
|
| 227 |
+
return result
|
| 228 |
+
|
| 229 |
+
@classmethod
|
| 230 |
+
def _from_sequence(cls, data, *, dtype=None, copy: bool = False) -> Self:
|
| 231 |
+
if dtype:
|
| 232 |
+
dtype = _validate_td64_dtype(dtype)
|
| 233 |
+
|
| 234 |
+
data, freq = sequence_to_td64ns(data, copy=copy, unit=None)
|
| 235 |
+
|
| 236 |
+
if dtype is not None:
|
| 237 |
+
data = astype_overflowsafe(data, dtype=dtype, copy=False)
|
| 238 |
+
|
| 239 |
+
return cls._simple_new(data, dtype=data.dtype, freq=freq)
|
| 240 |
+
|
| 241 |
+
@classmethod
|
| 242 |
+
def _from_sequence_not_strict(
|
| 243 |
+
cls,
|
| 244 |
+
data,
|
| 245 |
+
*,
|
| 246 |
+
dtype=None,
|
| 247 |
+
copy: bool = False,
|
| 248 |
+
freq=lib.no_default,
|
| 249 |
+
unit=None,
|
| 250 |
+
) -> Self:
|
| 251 |
+
"""
|
| 252 |
+
_from_sequence_not_strict but without responsibility for finding the
|
| 253 |
+
result's `freq`.
|
| 254 |
+
"""
|
| 255 |
+
if dtype:
|
| 256 |
+
dtype = _validate_td64_dtype(dtype)
|
| 257 |
+
|
| 258 |
+
assert unit not in ["Y", "y", "M"] # caller is responsible for checking
|
| 259 |
+
|
| 260 |
+
data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
|
| 261 |
+
|
| 262 |
+
if dtype is not None:
|
| 263 |
+
data = astype_overflowsafe(data, dtype=dtype, copy=False)
|
| 264 |
+
|
| 265 |
+
result = cls._simple_new(data, dtype=data.dtype, freq=inferred_freq)
|
| 266 |
+
|
| 267 |
+
result._maybe_pin_freq(freq, {})
|
| 268 |
+
return result
|
| 269 |
+
|
| 270 |
+
@classmethod
|
| 271 |
+
def _generate_range(
|
| 272 |
+
cls, start, end, periods, freq, closed=None, *, unit: str | None = None
|
| 273 |
+
) -> Self:
|
| 274 |
+
periods = dtl.validate_periods(periods)
|
| 275 |
+
if freq is None and any(x is None for x in [periods, start, end]):
|
| 276 |
+
raise ValueError("Must provide freq argument if no data is supplied")
|
| 277 |
+
|
| 278 |
+
if com.count_not_none(start, end, periods, freq) != 3:
|
| 279 |
+
raise ValueError(
|
| 280 |
+
"Of the four parameters: start, end, periods, "
|
| 281 |
+
"and freq, exactly three must be specified"
|
| 282 |
+
)
|
| 283 |
+
|
| 284 |
+
if start is not None:
|
| 285 |
+
start = Timedelta(start).as_unit("ns")
|
| 286 |
+
|
| 287 |
+
if end is not None:
|
| 288 |
+
end = Timedelta(end).as_unit("ns")
|
| 289 |
+
|
| 290 |
+
if unit is not None:
|
| 291 |
+
if unit not in ["s", "ms", "us", "ns"]:
|
| 292 |
+
raise ValueError("'unit' must be one of 's', 'ms', 'us', 'ns'")
|
| 293 |
+
else:
|
| 294 |
+
unit = "ns"
|
| 295 |
+
|
| 296 |
+
if start is not None and unit is not None:
|
| 297 |
+
start = start.as_unit(unit, round_ok=False)
|
| 298 |
+
if end is not None and unit is not None:
|
| 299 |
+
end = end.as_unit(unit, round_ok=False)
|
| 300 |
+
|
| 301 |
+
left_closed, right_closed = validate_endpoints(closed)
|
| 302 |
+
|
| 303 |
+
if freq is not None:
|
| 304 |
+
index = generate_regular_range(start, end, periods, freq, unit=unit)
|
| 305 |
+
else:
|
| 306 |
+
index = np.linspace(start._value, end._value, periods).astype("i8")
|
| 307 |
+
|
| 308 |
+
if not left_closed:
|
| 309 |
+
index = index[1:]
|
| 310 |
+
if not right_closed:
|
| 311 |
+
index = index[:-1]
|
| 312 |
+
|
| 313 |
+
td64values = index.view(f"m8[{unit}]")
|
| 314 |
+
return cls._simple_new(td64values, dtype=td64values.dtype, freq=freq)
|
| 315 |
+
|
| 316 |
+
# ----------------------------------------------------------------
|
| 317 |
+
# DatetimeLike Interface
|
| 318 |
+
|
| 319 |
+
def _unbox_scalar(self, value) -> np.timedelta64:
|
| 320 |
+
if not isinstance(value, self._scalar_type) and value is not NaT:
|
| 321 |
+
raise ValueError("'value' should be a Timedelta.")
|
| 322 |
+
self._check_compatible_with(value)
|
| 323 |
+
if value is NaT:
|
| 324 |
+
return np.timedelta64(value._value, self.unit)
|
| 325 |
+
else:
|
| 326 |
+
return value.as_unit(self.unit).asm8
|
| 327 |
+
|
| 328 |
+
def _scalar_from_string(self, value) -> Timedelta | NaTType:
|
| 329 |
+
return Timedelta(value)
|
| 330 |
+
|
| 331 |
+
def _check_compatible_with(self, other) -> None:
|
| 332 |
+
# we don't have anything to validate.
|
| 333 |
+
pass
|
| 334 |
+
|
| 335 |
+
# ----------------------------------------------------------------
|
| 336 |
+
# Array-Like / EA-Interface Methods
|
| 337 |
+
|
| 338 |
+
def astype(self, dtype, copy: bool = True):
|
| 339 |
+
# We handle
|
| 340 |
+
# --> timedelta64[ns]
|
| 341 |
+
# --> timedelta64
|
| 342 |
+
# DatetimeLikeArrayMixin super call handles other cases
|
| 343 |
+
dtype = pandas_dtype(dtype)
|
| 344 |
+
|
| 345 |
+
if lib.is_np_dtype(dtype, "m"):
|
| 346 |
+
if dtype == self.dtype:
|
| 347 |
+
if copy:
|
| 348 |
+
return self.copy()
|
| 349 |
+
return self
|
| 350 |
+
|
| 351 |
+
if is_supported_dtype(dtype):
|
| 352 |
+
# unit conversion e.g. timedelta64[s]
|
| 353 |
+
res_values = astype_overflowsafe(self._ndarray, dtype, copy=False)
|
| 354 |
+
return type(self)._simple_new(
|
| 355 |
+
res_values, dtype=res_values.dtype, freq=self.freq
|
| 356 |
+
)
|
| 357 |
+
else:
|
| 358 |
+
raise ValueError(
|
| 359 |
+
f"Cannot convert from {self.dtype} to {dtype}. "
|
| 360 |
+
"Supported resolutions are 's', 'ms', 'us', 'ns'"
|
| 361 |
+
)
|
| 362 |
+
|
| 363 |
+
return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy)
|
| 364 |
+
|
| 365 |
+
def __iter__(self) -> Iterator:
|
| 366 |
+
if self.ndim > 1:
|
| 367 |
+
for i in range(len(self)):
|
| 368 |
+
yield self[i]
|
| 369 |
+
else:
|
| 370 |
+
# convert in chunks of 10k for efficiency
|
| 371 |
+
data = self._ndarray
|
| 372 |
+
length = len(self)
|
| 373 |
+
chunksize = 10000
|
| 374 |
+
chunks = (length // chunksize) + 1
|
| 375 |
+
for i in range(chunks):
|
| 376 |
+
start_i = i * chunksize
|
| 377 |
+
end_i = min((i + 1) * chunksize, length)
|
| 378 |
+
converted = ints_to_pytimedelta(data[start_i:end_i], box=True)
|
| 379 |
+
yield from converted
|
| 380 |
+
|
| 381 |
+
# ----------------------------------------------------------------
|
| 382 |
+
# Reductions
|
| 383 |
+
|
| 384 |
+
def sum(
|
| 385 |
+
self,
|
| 386 |
+
*,
|
| 387 |
+
axis: AxisInt | None = None,
|
| 388 |
+
dtype: NpDtype | None = None,
|
| 389 |
+
out=None,
|
| 390 |
+
keepdims: bool = False,
|
| 391 |
+
initial=None,
|
| 392 |
+
skipna: bool = True,
|
| 393 |
+
min_count: int = 0,
|
| 394 |
+
):
|
| 395 |
+
nv.validate_sum(
|
| 396 |
+
(), {"dtype": dtype, "out": out, "keepdims": keepdims, "initial": initial}
|
| 397 |
+
)
|
| 398 |
+
|
| 399 |
+
result = nanops.nansum(
|
| 400 |
+
self._ndarray, axis=axis, skipna=skipna, min_count=min_count
|
| 401 |
+
)
|
| 402 |
+
return self._wrap_reduction_result(axis, result)
|
| 403 |
+
|
| 404 |
+
def std(
|
| 405 |
+
self,
|
| 406 |
+
*,
|
| 407 |
+
axis: AxisInt | None = None,
|
| 408 |
+
dtype: NpDtype | None = None,
|
| 409 |
+
out=None,
|
| 410 |
+
ddof: int = 1,
|
| 411 |
+
keepdims: bool = False,
|
| 412 |
+
skipna: bool = True,
|
| 413 |
+
):
|
| 414 |
+
nv.validate_stat_ddof_func(
|
| 415 |
+
(), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std"
|
| 416 |
+
)
|
| 417 |
+
|
| 418 |
+
result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
|
| 419 |
+
if axis is None or self.ndim == 1:
|
| 420 |
+
return self._box_func(result)
|
| 421 |
+
return self._from_backing_data(result)
|
| 422 |
+
|
| 423 |
+
# ----------------------------------------------------------------
|
| 424 |
+
# Accumulations
|
| 425 |
+
|
| 426 |
+
def _accumulate(self, name: str, *, skipna: bool = True, **kwargs):
|
| 427 |
+
if name == "cumsum":
|
| 428 |
+
op = getattr(datetimelike_accumulations, name)
|
| 429 |
+
result = op(self._ndarray.copy(), skipna=skipna, **kwargs)
|
| 430 |
+
|
| 431 |
+
return type(self)._simple_new(result, freq=None, dtype=self.dtype)
|
| 432 |
+
elif name == "cumprod":
|
| 433 |
+
raise TypeError("cumprod not supported for Timedelta.")
|
| 434 |
+
|
| 435 |
+
else:
|
| 436 |
+
return super()._accumulate(name, skipna=skipna, **kwargs)
|
| 437 |
+
|
| 438 |
+
# ----------------------------------------------------------------
|
| 439 |
+
# Rendering Methods
|
| 440 |
+
|
| 441 |
+
def _formatter(self, boxed: bool = False):
|
| 442 |
+
from pandas.io.formats.format import get_format_timedelta64
|
| 443 |
+
|
| 444 |
+
return get_format_timedelta64(self, box=True)
|
| 445 |
+
|
| 446 |
+
def _format_native_types(
|
| 447 |
+
self, *, na_rep: str | float = "NaT", date_format=None, **kwargs
|
| 448 |
+
) -> npt.NDArray[np.object_]:
|
| 449 |
+
from pandas.io.formats.format import get_format_timedelta64
|
| 450 |
+
|
| 451 |
+
# Relies on TimeDelta._repr_base
|
| 452 |
+
formatter = get_format_timedelta64(self, na_rep)
|
| 453 |
+
# equiv: np.array([formatter(x) for x in self._ndarray])
|
| 454 |
+
# but independent of dimension
|
| 455 |
+
return np.frompyfunc(formatter, 1, 1)(self._ndarray)
|
| 456 |
+
|
| 457 |
+
# ----------------------------------------------------------------
|
| 458 |
+
# Arithmetic Methods
|
| 459 |
+
|
| 460 |
+
def _add_offset(self, other):
|
| 461 |
+
assert not isinstance(other, Tick)
|
| 462 |
+
raise TypeError(
|
| 463 |
+
f"cannot add the type {type(other).__name__} to a {type(self).__name__}"
|
| 464 |
+
)
|
| 465 |
+
|
| 466 |
+
@unpack_zerodim_and_defer("__mul__")
|
| 467 |
+
def __mul__(self, other) -> Self:
|
| 468 |
+
if is_scalar(other):
|
| 469 |
+
# numpy will accept float and int, raise TypeError for others
|
| 470 |
+
result = self._ndarray * other
|
| 471 |
+
if result.dtype.kind != "m":
|
| 472 |
+
# numpy >= 2.1 may not raise a TypeError
|
| 473 |
+
# and seems to dispatch to others.__rmul__?
|
| 474 |
+
raise TypeError(f"Cannot multiply with {type(other).__name__}")
|
| 475 |
+
freq = None
|
| 476 |
+
if self.freq is not None and not isna(other):
|
| 477 |
+
freq = self.freq * other
|
| 478 |
+
if freq.n == 0:
|
| 479 |
+
# GH#51575 Better to have no freq than an incorrect one
|
| 480 |
+
freq = None
|
| 481 |
+
return type(self)._simple_new(result, dtype=result.dtype, freq=freq)
|
| 482 |
+
|
| 483 |
+
if not hasattr(other, "dtype"):
|
| 484 |
+
# list, tuple
|
| 485 |
+
other = np.array(other)
|
| 486 |
+
if len(other) != len(self) and not lib.is_np_dtype(other.dtype, "m"):
|
| 487 |
+
# Exclude timedelta64 here so we correctly raise TypeError
|
| 488 |
+
# for that instead of ValueError
|
| 489 |
+
raise ValueError("Cannot multiply with unequal lengths")
|
| 490 |
+
|
| 491 |
+
if is_object_dtype(other.dtype):
|
| 492 |
+
# this multiplication will succeed only if all elements of other
|
| 493 |
+
# are int or float scalars, so we will end up with
|
| 494 |
+
# timedelta64[ns]-dtyped result
|
| 495 |
+
arr = self._ndarray
|
| 496 |
+
result = [arr[n] * other[n] for n in range(len(self))]
|
| 497 |
+
result = np.array(result)
|
| 498 |
+
return type(self)._simple_new(result, dtype=result.dtype)
|
| 499 |
+
|
| 500 |
+
# numpy will accept float or int dtype, raise TypeError for others
|
| 501 |
+
result = self._ndarray * other
|
| 502 |
+
if result.dtype.kind != "m":
|
| 503 |
+
# numpy >= 2.1 may not raise a TypeError
|
| 504 |
+
# and seems to dispatch to others.__rmul__?
|
| 505 |
+
raise TypeError(f"Cannot multiply with {type(other).__name__}")
|
| 506 |
+
return type(self)._simple_new(result, dtype=result.dtype)
|
| 507 |
+
|
| 508 |
+
__rmul__ = __mul__
|
| 509 |
+
|
| 510 |
+
def _scalar_divlike_op(self, other, op):
|
| 511 |
+
"""
|
| 512 |
+
Shared logic for __truediv__, __rtruediv__, __floordiv__, __rfloordiv__
|
| 513 |
+
with scalar 'other'.
|
| 514 |
+
"""
|
| 515 |
+
if isinstance(other, self._recognized_scalars):
|
| 516 |
+
other = Timedelta(other)
|
| 517 |
+
# mypy assumes that __new__ returns an instance of the class
|
| 518 |
+
# github.com/python/mypy/issues/1020
|
| 519 |
+
if cast("Timedelta | NaTType", other) is NaT:
|
| 520 |
+
# specifically timedelta64-NaT
|
| 521 |
+
res = np.empty(self.shape, dtype=np.float64)
|
| 522 |
+
res.fill(np.nan)
|
| 523 |
+
return res
|
| 524 |
+
|
| 525 |
+
# otherwise, dispatch to Timedelta implementation
|
| 526 |
+
return op(self._ndarray, other)
|
| 527 |
+
|
| 528 |
+
else:
|
| 529 |
+
# caller is responsible for checking lib.is_scalar(other)
|
| 530 |
+
# assume other is numeric, otherwise numpy will raise
|
| 531 |
+
|
| 532 |
+
if op in [roperator.rtruediv, roperator.rfloordiv]:
|
| 533 |
+
raise TypeError(
|
| 534 |
+
f"Cannot divide {type(other).__name__} by {type(self).__name__}"
|
| 535 |
+
)
|
| 536 |
+
|
| 537 |
+
result = op(self._ndarray, other)
|
| 538 |
+
freq = None
|
| 539 |
+
|
| 540 |
+
if self.freq is not None:
|
| 541 |
+
# Note: freq gets division, not floor-division, even if op
|
| 542 |
+
# is floordiv.
|
| 543 |
+
freq = self.freq / other
|
| 544 |
+
if freq.nanos == 0 and self.freq.nanos != 0:
|
| 545 |
+
# e.g. if self.freq is Nano(1) then dividing by 2
|
| 546 |
+
# rounds down to zero
|
| 547 |
+
freq = None
|
| 548 |
+
|
| 549 |
+
return type(self)._simple_new(result, dtype=result.dtype, freq=freq)
|
| 550 |
+
|
| 551 |
+
def _cast_divlike_op(self, other):
|
| 552 |
+
if not hasattr(other, "dtype"):
|
| 553 |
+
# e.g. list, tuple
|
| 554 |
+
other = np.array(other)
|
| 555 |
+
|
| 556 |
+
if len(other) != len(self):
|
| 557 |
+
raise ValueError("Cannot divide vectors with unequal lengths")
|
| 558 |
+
return other
|
| 559 |
+
|
| 560 |
+
def _vector_divlike_op(self, other, op) -> np.ndarray | Self:
|
| 561 |
+
"""
|
| 562 |
+
Shared logic for __truediv__, __floordiv__, and their reversed versions
|
| 563 |
+
with timedelta64-dtype ndarray other.
|
| 564 |
+
"""
|
| 565 |
+
# Let numpy handle it
|
| 566 |
+
result = op(self._ndarray, np.asarray(other))
|
| 567 |
+
|
| 568 |
+
if (is_integer_dtype(other.dtype) or is_float_dtype(other.dtype)) and op in [
|
| 569 |
+
operator.truediv,
|
| 570 |
+
operator.floordiv,
|
| 571 |
+
]:
|
| 572 |
+
return type(self)._simple_new(result, dtype=result.dtype)
|
| 573 |
+
|
| 574 |
+
if op in [operator.floordiv, roperator.rfloordiv]:
|
| 575 |
+
mask = self.isna() | isna(other)
|
| 576 |
+
if mask.any():
|
| 577 |
+
result = result.astype(np.float64)
|
| 578 |
+
np.putmask(result, mask, np.nan)
|
| 579 |
+
|
| 580 |
+
return result
|
| 581 |
+
|
| 582 |
+
@unpack_zerodim_and_defer("__truediv__")
|
| 583 |
+
def __truediv__(self, other):
|
| 584 |
+
# timedelta / X is well-defined for timedelta-like or numeric X
|
| 585 |
+
op = operator.truediv
|
| 586 |
+
if is_scalar(other):
|
| 587 |
+
return self._scalar_divlike_op(other, op)
|
| 588 |
+
|
| 589 |
+
other = self._cast_divlike_op(other)
|
| 590 |
+
if (
|
| 591 |
+
lib.is_np_dtype(other.dtype, "m")
|
| 592 |
+
or is_integer_dtype(other.dtype)
|
| 593 |
+
or is_float_dtype(other.dtype)
|
| 594 |
+
):
|
| 595 |
+
return self._vector_divlike_op(other, op)
|
| 596 |
+
|
| 597 |
+
if is_object_dtype(other.dtype):
|
| 598 |
+
other = np.asarray(other)
|
| 599 |
+
if self.ndim > 1:
|
| 600 |
+
res_cols = [left / right for left, right in zip(self, other)]
|
| 601 |
+
res_cols2 = [x.reshape(1, -1) for x in res_cols]
|
| 602 |
+
result = np.concatenate(res_cols2, axis=0)
|
| 603 |
+
else:
|
| 604 |
+
result = truediv_object_array(self._ndarray, other)
|
| 605 |
+
|
| 606 |
+
return result
|
| 607 |
+
|
| 608 |
+
else:
|
| 609 |
+
return NotImplemented
|
| 610 |
+
|
| 611 |
+
@unpack_zerodim_and_defer("__rtruediv__")
|
| 612 |
+
def __rtruediv__(self, other):
|
| 613 |
+
# X / timedelta is defined only for timedelta-like X
|
| 614 |
+
op = roperator.rtruediv
|
| 615 |
+
if is_scalar(other):
|
| 616 |
+
return self._scalar_divlike_op(other, op)
|
| 617 |
+
|
| 618 |
+
other = self._cast_divlike_op(other)
|
| 619 |
+
if lib.is_np_dtype(other.dtype, "m"):
|
| 620 |
+
return self._vector_divlike_op(other, op)
|
| 621 |
+
|
| 622 |
+
elif is_object_dtype(other.dtype):
|
| 623 |
+
# Note: unlike in __truediv__, we do not _need_ to do type
|
| 624 |
+
# inference on the result. It does not raise, a numeric array
|
| 625 |
+
# is returned. GH#23829
|
| 626 |
+
result_list = [other[n] / self[n] for n in range(len(self))]
|
| 627 |
+
return np.array(result_list)
|
| 628 |
+
|
| 629 |
+
else:
|
| 630 |
+
return NotImplemented
|
| 631 |
+
|
| 632 |
+
@unpack_zerodim_and_defer("__floordiv__")
|
| 633 |
+
def __floordiv__(self, other):
|
| 634 |
+
op = operator.floordiv
|
| 635 |
+
if is_scalar(other):
|
| 636 |
+
return self._scalar_divlike_op(other, op)
|
| 637 |
+
|
| 638 |
+
other = self._cast_divlike_op(other)
|
| 639 |
+
if (
|
| 640 |
+
lib.is_np_dtype(other.dtype, "m")
|
| 641 |
+
or is_integer_dtype(other.dtype)
|
| 642 |
+
or is_float_dtype(other.dtype)
|
| 643 |
+
):
|
| 644 |
+
return self._vector_divlike_op(other, op)
|
| 645 |
+
|
| 646 |
+
elif is_object_dtype(other.dtype):
|
| 647 |
+
other = np.asarray(other)
|
| 648 |
+
if self.ndim > 1:
|
| 649 |
+
res_cols = [left // right for left, right in zip(self, other)]
|
| 650 |
+
res_cols2 = [x.reshape(1, -1) for x in res_cols]
|
| 651 |
+
result = np.concatenate(res_cols2, axis=0)
|
| 652 |
+
else:
|
| 653 |
+
result = floordiv_object_array(self._ndarray, other)
|
| 654 |
+
|
| 655 |
+
assert result.dtype == object
|
| 656 |
+
return result
|
| 657 |
+
|
| 658 |
+
else:
|
| 659 |
+
return NotImplemented
|
| 660 |
+
|
| 661 |
+
@unpack_zerodim_and_defer("__rfloordiv__")
|
| 662 |
+
def __rfloordiv__(self, other):
|
| 663 |
+
op = roperator.rfloordiv
|
| 664 |
+
if is_scalar(other):
|
| 665 |
+
return self._scalar_divlike_op(other, op)
|
| 666 |
+
|
| 667 |
+
other = self._cast_divlike_op(other)
|
| 668 |
+
if lib.is_np_dtype(other.dtype, "m"):
|
| 669 |
+
return self._vector_divlike_op(other, op)
|
| 670 |
+
|
| 671 |
+
elif is_object_dtype(other.dtype):
|
| 672 |
+
result_list = [other[n] // self[n] for n in range(len(self))]
|
| 673 |
+
result = np.array(result_list)
|
| 674 |
+
return result
|
| 675 |
+
|
| 676 |
+
else:
|
| 677 |
+
return NotImplemented
|
| 678 |
+
|
| 679 |
+
@unpack_zerodim_and_defer("__mod__")
|
| 680 |
+
def __mod__(self, other):
|
| 681 |
+
# Note: This is a naive implementation, can likely be optimized
|
| 682 |
+
if isinstance(other, self._recognized_scalars):
|
| 683 |
+
other = Timedelta(other)
|
| 684 |
+
return self - (self // other) * other
|
| 685 |
+
|
| 686 |
+
@unpack_zerodim_and_defer("__rmod__")
|
| 687 |
+
def __rmod__(self, other):
|
| 688 |
+
# Note: This is a naive implementation, can likely be optimized
|
| 689 |
+
if isinstance(other, self._recognized_scalars):
|
| 690 |
+
other = Timedelta(other)
|
| 691 |
+
return other - (other // self) * self
|
| 692 |
+
|
| 693 |
+
@unpack_zerodim_and_defer("__divmod__")
|
| 694 |
+
def __divmod__(self, other):
|
| 695 |
+
# Note: This is a naive implementation, can likely be optimized
|
| 696 |
+
if isinstance(other, self._recognized_scalars):
|
| 697 |
+
other = Timedelta(other)
|
| 698 |
+
|
| 699 |
+
res1 = self // other
|
| 700 |
+
res2 = self - res1 * other
|
| 701 |
+
return res1, res2
|
| 702 |
+
|
| 703 |
+
@unpack_zerodim_and_defer("__rdivmod__")
|
| 704 |
+
def __rdivmod__(self, other):
|
| 705 |
+
# Note: This is a naive implementation, can likely be optimized
|
| 706 |
+
if isinstance(other, self._recognized_scalars):
|
| 707 |
+
other = Timedelta(other)
|
| 708 |
+
|
| 709 |
+
res1 = other // self
|
| 710 |
+
res2 = other - res1 * self
|
| 711 |
+
return res1, res2
|
| 712 |
+
|
| 713 |
+
def __neg__(self) -> TimedeltaArray:
|
| 714 |
+
freq = None
|
| 715 |
+
if self.freq is not None:
|
| 716 |
+
freq = -self.freq
|
| 717 |
+
return type(self)._simple_new(-self._ndarray, dtype=self.dtype, freq=freq)
|
| 718 |
+
|
| 719 |
+
def __pos__(self) -> TimedeltaArray:
|
| 720 |
+
return type(self)._simple_new(
|
| 721 |
+
self._ndarray.copy(), dtype=self.dtype, freq=self.freq
|
| 722 |
+
)
|
| 723 |
+
|
| 724 |
+
def __abs__(self) -> TimedeltaArray:
|
| 725 |
+
# Note: freq is not preserved
|
| 726 |
+
return type(self)._simple_new(np.abs(self._ndarray), dtype=self.dtype)
|
| 727 |
+
|
| 728 |
+
# ----------------------------------------------------------------
|
| 729 |
+
# Conversion Methods - Vectorized analogues of Timedelta methods
|
| 730 |
+
|
| 731 |
+
def total_seconds(self) -> npt.NDArray[np.float64]:
|
| 732 |
+
"""
|
| 733 |
+
Return total duration of each element expressed in seconds.
|
| 734 |
+
|
| 735 |
+
This method is available directly on TimedeltaArray, TimedeltaIndex
|
| 736 |
+
and on Series containing timedelta values under the ``.dt`` namespace.
|
| 737 |
+
|
| 738 |
+
Returns
|
| 739 |
+
-------
|
| 740 |
+
ndarray, Index or Series
|
| 741 |
+
When the calling object is a TimedeltaArray, the return type
|
| 742 |
+
is ndarray. When the calling object is a TimedeltaIndex,
|
| 743 |
+
the return type is an Index with a float64 dtype. When the calling object
|
| 744 |
+
is a Series, the return type is Series of type `float64` whose
|
| 745 |
+
index is the same as the original.
|
| 746 |
+
|
| 747 |
+
See Also
|
| 748 |
+
--------
|
| 749 |
+
datetime.timedelta.total_seconds : Standard library version
|
| 750 |
+
of this method.
|
| 751 |
+
TimedeltaIndex.components : Return a DataFrame with components of
|
| 752 |
+
each Timedelta.
|
| 753 |
+
|
| 754 |
+
Examples
|
| 755 |
+
--------
|
| 756 |
+
**Series**
|
| 757 |
+
|
| 758 |
+
>>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='d'))
|
| 759 |
+
>>> s
|
| 760 |
+
0 0 days
|
| 761 |
+
1 1 days
|
| 762 |
+
2 2 days
|
| 763 |
+
3 3 days
|
| 764 |
+
4 4 days
|
| 765 |
+
dtype: timedelta64[ns]
|
| 766 |
+
|
| 767 |
+
>>> s.dt.total_seconds()
|
| 768 |
+
0 0.0
|
| 769 |
+
1 86400.0
|
| 770 |
+
2 172800.0
|
| 771 |
+
3 259200.0
|
| 772 |
+
4 345600.0
|
| 773 |
+
dtype: float64
|
| 774 |
+
|
| 775 |
+
**TimedeltaIndex**
|
| 776 |
+
|
| 777 |
+
>>> idx = pd.to_timedelta(np.arange(5), unit='d')
|
| 778 |
+
>>> idx
|
| 779 |
+
TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
|
| 780 |
+
dtype='timedelta64[ns]', freq=None)
|
| 781 |
+
|
| 782 |
+
>>> idx.total_seconds()
|
| 783 |
+
Index([0.0, 86400.0, 172800.0, 259200.0, 345600.0], dtype='float64')
|
| 784 |
+
"""
|
| 785 |
+
pps = periods_per_second(self._creso)
|
| 786 |
+
return self._maybe_mask_results(self.asi8 / pps, fill_value=None)
|
| 787 |
+
|
| 788 |
+
def to_pytimedelta(self) -> npt.NDArray[np.object_]:
|
| 789 |
+
"""
|
| 790 |
+
Return an ndarray of datetime.timedelta objects.
|
| 791 |
+
|
| 792 |
+
Returns
|
| 793 |
+
-------
|
| 794 |
+
numpy.ndarray
|
| 795 |
+
|
| 796 |
+
Examples
|
| 797 |
+
--------
|
| 798 |
+
>>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='D')
|
| 799 |
+
>>> tdelta_idx
|
| 800 |
+
TimedeltaIndex(['1 days', '2 days', '3 days'],
|
| 801 |
+
dtype='timedelta64[ns]', freq=None)
|
| 802 |
+
>>> tdelta_idx.to_pytimedelta()
|
| 803 |
+
array([datetime.timedelta(days=1), datetime.timedelta(days=2),
|
| 804 |
+
datetime.timedelta(days=3)], dtype=object)
|
| 805 |
+
"""
|
| 806 |
+
return ints_to_pytimedelta(self._ndarray)
|
| 807 |
+
|
| 808 |
+
days_docstring = textwrap.dedent(
|
| 809 |
+
"""Number of days for each element.
|
| 810 |
+
|
| 811 |
+
Examples
|
| 812 |
+
--------
|
| 813 |
+
For Series:
|
| 814 |
+
|
| 815 |
+
>>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='d'))
|
| 816 |
+
>>> ser
|
| 817 |
+
0 1 days
|
| 818 |
+
1 2 days
|
| 819 |
+
2 3 days
|
| 820 |
+
dtype: timedelta64[ns]
|
| 821 |
+
>>> ser.dt.days
|
| 822 |
+
0 1
|
| 823 |
+
1 2
|
| 824 |
+
2 3
|
| 825 |
+
dtype: int64
|
| 826 |
+
|
| 827 |
+
For TimedeltaIndex:
|
| 828 |
+
|
| 829 |
+
>>> tdelta_idx = pd.to_timedelta(["0 days", "10 days", "20 days"])
|
| 830 |
+
>>> tdelta_idx
|
| 831 |
+
TimedeltaIndex(['0 days', '10 days', '20 days'],
|
| 832 |
+
dtype='timedelta64[ns]', freq=None)
|
| 833 |
+
>>> tdelta_idx.days
|
| 834 |
+
Index([0, 10, 20], dtype='int64')"""
|
| 835 |
+
)
|
| 836 |
+
days = _field_accessor("days", "days", days_docstring)
|
| 837 |
+
|
| 838 |
+
seconds_docstring = textwrap.dedent(
|
| 839 |
+
"""Number of seconds (>= 0 and less than 1 day) for each element.
|
| 840 |
+
|
| 841 |
+
Examples
|
| 842 |
+
--------
|
| 843 |
+
For Series:
|
| 844 |
+
|
| 845 |
+
>>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='s'))
|
| 846 |
+
>>> ser
|
| 847 |
+
0 0 days 00:00:01
|
| 848 |
+
1 0 days 00:00:02
|
| 849 |
+
2 0 days 00:00:03
|
| 850 |
+
dtype: timedelta64[ns]
|
| 851 |
+
>>> ser.dt.seconds
|
| 852 |
+
0 1
|
| 853 |
+
1 2
|
| 854 |
+
2 3
|
| 855 |
+
dtype: int32
|
| 856 |
+
|
| 857 |
+
For TimedeltaIndex:
|
| 858 |
+
|
| 859 |
+
>>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='s')
|
| 860 |
+
>>> tdelta_idx
|
| 861 |
+
TimedeltaIndex(['0 days 00:00:01', '0 days 00:00:02', '0 days 00:00:03'],
|
| 862 |
+
dtype='timedelta64[ns]', freq=None)
|
| 863 |
+
>>> tdelta_idx.seconds
|
| 864 |
+
Index([1, 2, 3], dtype='int32')"""
|
| 865 |
+
)
|
| 866 |
+
seconds = _field_accessor(
|
| 867 |
+
"seconds",
|
| 868 |
+
"seconds",
|
| 869 |
+
seconds_docstring,
|
| 870 |
+
)
|
| 871 |
+
|
| 872 |
+
microseconds_docstring = textwrap.dedent(
|
| 873 |
+
"""Number of microseconds (>= 0 and less than 1 second) for each element.
|
| 874 |
+
|
| 875 |
+
Examples
|
| 876 |
+
--------
|
| 877 |
+
For Series:
|
| 878 |
+
|
| 879 |
+
>>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='us'))
|
| 880 |
+
>>> ser
|
| 881 |
+
0 0 days 00:00:00.000001
|
| 882 |
+
1 0 days 00:00:00.000002
|
| 883 |
+
2 0 days 00:00:00.000003
|
| 884 |
+
dtype: timedelta64[ns]
|
| 885 |
+
>>> ser.dt.microseconds
|
| 886 |
+
0 1
|
| 887 |
+
1 2
|
| 888 |
+
2 3
|
| 889 |
+
dtype: int32
|
| 890 |
+
|
| 891 |
+
For TimedeltaIndex:
|
| 892 |
+
|
| 893 |
+
>>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='us')
|
| 894 |
+
>>> tdelta_idx
|
| 895 |
+
TimedeltaIndex(['0 days 00:00:00.000001', '0 days 00:00:00.000002',
|
| 896 |
+
'0 days 00:00:00.000003'],
|
| 897 |
+
dtype='timedelta64[ns]', freq=None)
|
| 898 |
+
>>> tdelta_idx.microseconds
|
| 899 |
+
Index([1, 2, 3], dtype='int32')"""
|
| 900 |
+
)
|
| 901 |
+
microseconds = _field_accessor(
|
| 902 |
+
"microseconds",
|
| 903 |
+
"microseconds",
|
| 904 |
+
microseconds_docstring,
|
| 905 |
+
)
|
| 906 |
+
|
| 907 |
+
nanoseconds_docstring = textwrap.dedent(
|
| 908 |
+
"""Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.
|
| 909 |
+
|
| 910 |
+
Examples
|
| 911 |
+
--------
|
| 912 |
+
For Series:
|
| 913 |
+
|
| 914 |
+
>>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='ns'))
|
| 915 |
+
>>> ser
|
| 916 |
+
0 0 days 00:00:00.000000001
|
| 917 |
+
1 0 days 00:00:00.000000002
|
| 918 |
+
2 0 days 00:00:00.000000003
|
| 919 |
+
dtype: timedelta64[ns]
|
| 920 |
+
>>> ser.dt.nanoseconds
|
| 921 |
+
0 1
|
| 922 |
+
1 2
|
| 923 |
+
2 3
|
| 924 |
+
dtype: int32
|
| 925 |
+
|
| 926 |
+
For TimedeltaIndex:
|
| 927 |
+
|
| 928 |
+
>>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='ns')
|
| 929 |
+
>>> tdelta_idx
|
| 930 |
+
TimedeltaIndex(['0 days 00:00:00.000000001', '0 days 00:00:00.000000002',
|
| 931 |
+
'0 days 00:00:00.000000003'],
|
| 932 |
+
dtype='timedelta64[ns]', freq=None)
|
| 933 |
+
>>> tdelta_idx.nanoseconds
|
| 934 |
+
Index([1, 2, 3], dtype='int32')"""
|
| 935 |
+
)
|
| 936 |
+
nanoseconds = _field_accessor(
|
| 937 |
+
"nanoseconds",
|
| 938 |
+
"nanoseconds",
|
| 939 |
+
nanoseconds_docstring,
|
| 940 |
+
)
|
| 941 |
+
|
| 942 |
+
@property
def components(self) -> DataFrame:
    """
    Break every Timedelta down into its individual resolution components.

    One row is produced per element, with one column for each of days,
    hours, minutes, seconds, milliseconds, microseconds and nanoseconds.

    Returns
    -------
    DataFrame
        ``int64`` columns when no element is missing; otherwise rows for
        missing elements are filled with NaN and no integer cast is applied.

    Examples
    --------
    >>> tdelta_idx = pd.to_timedelta(['1 day 3 min 2 us 42 ns'])
    >>> tdelta_idx
    TimedeltaIndex(['1 days 00:03:00.000002042'],
                   dtype='timedelta64[ns]', freq=None)
    >>> tdelta_idx.components
       days  hours  minutes  seconds  milliseconds  microseconds  nanoseconds
    0     1      0        3        0             0             2           42
    """
    # Imported locally, as in the original implementation.
    from pandas import DataFrame

    field_names = [
        "days",
        "hours",
        "minutes",
        "seconds",
        "milliseconds",
        "microseconds",
        "nanoseconds",
    ]
    has_missing = self._hasna

    def _row(element):
        # The NA check is only performed when the array reports missing
        # values; otherwise every element is decomposed directly.
        if has_missing and isna(element):
            return [np.nan] * len(field_names)
        return element.components

    frame = DataFrame([_row(element) for element in self], columns=field_names)
    if has_missing:
        # NaN rows cannot be represented as int64, so leave dtypes alone.
        return frame
    return frame.astype("int64")
|
| 992 |
+
|
| 993 |
+
|
| 994 |
+
# ---------------------------------------------------------------------
|
| 995 |
+
# Constructor Helpers
|
| 996 |
+
|
| 997 |
+
|
| 998 |
+
def sequence_to_td64ns(
    data,
    copy: bool = False,
    unit=None,
    errors: DateTimeErrorChoices = "raise",
) -> tuple[np.ndarray, Tick | None]:
    """
    Convert a list-like into a timedelta64 ndarray plus an inferred frequency.

    Parameters
    ----------
    data : list-like
    copy : bool, default False
    unit : str, optional
        The timedelta unit to treat integers as multiples of. For numeric
        data this defaults to ``'ns'``.
        Must be un-specified if the data contains a str and ``errors=="raise"``.
    errors : {"raise", "coerce", "ignore"}, default "raise"
        How to handle elements that cannot be converted to timedelta64[ns].
        See ``pandas.to_timedelta`` for details.

    Returns
    -------
    converted : numpy.ndarray
        The sequence converted to a numpy array with dtype ``timedelta64[ns]``.
    inferred_freq : Tick or None
        The inferred frequency of the sequence.

    Raises
    ------
    ValueError : Data cannot be converted to timedelta64[ns].

    Notes
    -----
    Unlike `pandas.to_timedelta`, setting ``errors=ignore`` will not cause
    errors to be ignored here; they are caught and subsequently ignored at a
    higher level.
    """
    assert unit not in ["Y", "y", "M"]  # caller is responsible for checking

    if unit is not None:
        unit = parse_timedelta_unit(unit)

    data, copy = dtl.ensure_arraylike_for_datetimelike(
        data, copy, cls_name="TimedeltaArray"
    )

    # Only an existing TimedeltaArray carries a frequency worth propagating.
    inferred_freq = data.freq if isinstance(data, TimedeltaArray) else None

    # ``data`` is not rebound before a branch is selected, so the dtype can
    # be read once up front.
    src_dtype = data.dtype

    # Convert whatever we have into timedelta64[ns] dtype
    if src_dtype == object or is_string_dtype(src_dtype):
        # no need to make a copy, need to convert if string-dtyped
        data = _objects_to_td64ns(data, unit=unit, errors=errors)
        copy = False

    elif is_integer_dtype(src_dtype):
        # treat as multiples of the given unit
        data, copy_made = _ints_to_td64ns(data, unit=unit)
        copy = copy and not copy_made

    elif is_float_dtype(src_dtype):
        # cast the unit, multiply base/frac separately
        # to avoid precision issues from float -> int
        if isinstance(src_dtype, ExtensionDtype):
            mask = data._mask
            data = data._data
        else:
            mask = np.isnan(data)

        data = cast_from_unit_vectorized(data, unit or "ns")
        data[mask] = iNaT
        data = data.view("m8[ns]")
        copy = False

    elif lib.is_np_dtype(src_dtype, "m"):
        if not is_supported_dtype(src_dtype):
            # cast to closest supported unit, i.e. s or ns
            new_dtype = get_supported_dtype(src_dtype)
            data = astype_overflowsafe(data, dtype=new_dtype, copy=False)
            copy = False

    else:
        # This includes datetime64-dtype, see GH#23539, GH#29794
        raise TypeError(f"dtype {src_dtype} cannot be converted to timedelta64[ns]")

    if copy:
        data = np.array(data, copy=copy)
    else:
        data = np.asarray(data)

    assert data.dtype.kind == "m"
    assert data.dtype != "m8"  # i.e. not unit-less

    return data, inferred_freq
|
| 1092 |
+
|
| 1093 |
+
|
| 1094 |
+
def _ints_to_td64ns(data, unit: str = "ns"):
|
| 1095 |
+
"""
|
| 1096 |
+
Convert an ndarray with integer-dtype to timedelta64[ns] dtype, treating
|
| 1097 |
+
the integers as multiples of the given timedelta unit.
|
| 1098 |
+
|
| 1099 |
+
Parameters
|
| 1100 |
+
----------
|
| 1101 |
+
data : numpy.ndarray with integer-dtype
|
| 1102 |
+
unit : str, default "ns"
|
| 1103 |
+
The timedelta unit to treat integers as multiples of.
|
| 1104 |
+
|
| 1105 |
+
Returns
|
| 1106 |
+
-------
|
| 1107 |
+
numpy.ndarray : timedelta64[ns] array converted from data
|
| 1108 |
+
bool : whether a copy was made
|
| 1109 |
+
"""
|
| 1110 |
+
copy_made = False
|
| 1111 |
+
unit = unit if unit is not None else "ns"
|
| 1112 |
+
|
| 1113 |
+
if data.dtype != np.int64:
|
| 1114 |
+
# converting to int64 makes a copy, so we can avoid
|
| 1115 |
+
# re-copying later
|
| 1116 |
+
data = data.astype(np.int64)
|
| 1117 |
+
copy_made = True
|
| 1118 |
+
|
| 1119 |
+
if unit != "ns":
|
| 1120 |
+
dtype_str = f"timedelta64[{unit}]"
|
| 1121 |
+
data = data.view(dtype_str)
|
| 1122 |
+
|
| 1123 |
+
data = astype_overflowsafe(data, dtype=TD64NS_DTYPE)
|
| 1124 |
+
|
| 1125 |
+
# the astype conversion makes a copy, so we can avoid re-copying later
|
| 1126 |
+
copy_made = True
|
| 1127 |
+
|
| 1128 |
+
else:
|
| 1129 |
+
data = data.view("timedelta64[ns]")
|
| 1130 |
+
|
| 1131 |
+
return data, copy_made
|
| 1132 |
+
|
| 1133 |
+
|
| 1134 |
+
def _objects_to_td64ns(data, unit=None, errors: DateTimeErrorChoices = "raise"):
|
| 1135 |
+
"""
|
| 1136 |
+
Convert a object-dtyped or string-dtyped array into an
|
| 1137 |
+
timedelta64[ns]-dtyped array.
|
| 1138 |
+
|
| 1139 |
+
Parameters
|
| 1140 |
+
----------
|
| 1141 |
+
data : ndarray or Index
|
| 1142 |
+
unit : str, default "ns"
|
| 1143 |
+
The timedelta unit to treat integers as multiples of.
|
| 1144 |
+
Must not be specified if the data contains a str.
|
| 1145 |
+
errors : {"raise", "coerce", "ignore"}, default "raise"
|
| 1146 |
+
How to handle elements that cannot be converted to timedelta64[ns].
|
| 1147 |
+
See ``pandas.to_timedelta`` for details.
|
| 1148 |
+
|
| 1149 |
+
Returns
|
| 1150 |
+
-------
|
| 1151 |
+
numpy.ndarray : timedelta64[ns] array converted from data
|
| 1152 |
+
|
| 1153 |
+
Raises
|
| 1154 |
+
------
|
| 1155 |
+
ValueError : Data cannot be converted to timedelta64[ns].
|
| 1156 |
+
|
| 1157 |
+
Notes
|
| 1158 |
+
-----
|
| 1159 |
+
Unlike `pandas.to_timedelta`, if setting `errors=ignore` will not cause
|
| 1160 |
+
errors to be ignored; they are caught and subsequently ignored at a
|
| 1161 |
+
higher level.
|
| 1162 |
+
"""
|
| 1163 |
+
# coerce Index to np.ndarray, converting string-dtype if necessary
|
| 1164 |
+
values = np.asarray(data, dtype=np.object_)
|
| 1165 |
+
|
| 1166 |
+
result = array_to_timedelta64(values, unit=unit, errors=errors)
|
| 1167 |
+
return result.view("timedelta64[ns]")
|
| 1168 |
+
|
| 1169 |
+
|
| 1170 |
+
def _validate_td64_dtype(dtype) -> DtypeObj:
|
| 1171 |
+
dtype = pandas_dtype(dtype)
|
| 1172 |
+
if dtype == np.dtype("m8"):
|
| 1173 |
+
# no precision disallowed GH#24806
|
| 1174 |
+
msg = (
|
| 1175 |
+
"Passing in 'timedelta' dtype with no precision is not allowed. "
|
| 1176 |
+
"Please pass in 'timedelta64[ns]' instead."
|
| 1177 |
+
)
|
| 1178 |
+
raise ValueError(msg)
|
| 1179 |
+
|
| 1180 |
+
if not lib.is_np_dtype(dtype, "m"):
|
| 1181 |
+
raise ValueError(f"dtype '{dtype}' is invalid, should be np.timedelta64 dtype")
|
| 1182 |
+
elif not is_supported_dtype(dtype):
|
| 1183 |
+
raise ValueError("Supported timedelta64 resolutions are 's', 'ms', 'us', 'ns'")
|
| 1184 |
+
|
| 1185 |
+
return dtype
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (1.87 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/api.cpython-312.pyc
ADDED
|
Binary file (4.09 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/array_manager.cpython-312.pyc
ADDED
|
Binary file (54.4 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/base.cpython-312.pyc
ADDED
|
Binary file (14.3 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/blocks.cpython-312.pyc
ADDED
|
Binary file (93.8 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/concat.cpython-312.pyc
ADDED
|
Binary file (22.2 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/construction.cpython-312.pyc
ADDED
|
Binary file (35.8 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/managers.cpython-312.pyc
ADDED
|
Binary file (91.5 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/internals/__pycache__/ops.cpython-312.pyc
ADDED
|
Binary file (5.02 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/methods/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (225 Bytes). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/methods/__pycache__/describe.cpython-312.pyc
ADDED
|
Binary file (15.1 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/methods/__pycache__/selectn.cpython-312.pyc
ADDED
|
Binary file (9.24 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/methods/__pycache__/to_dict.cpython-312.pyc
ADDED
|
Binary file (11.5 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/ops/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (1.53 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/ops/__pycache__/array_ops.cpython-312.pyc
ADDED
|
Binary file (18.5 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/ops/__pycache__/common.cpython-312.pyc
ADDED
|
Binary file (4.34 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/ops/__pycache__/dispatch.cpython-312.pyc
ADDED
|
Binary file (1.01 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/ops/__pycache__/docstrings.cpython-312.pyc
ADDED
|
Binary file (16.4 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/ops/__pycache__/invalid.cpython-312.pyc
ADDED
|
Binary file (2.32 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/ops/__pycache__/mask_ops.cpython-312.pyc
ADDED
|
Binary file (5.4 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/ops/__pycache__/missing.cpython-312.pyc
ADDED
|
Binary file (5.66 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/tools/__pycache__/datetimes.cpython-312.pyc
ADDED
|
Binary file (45.4 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/tools/__pycache__/numeric.cpython-312.pyc
ADDED
|
Binary file (11 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/tools/__pycache__/timedeltas.cpython-312.pyc
ADDED
|
Binary file (8.95 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/window/__pycache__/online.cpython-312.pyc
ADDED
|
Binary file (4.9 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/errors/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (31 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/clipboard/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (29.5 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/excel/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (739 Bytes). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/excel/__pycache__/_base.cpython-312.pyc
ADDED
|
Binary file (58.9 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/excel/__pycache__/_calamine.cpython-312.pyc
ADDED
|
Binary file (5.08 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/excel/__pycache__/_odfreader.cpython-312.pyc
ADDED
|
Binary file (10.5 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/excel/__pycache__/_odswriter.cpython-312.pyc
ADDED
|
Binary file (13.3 kB). View file
|
|
|