Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/__init__.py +0 -0
- py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_can_hold_element.py +79 -0
- py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_from_scalar.py +55 -0
- py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_ndarray.py +36 -0
- py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_object_arr.py +20 -0
- py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_dict_compat.py +14 -0
- py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_downcast.py +97 -0
- py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_find_common_type.py +175 -0
- py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_infer_datetimelike.py +28 -0
- py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_infer_dtype.py +216 -0
- py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_maybe_box_native.py +40 -0
- py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_promote.py +530 -0
- py311/lib/python3.11/site-packages/pandas/tests/extension/base/casting.py +87 -0
- py311/lib/python3.11/site-packages/pandas/tests/extension/base/constructors.py +142 -0
- py311/lib/python3.11/site-packages/pandas/tests/extension/base/dim2.py +345 -0
- py311/lib/python3.11/site-packages/pandas/tests/extension/base/setitem.py +451 -0
- py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/__init__.py +8 -0
- py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/array.py +311 -0
- py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/test_decimal.py +587 -0
- py311/lib/python3.11/site-packages/pandas/tests/extension/list/__init__.py +7 -0
- py311/lib/python3.11/site-packages/pandas/tests/extension/list/array.py +137 -0
- py311/lib/python3.11/site-packages/pandas/tests/extension/list/test_list.py +33 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/__init__.py +0 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_append.py +62 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_category.py +391 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_constructors.py +142 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_equals.py +96 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_fillna.py +54 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_formats.py +120 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_indexing.py +420 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_map.py +144 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_reindex.py +78 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_setops.py +18 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/__init__.py +0 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_arithmetic.py +56 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_constructors.py +1204 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_date_range.py +1721 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_datetime.py +216 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_formats.py +356 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_freq_attr.py +61 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_indexing.py +717 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_iter.py +76 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_join.py +153 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_npfuncs.py +13 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_ops.py +56 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_partial_slicing.py +466 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_pickle.py +45 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_reindex.py +56 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_scalar_compat.py +329 -0
- py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_setops.py +666 -0
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/__init__.py
ADDED
|
File without changes
|
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_can_hold_element.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
from pandas.core.dtypes.cast import can_hold_element
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def test_can_hold_element_range(any_int_numpy_dtype):
|
| 7 |
+
# GH#44261
|
| 8 |
+
dtype = np.dtype(any_int_numpy_dtype)
|
| 9 |
+
arr = np.array([], dtype=dtype)
|
| 10 |
+
|
| 11 |
+
rng = range(2, 127)
|
| 12 |
+
assert can_hold_element(arr, rng)
|
| 13 |
+
|
| 14 |
+
# negatives -> can't be held by uint dtypes
|
| 15 |
+
rng = range(-2, 127)
|
| 16 |
+
if dtype.kind == "i":
|
| 17 |
+
assert can_hold_element(arr, rng)
|
| 18 |
+
else:
|
| 19 |
+
assert not can_hold_element(arr, rng)
|
| 20 |
+
|
| 21 |
+
rng = range(2, 255)
|
| 22 |
+
if dtype == "int8":
|
| 23 |
+
assert not can_hold_element(arr, rng)
|
| 24 |
+
else:
|
| 25 |
+
assert can_hold_element(arr, rng)
|
| 26 |
+
|
| 27 |
+
rng = range(-255, 65537)
|
| 28 |
+
if dtype.kind == "u":
|
| 29 |
+
assert not can_hold_element(arr, rng)
|
| 30 |
+
elif dtype.itemsize < 4:
|
| 31 |
+
assert not can_hold_element(arr, rng)
|
| 32 |
+
else:
|
| 33 |
+
assert can_hold_element(arr, rng)
|
| 34 |
+
|
| 35 |
+
# empty
|
| 36 |
+
rng = range(-(10**10), -(10**10))
|
| 37 |
+
assert len(rng) == 0
|
| 38 |
+
# assert can_hold_element(arr, rng)
|
| 39 |
+
|
| 40 |
+
rng = range(10**10, 10**10)
|
| 41 |
+
assert len(rng) == 0
|
| 42 |
+
assert can_hold_element(arr, rng)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def test_can_hold_element_int_values_float_ndarray():
|
| 46 |
+
arr = np.array([], dtype=np.int64)
|
| 47 |
+
|
| 48 |
+
element = np.array([1.0, 2.0])
|
| 49 |
+
assert can_hold_element(arr, element)
|
| 50 |
+
|
| 51 |
+
assert not can_hold_element(arr, element + 0.5)
|
| 52 |
+
|
| 53 |
+
# integer but not losslessly castable to int64
|
| 54 |
+
element = np.array([3, 2**65], dtype=np.float64)
|
| 55 |
+
assert not can_hold_element(arr, element)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def test_can_hold_element_int8_int():
|
| 59 |
+
arr = np.array([], dtype=np.int8)
|
| 60 |
+
|
| 61 |
+
element = 2
|
| 62 |
+
assert can_hold_element(arr, element)
|
| 63 |
+
assert can_hold_element(arr, np.int8(element))
|
| 64 |
+
assert can_hold_element(arr, np.uint8(element))
|
| 65 |
+
assert can_hold_element(arr, np.int16(element))
|
| 66 |
+
assert can_hold_element(arr, np.uint16(element))
|
| 67 |
+
assert can_hold_element(arr, np.int32(element))
|
| 68 |
+
assert can_hold_element(arr, np.uint32(element))
|
| 69 |
+
assert can_hold_element(arr, np.int64(element))
|
| 70 |
+
assert can_hold_element(arr, np.uint64(element))
|
| 71 |
+
|
| 72 |
+
element = 2**9
|
| 73 |
+
assert not can_hold_element(arr, element)
|
| 74 |
+
assert not can_hold_element(arr, np.int16(element))
|
| 75 |
+
assert not can_hold_element(arr, np.uint16(element))
|
| 76 |
+
assert not can_hold_element(arr, np.int32(element))
|
| 77 |
+
assert not can_hold_element(arr, np.uint32(element))
|
| 78 |
+
assert not can_hold_element(arr, np.int64(element))
|
| 79 |
+
assert not can_hold_element(arr, np.uint64(element))
|
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_from_scalar.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas.core.dtypes.cast import construct_1d_arraylike_from_scalar
|
| 5 |
+
from pandas.core.dtypes.dtypes import CategoricalDtype
|
| 6 |
+
|
| 7 |
+
from pandas import (
|
| 8 |
+
Categorical,
|
| 9 |
+
Timedelta,
|
| 10 |
+
)
|
| 11 |
+
import pandas._testing as tm
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def test_cast_1d_array_like_from_scalar_categorical():
|
| 15 |
+
# see gh-19565
|
| 16 |
+
#
|
| 17 |
+
# Categorical result from scalar did not maintain
|
| 18 |
+
# categories and ordering of the passed dtype.
|
| 19 |
+
cats = ["a", "b", "c"]
|
| 20 |
+
cat_type = CategoricalDtype(categories=cats, ordered=False)
|
| 21 |
+
expected = Categorical(["a", "a"], categories=cats)
|
| 22 |
+
|
| 23 |
+
result = construct_1d_arraylike_from_scalar("a", len(expected), cat_type)
|
| 24 |
+
tm.assert_categorical_equal(result, expected)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def test_cast_1d_array_like_from_timestamp(fixed_now_ts):
|
| 28 |
+
# check we dont lose nanoseconds
|
| 29 |
+
ts = fixed_now_ts + Timedelta(1)
|
| 30 |
+
res = construct_1d_arraylike_from_scalar(ts, 2, np.dtype("M8[ns]"))
|
| 31 |
+
assert res[0] == ts
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def test_cast_1d_array_like_from_timedelta():
|
| 35 |
+
# check we dont lose nanoseconds
|
| 36 |
+
td = Timedelta(1)
|
| 37 |
+
res = construct_1d_arraylike_from_scalar(td, 2, np.dtype("m8[ns]"))
|
| 38 |
+
assert res[0] == td
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def test_cast_1d_array_like_mismatched_datetimelike():
|
| 42 |
+
td = np.timedelta64("NaT", "ns")
|
| 43 |
+
dt = np.datetime64("NaT", "ns")
|
| 44 |
+
|
| 45 |
+
with pytest.raises(TypeError, match="Cannot cast"):
|
| 46 |
+
construct_1d_arraylike_from_scalar(td, 2, dt.dtype)
|
| 47 |
+
|
| 48 |
+
with pytest.raises(TypeError, match="Cannot cast"):
|
| 49 |
+
construct_1d_arraylike_from_scalar(np.timedelta64(4, "ns"), 2, dt.dtype)
|
| 50 |
+
|
| 51 |
+
with pytest.raises(TypeError, match="Cannot cast"):
|
| 52 |
+
construct_1d_arraylike_from_scalar(dt, 2, td.dtype)
|
| 53 |
+
|
| 54 |
+
with pytest.raises(TypeError, match="Cannot cast"):
|
| 55 |
+
construct_1d_arraylike_from_scalar(np.datetime64(4, "ns"), 2, td.dtype)
|
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_ndarray.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import pandas._testing as tm
|
| 6 |
+
from pandas.core.construction import sanitize_array
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
@pytest.mark.parametrize(
|
| 10 |
+
"values, dtype, expected",
|
| 11 |
+
[
|
| 12 |
+
([1, 2, 3], None, np.array([1, 2, 3], dtype=np.int64)),
|
| 13 |
+
(np.array([1, 2, 3]), None, np.array([1, 2, 3])),
|
| 14 |
+
(["1", "2", None], None, np.array(["1", "2", None])),
|
| 15 |
+
(["1", "2", None], np.dtype("str"), np.array(["1", "2", None])),
|
| 16 |
+
([1, 2, None], np.dtype("str"), np.array(["1", "2", None])),
|
| 17 |
+
],
|
| 18 |
+
)
|
| 19 |
+
def test_construct_1d_ndarray_preserving_na(
|
| 20 |
+
values, dtype, expected, using_infer_string
|
| 21 |
+
):
|
| 22 |
+
result = sanitize_array(values, index=None, dtype=dtype)
|
| 23 |
+
if using_infer_string and expected.dtype == object and dtype is None:
|
| 24 |
+
tm.assert_extension_array_equal(result, pd.array(expected, dtype="str"))
|
| 25 |
+
else:
|
| 26 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]"])
|
| 30 |
+
def test_construct_1d_ndarray_preserving_na_datetimelike(dtype):
|
| 31 |
+
arr = np.arange(5, dtype=np.int64).view(dtype)
|
| 32 |
+
expected = np.array(list(arr), dtype=object)
|
| 33 |
+
assert all(isinstance(x, type(arr[0])) for x in expected)
|
| 34 |
+
|
| 35 |
+
result = sanitize_array(arr, index=None, dtype=np.dtype(object))
|
| 36 |
+
tm.assert_numpy_array_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_object_arr.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@pytest.mark.parametrize("datum1", [1, 2.0, "3", (4, 5), [6, 7], None])
|
| 7 |
+
@pytest.mark.parametrize("datum2", [8, 9.0, "10", (11, 12), [13, 14], None])
|
| 8 |
+
def test_cast_1d_array(datum1, datum2):
|
| 9 |
+
data = [datum1, datum2]
|
| 10 |
+
result = construct_1d_object_array_from_listlike(data)
|
| 11 |
+
|
| 12 |
+
# Direct comparison fails: https://github.com/numpy/numpy/issues/10218
|
| 13 |
+
assert result.dtype == "object"
|
| 14 |
+
assert list(result) == data
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
@pytest.mark.parametrize("val", [1, 2.0, None])
|
| 18 |
+
def test_cast_1d_array_invalid_scalar(val):
|
| 19 |
+
with pytest.raises(TypeError, match="has no len()"):
|
| 20 |
+
construct_1d_object_array_from_listlike(val)
|
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_dict_compat.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
from pandas.core.dtypes.cast import dict_compat
|
| 4 |
+
|
| 5 |
+
from pandas import Timestamp
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def test_dict_compat():
|
| 9 |
+
data_datetime64 = {np.datetime64("1990-03-15"): 1, np.datetime64("2015-03-15"): 2}
|
| 10 |
+
data_unchanged = {1: 2, 3: 4, 5: 6}
|
| 11 |
+
expected = {Timestamp("1990-3-15"): 1, Timestamp("2015-03-15"): 2}
|
| 12 |
+
assert dict_compat(data_datetime64) == expected
|
| 13 |
+
assert dict_compat(expected) == expected
|
| 14 |
+
assert dict_compat(data_unchanged) == data_unchanged
|
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_downcast.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import decimal
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
from pandas.core.dtypes.cast import maybe_downcast_to_dtype
|
| 7 |
+
|
| 8 |
+
from pandas import (
|
| 9 |
+
Series,
|
| 10 |
+
Timedelta,
|
| 11 |
+
)
|
| 12 |
+
import pandas._testing as tm
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@pytest.mark.parametrize(
|
| 16 |
+
"arr,dtype,expected",
|
| 17 |
+
[
|
| 18 |
+
(
|
| 19 |
+
np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]),
|
| 20 |
+
"infer",
|
| 21 |
+
np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]),
|
| 22 |
+
),
|
| 23 |
+
(
|
| 24 |
+
np.array([8.0, 8.0, 8.0, 8.0, 8.9999999999995]),
|
| 25 |
+
"infer",
|
| 26 |
+
np.array([8, 8, 8, 8, 9], dtype=np.int64),
|
| 27 |
+
),
|
| 28 |
+
(
|
| 29 |
+
np.array([8.0, 8.0, 8.0, 8.0, 9.0000000000005]),
|
| 30 |
+
"infer",
|
| 31 |
+
np.array([8, 8, 8, 8, 9], dtype=np.int64),
|
| 32 |
+
),
|
| 33 |
+
(
|
| 34 |
+
# This is a judgement call, but we do _not_ downcast Decimal
|
| 35 |
+
# objects
|
| 36 |
+
np.array([decimal.Decimal(0.0)]),
|
| 37 |
+
"int64",
|
| 38 |
+
np.array([decimal.Decimal(0.0)]),
|
| 39 |
+
),
|
| 40 |
+
(
|
| 41 |
+
# GH#45837
|
| 42 |
+
np.array([Timedelta(days=1), Timedelta(days=2)], dtype=object),
|
| 43 |
+
"infer",
|
| 44 |
+
np.array([1, 2], dtype="m8[D]").astype("m8[ns]"),
|
| 45 |
+
),
|
| 46 |
+
# TODO: similar for dt64, dt64tz, Period, Interval?
|
| 47 |
+
],
|
| 48 |
+
)
|
| 49 |
+
def test_downcast(arr, expected, dtype):
|
| 50 |
+
result = maybe_downcast_to_dtype(arr, dtype)
|
| 51 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def test_downcast_booleans():
|
| 55 |
+
# see gh-16875: coercing of booleans.
|
| 56 |
+
ser = Series([True, True, False])
|
| 57 |
+
result = maybe_downcast_to_dtype(ser, np.dtype(np.float64))
|
| 58 |
+
|
| 59 |
+
expected = ser.values
|
| 60 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def test_downcast_conversion_no_nan(any_real_numpy_dtype):
|
| 64 |
+
dtype = any_real_numpy_dtype
|
| 65 |
+
expected = np.array([1, 2])
|
| 66 |
+
arr = np.array([1.0, 2.0], dtype=dtype)
|
| 67 |
+
|
| 68 |
+
result = maybe_downcast_to_dtype(arr, "infer")
|
| 69 |
+
tm.assert_almost_equal(result, expected, check_dtype=False)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def test_downcast_conversion_nan(float_numpy_dtype):
|
| 73 |
+
dtype = float_numpy_dtype
|
| 74 |
+
data = [1.0, 2.0, np.nan]
|
| 75 |
+
|
| 76 |
+
expected = np.array(data, dtype=dtype)
|
| 77 |
+
arr = np.array(data, dtype=dtype)
|
| 78 |
+
|
| 79 |
+
result = maybe_downcast_to_dtype(arr, "infer")
|
| 80 |
+
tm.assert_almost_equal(result, expected)
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def test_downcast_conversion_empty(any_real_numpy_dtype):
|
| 84 |
+
dtype = any_real_numpy_dtype
|
| 85 |
+
arr = np.array([], dtype=dtype)
|
| 86 |
+
result = maybe_downcast_to_dtype(arr, np.dtype("int64"))
|
| 87 |
+
tm.assert_numpy_array_equal(result, np.array([], dtype=np.int64))
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
@pytest.mark.parametrize("klass", [np.datetime64, np.timedelta64])
|
| 91 |
+
def test_datetime_likes_nan(klass):
|
| 92 |
+
dtype = klass.__name__ + "[ns]"
|
| 93 |
+
arr = np.array([1, 2, np.nan])
|
| 94 |
+
|
| 95 |
+
exp = np.array([1, 2, klass("NaT")], dtype)
|
| 96 |
+
res = maybe_downcast_to_dtype(arr, dtype)
|
| 97 |
+
tm.assert_numpy_array_equal(res, exp)
|
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_find_common_type.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas.core.dtypes.cast import find_common_type
|
| 5 |
+
from pandas.core.dtypes.common import pandas_dtype
|
| 6 |
+
from pandas.core.dtypes.dtypes import (
|
| 7 |
+
CategoricalDtype,
|
| 8 |
+
DatetimeTZDtype,
|
| 9 |
+
IntervalDtype,
|
| 10 |
+
PeriodDtype,
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
from pandas import (
|
| 14 |
+
Categorical,
|
| 15 |
+
Index,
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@pytest.mark.parametrize(
|
| 20 |
+
"source_dtypes,expected_common_dtype",
|
| 21 |
+
[
|
| 22 |
+
((np.int64,), np.int64),
|
| 23 |
+
((np.uint64,), np.uint64),
|
| 24 |
+
((np.float32,), np.float32),
|
| 25 |
+
((object,), object),
|
| 26 |
+
# Into ints.
|
| 27 |
+
((np.int16, np.int64), np.int64),
|
| 28 |
+
((np.int32, np.uint32), np.int64),
|
| 29 |
+
((np.uint16, np.uint64), np.uint64),
|
| 30 |
+
# Into floats.
|
| 31 |
+
((np.float16, np.float32), np.float32),
|
| 32 |
+
((np.float16, np.int16), np.float32),
|
| 33 |
+
((np.float32, np.int16), np.float32),
|
| 34 |
+
((np.uint64, np.int64), np.float64),
|
| 35 |
+
((np.int16, np.float64), np.float64),
|
| 36 |
+
((np.float16, np.int64), np.float64),
|
| 37 |
+
# Into others.
|
| 38 |
+
((np.complex128, np.int32), np.complex128),
|
| 39 |
+
((object, np.float32), object),
|
| 40 |
+
((object, np.int16), object),
|
| 41 |
+
# Bool with int.
|
| 42 |
+
((np.dtype("bool"), np.int64), object),
|
| 43 |
+
((np.dtype("bool"), np.int32), object),
|
| 44 |
+
((np.dtype("bool"), np.int16), object),
|
| 45 |
+
((np.dtype("bool"), np.int8), object),
|
| 46 |
+
((np.dtype("bool"), np.uint64), object),
|
| 47 |
+
((np.dtype("bool"), np.uint32), object),
|
| 48 |
+
((np.dtype("bool"), np.uint16), object),
|
| 49 |
+
((np.dtype("bool"), np.uint8), object),
|
| 50 |
+
# Bool with float.
|
| 51 |
+
((np.dtype("bool"), np.float64), object),
|
| 52 |
+
((np.dtype("bool"), np.float32), object),
|
| 53 |
+
(
|
| 54 |
+
(np.dtype("datetime64[ns]"), np.dtype("datetime64[ns]")),
|
| 55 |
+
np.dtype("datetime64[ns]"),
|
| 56 |
+
),
|
| 57 |
+
(
|
| 58 |
+
(np.dtype("timedelta64[ns]"), np.dtype("timedelta64[ns]")),
|
| 59 |
+
np.dtype("timedelta64[ns]"),
|
| 60 |
+
),
|
| 61 |
+
(
|
| 62 |
+
(np.dtype("datetime64[ns]"), np.dtype("datetime64[ms]")),
|
| 63 |
+
np.dtype("datetime64[ns]"),
|
| 64 |
+
),
|
| 65 |
+
(
|
| 66 |
+
(np.dtype("timedelta64[ms]"), np.dtype("timedelta64[ns]")),
|
| 67 |
+
np.dtype("timedelta64[ns]"),
|
| 68 |
+
),
|
| 69 |
+
((np.dtype("datetime64[ns]"), np.dtype("timedelta64[ns]")), object),
|
| 70 |
+
((np.dtype("datetime64[ns]"), np.int64), object),
|
| 71 |
+
],
|
| 72 |
+
)
|
| 73 |
+
def test_numpy_dtypes(source_dtypes, expected_common_dtype):
|
| 74 |
+
source_dtypes = [pandas_dtype(x) for x in source_dtypes]
|
| 75 |
+
assert find_common_type(source_dtypes) == expected_common_dtype
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def test_raises_empty_input():
|
| 79 |
+
with pytest.raises(ValueError, match="no types given"):
|
| 80 |
+
find_common_type([])
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
@pytest.mark.parametrize(
|
| 84 |
+
"dtypes,exp_type",
|
| 85 |
+
[
|
| 86 |
+
([CategoricalDtype()], "category"),
|
| 87 |
+
([object, CategoricalDtype()], object),
|
| 88 |
+
([CategoricalDtype(), CategoricalDtype()], "category"),
|
| 89 |
+
],
|
| 90 |
+
)
|
| 91 |
+
def test_categorical_dtype(dtypes, exp_type):
|
| 92 |
+
assert find_common_type(dtypes) == exp_type
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def test_datetimetz_dtype_match():
|
| 96 |
+
dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern")
|
| 97 |
+
assert find_common_type([dtype, dtype]) == "datetime64[ns, US/Eastern]"
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
@pytest.mark.parametrize(
|
| 101 |
+
"dtype2",
|
| 102 |
+
[
|
| 103 |
+
DatetimeTZDtype(unit="ns", tz="Asia/Tokyo"),
|
| 104 |
+
np.dtype("datetime64[ns]"),
|
| 105 |
+
object,
|
| 106 |
+
np.int64,
|
| 107 |
+
],
|
| 108 |
+
)
|
| 109 |
+
def test_datetimetz_dtype_mismatch(dtype2):
|
| 110 |
+
dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern")
|
| 111 |
+
assert find_common_type([dtype, dtype2]) == object
|
| 112 |
+
assert find_common_type([dtype2, dtype]) == object
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def test_period_dtype_match():
|
| 116 |
+
dtype = PeriodDtype(freq="D")
|
| 117 |
+
assert find_common_type([dtype, dtype]) == "period[D]"
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
@pytest.mark.parametrize(
|
| 121 |
+
"dtype2",
|
| 122 |
+
[
|
| 123 |
+
DatetimeTZDtype(unit="ns", tz="Asia/Tokyo"),
|
| 124 |
+
PeriodDtype(freq="2D"),
|
| 125 |
+
PeriodDtype(freq="h"),
|
| 126 |
+
np.dtype("datetime64[ns]"),
|
| 127 |
+
object,
|
| 128 |
+
np.int64,
|
| 129 |
+
],
|
| 130 |
+
)
|
| 131 |
+
def test_period_dtype_mismatch(dtype2):
|
| 132 |
+
dtype = PeriodDtype(freq="D")
|
| 133 |
+
assert find_common_type([dtype, dtype2]) == object
|
| 134 |
+
assert find_common_type([dtype2, dtype]) == object
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
interval_dtypes = [
|
| 138 |
+
IntervalDtype(np.int64, "right"),
|
| 139 |
+
IntervalDtype(np.float64, "right"),
|
| 140 |
+
IntervalDtype(np.uint64, "right"),
|
| 141 |
+
IntervalDtype(DatetimeTZDtype(unit="ns", tz="US/Eastern"), "right"),
|
| 142 |
+
IntervalDtype("M8[ns]", "right"),
|
| 143 |
+
IntervalDtype("m8[ns]", "right"),
|
| 144 |
+
]
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
@pytest.mark.parametrize("left", interval_dtypes)
|
| 148 |
+
@pytest.mark.parametrize("right", interval_dtypes)
|
| 149 |
+
def test_interval_dtype(left, right):
|
| 150 |
+
result = find_common_type([left, right])
|
| 151 |
+
|
| 152 |
+
if left is right:
|
| 153 |
+
assert result is left
|
| 154 |
+
|
| 155 |
+
elif left.subtype.kind in ["i", "u", "f"]:
|
| 156 |
+
# i.e. numeric
|
| 157 |
+
if right.subtype.kind in ["i", "u", "f"]:
|
| 158 |
+
# both numeric -> common numeric subtype
|
| 159 |
+
expected = IntervalDtype(np.float64, "right")
|
| 160 |
+
assert result == expected
|
| 161 |
+
else:
|
| 162 |
+
assert result == object
|
| 163 |
+
|
| 164 |
+
else:
|
| 165 |
+
assert result == object
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
@pytest.mark.parametrize("dtype", interval_dtypes)
|
| 169 |
+
def test_interval_dtype_with_categorical(dtype):
|
| 170 |
+
obj = Index([], dtype=dtype)
|
| 171 |
+
|
| 172 |
+
cat = Categorical([], categories=obj)
|
| 173 |
+
|
| 174 |
+
result = find_common_type([dtype, cat.dtype])
|
| 175 |
+
assert result == dtype
|
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_infer_datetimelike.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import (
|
| 5 |
+
DataFrame,
|
| 6 |
+
NaT,
|
| 7 |
+
Series,
|
| 8 |
+
Timestamp,
|
| 9 |
+
)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@pytest.mark.parametrize(
|
| 13 |
+
"data,exp_size",
|
| 14 |
+
[
|
| 15 |
+
# see gh-16362.
|
| 16 |
+
([[NaT, "a", "b", 0], [NaT, "b", "c", 1]], 8),
|
| 17 |
+
([[NaT, "a", 0], [NaT, "b", 1]], 6),
|
| 18 |
+
],
|
| 19 |
+
)
|
| 20 |
+
def test_maybe_infer_to_datetimelike_df_construct(data, exp_size):
|
| 21 |
+
result = DataFrame(np.array(data))
|
| 22 |
+
assert result.size == exp_size
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def test_maybe_infer_to_datetimelike_ser_construct():
|
| 26 |
+
# see gh-19671.
|
| 27 |
+
result = Series(["M1701", Timestamp("20130101")])
|
| 28 |
+
assert result.dtype.kind == "O"
|
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_infer_dtype.py
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import (
|
| 2 |
+
date,
|
| 3 |
+
datetime,
|
| 4 |
+
timedelta,
|
| 5 |
+
)
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
import pytest
|
| 9 |
+
|
| 10 |
+
from pandas.core.dtypes.cast import (
|
| 11 |
+
infer_dtype_from,
|
| 12 |
+
infer_dtype_from_array,
|
| 13 |
+
infer_dtype_from_scalar,
|
| 14 |
+
)
|
| 15 |
+
from pandas.core.dtypes.common import is_dtype_equal
|
| 16 |
+
|
| 17 |
+
from pandas import (
|
| 18 |
+
Categorical,
|
| 19 |
+
Interval,
|
| 20 |
+
Period,
|
| 21 |
+
Series,
|
| 22 |
+
Timedelta,
|
| 23 |
+
Timestamp,
|
| 24 |
+
date_range,
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def test_infer_dtype_from_int_scalar(any_int_numpy_dtype):
|
| 29 |
+
# Test that infer_dtype_from_scalar is
|
| 30 |
+
# returning correct dtype for int and float.
|
| 31 |
+
data = np.dtype(any_int_numpy_dtype).type(12)
|
| 32 |
+
dtype, val = infer_dtype_from_scalar(data)
|
| 33 |
+
assert dtype == type(data)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def test_infer_dtype_from_float_scalar(float_numpy_dtype):
|
| 37 |
+
float_numpy_dtype = np.dtype(float_numpy_dtype).type
|
| 38 |
+
data = float_numpy_dtype(12)
|
| 39 |
+
|
| 40 |
+
dtype, val = infer_dtype_from_scalar(data)
|
| 41 |
+
assert dtype == float_numpy_dtype
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
@pytest.mark.parametrize(
|
| 45 |
+
"data,exp_dtype", [(12, np.int64), (np.float64(12), np.float64)]
|
| 46 |
+
)
|
| 47 |
+
def test_infer_dtype_from_python_scalar(data, exp_dtype):
|
| 48 |
+
dtype, val = infer_dtype_from_scalar(data)
|
| 49 |
+
assert dtype == exp_dtype
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
@pytest.mark.parametrize("bool_val", [True, False])
|
| 53 |
+
def test_infer_dtype_from_boolean(bool_val):
|
| 54 |
+
dtype, val = infer_dtype_from_scalar(bool_val)
|
| 55 |
+
assert dtype == np.bool_
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def test_infer_dtype_from_complex(complex_dtype):
|
| 59 |
+
data = np.dtype(complex_dtype).type(1)
|
| 60 |
+
dtype, val = infer_dtype_from_scalar(data)
|
| 61 |
+
assert dtype == np.complex128
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def test_infer_dtype_from_datetime():
|
| 65 |
+
dt64 = np.datetime64(1, "ns")
|
| 66 |
+
dtype, val = infer_dtype_from_scalar(dt64)
|
| 67 |
+
assert dtype == "M8[ns]"
|
| 68 |
+
|
| 69 |
+
ts = Timestamp(1)
|
| 70 |
+
dtype, val = infer_dtype_from_scalar(ts)
|
| 71 |
+
assert dtype == "M8[ns]"
|
| 72 |
+
|
| 73 |
+
dt = datetime(2000, 1, 1, 0, 0)
|
| 74 |
+
dtype, val = infer_dtype_from_scalar(dt)
|
| 75 |
+
assert dtype == "M8[us]"
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def test_infer_dtype_from_timedelta():
|
| 79 |
+
td64 = np.timedelta64(1, "ns")
|
| 80 |
+
dtype, val = infer_dtype_from_scalar(td64)
|
| 81 |
+
assert dtype == "m8[ns]"
|
| 82 |
+
|
| 83 |
+
pytd = timedelta(1)
|
| 84 |
+
dtype, val = infer_dtype_from_scalar(pytd)
|
| 85 |
+
assert dtype == "m8[us]"
|
| 86 |
+
|
| 87 |
+
td = Timedelta(1)
|
| 88 |
+
dtype, val = infer_dtype_from_scalar(td)
|
| 89 |
+
assert dtype == "m8[ns]"
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
@pytest.mark.parametrize("freq", ["M", "D"])
def test_infer_dtype_from_period(freq):
    period = Period("2011-01-01", freq=freq)
    inferred, returned = infer_dtype_from_scalar(period)

    # The dtype string embeds the frequency; the value comes back equal.
    assert inferred == f"period[{freq}]"
    assert returned == period
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def test_infer_dtype_misc():
    # A plain datetime.date is expected to infer as object.
    inferred, _ = infer_dtype_from_scalar(date(2000, 1, 1))
    assert inferred == np.object_

    # A tz-aware Timestamp is expected to carry its timezone in the dtype.
    aware = Timestamp(1, tz="US/Eastern")
    inferred, _ = infer_dtype_from_scalar(aware)
    assert inferred == "datetime64[ns, US/Eastern]"
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
@pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo"])
def test_infer_from_scalar_tz(tz):
    aware = Timestamp(1, tz=tz)
    inferred, returned = infer_dtype_from_scalar(aware)

    # The timezone shows up in the dtype string; the value comes back equal.
    assert inferred == f"datetime64[ns, {tz}]"
    assert returned == aware
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
@pytest.mark.parametrize(
    "left, right, subtype",
    [
        (0, 1, "int64"),
        (0.0, 1.0, "float64"),
        (Timestamp(0), Timestamp(1), "datetime64[ns]"),
        (Timestamp(0, tz="UTC"), Timestamp(1, tz="UTC"), "datetime64[ns, UTC]"),
        (Timedelta(0), Timedelta(1), "timedelta64[ns]"),
    ],
)
def test_infer_from_interval(left, right, subtype, closed):
    # GH 30337
    ivl = Interval(left, right, closed)
    inferred, returned = infer_dtype_from_scalar(ivl)

    # Both the endpoint subtype and the closed side appear in the dtype.
    assert inferred == f"interval[{subtype}, {closed}]"
    assert returned == ivl
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def test_infer_dtype_from_scalar_errors():
    # A one-element 1-d ndarray is expected to be rejected with ValueError.
    not_a_scalar = np.array([1])

    with pytest.raises(
        ValueError, match="invalid ndarray passed to infer_dtype_from_scalar"
    ):
        infer_dtype_from_scalar(not_a_scalar)
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
@pytest.mark.parametrize(
    "value, expected",
    [
        ("foo", np.object_),
        (b"foo", np.object_),
        (1, np.int64),
        (1.5, np.float64),
        (np.datetime64("2016-01-01"), np.dtype("M8[s]")),
        (Timestamp("20160101"), np.dtype("M8[s]")),
        (Timestamp("20160101", tz="UTC"), "datetime64[s, UTC]"),
    ],
)
def test_infer_dtype_from_scalar(value, expected, using_infer_string):
    inferred, _ = infer_dtype_from_scalar(value)

    # With string inference switched on, the "foo" case expects "string"
    # instead of the parametrized dtype.
    target = "string" if using_infer_string and value == "foo" else expected
    assert is_dtype_equal(inferred, target)

    # The array variant is expected to refuse the same scalar.
    with pytest.raises(TypeError, match="must be list-like"):
        infer_dtype_from_array(value)
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
@pytest.mark.parametrize(
    "arr, expected",
    [
        ([1], np.dtype(int)),
        (np.array([1], dtype=np.int64), np.int64),
        ([np.nan, 1, ""], np.object_),
        (np.array([[1.0, 2.0]]), np.float64),
        (Categorical(list("aabc")), "category"),
        (Categorical([1, 2, 3]), "category"),
        (date_range("20160101", periods=3), np.dtype("=M8[ns]")),
        (
            date_range("20160101", periods=3, tz="US/Eastern"),
            "datetime64[ns, US/Eastern]",
        ),
        (Series([1.0, 2, 3]), np.float64),
        (Series(list("abc")), np.object_),
        (
            Series(date_range("20160101", periods=3, tz="US/Eastern")),
            "datetime64[ns, US/Eastern]",
        ),
    ],
)
def test_infer_dtype_from_array(arr, expected, using_infer_string):
    inferred, _ = infer_dtype_from_array(arr)

    # With string inference switched on, the Series of "a", "b", "c" expects
    # "string" instead of the parametrized dtype.
    is_abc_series = (
        using_infer_string
        and isinstance(arr, Series)
        and arr.tolist() == ["a", "b", "c"]
    )
    target = "string" if is_abc_series else expected
    assert is_dtype_equal(inferred, target)
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
@pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64])
def test_infer_dtype_from_scalar_zerodim_datetimelike(cls):
    # ndarray.item() can incorrectly return int instead of td64/dt64
    zero_dim = np.array(cls(1234, "ns"))

    # Scalar path: both the dtype and the returned value keep the numpy class.
    inferred, unboxed = infer_dtype_from_scalar(zero_dim)
    assert inferred.type is cls
    assert isinstance(unboxed, cls)

    # Generic entry point: only the dtype is checked.
    inferred, _ = infer_dtype_from(zero_dim)
    assert inferred.type is cls
|
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_maybe_box_native.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
from pandas.core.dtypes.cast import maybe_box_native
|
| 7 |
+
|
| 8 |
+
from pandas import (
|
| 9 |
+
Interval,
|
| 10 |
+
Period,
|
| 11 |
+
Timedelta,
|
| 12 |
+
Timestamp,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@pytest.mark.parametrize(
    "obj,expected_dtype",
    [
        (b"\x00\x10", bytes),
        (int(4), int),
        (np.uint(4), int),
        (np.int32(-4), int),
        (np.uint8(4), int),
        (float(454.98), float),
        (np.float16(0.4), float),
        (np.float64(1.4), float),
        (np.bool_(False), bool),
        (datetime(2005, 2, 25), datetime),
        (np.datetime64("2005-02-25"), Timestamp),
        (Timestamp("2005-02-25"), Timestamp),
        (np.timedelta64(1, "D"), Timedelta),
        (Timedelta(1, "D"), Timedelta),
        (Interval(0, 1), Interval),
        (Period("4Q2005"), Period),
    ],
)
def test_maybe_box_native(obj, expected_dtype):
    # The check is on the exact class of the boxed result (identity, not
    # isinstance), so subclasses would not satisfy it.
    boxed = maybe_box_native(obj)
    assert type(boxed) is expected_dtype
|
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_promote.py
ADDED
|
@@ -0,0 +1,530 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
These tests exercise the function maybe_promote from core/dtypes/cast.py
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import datetime
|
| 6 |
+
from decimal import Decimal
|
| 7 |
+
|
| 8 |
+
import numpy as np
|
| 9 |
+
import pytest
|
| 10 |
+
|
| 11 |
+
from pandas._libs.tslibs import NaT
|
| 12 |
+
|
| 13 |
+
from pandas.core.dtypes.cast import maybe_promote
|
| 14 |
+
from pandas.core.dtypes.common import is_scalar
|
| 15 |
+
from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
| 16 |
+
from pandas.core.dtypes.missing import isna
|
| 17 |
+
|
| 18 |
+
import pandas as pd
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar=None):
    """
    Call maybe_promote with a scalar fill value and verify both outputs.

    Parameters
    ----------
    dtype : dtype
        Forwarded as the first argument of maybe_promote.
    fill_value : scalar
        Forwarded as the second argument of maybe_promote; it must satisfy
        is_scalar.
    expected_dtype : dtype
        The dtype maybe_promote is expected to return.
    exp_val_for_scalar : scalar
        The (potentially upcast) fill value maybe_promote is expected to
        return.
    """
    assert is_scalar(fill_value)

    # The scalar goes in unchanged; what comes back should be the expected
    # dtype together with the fill value, potentially upcast to that dtype.
    promoted_dtype, promoted_fill = maybe_promote(dtype, fill_value)

    assert promoted_dtype == expected_dtype
    _assert_match(promoted_fill, exp_val_for_scalar)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _assert_match(result_fill_value, expected_fill_value):
    # GH#23982/25425 require the same type in addition to equality/NA-ness
    if hasattr(result_fill_value, "dtype"):
        # Compare kind and itemsize rather than the classes themselves, which
        # is robust to platform-specific aliases (e.g. "ulonglong" for
        # "uint64" or "intc" for "int32").
        for attr in ("kind", "itemsize"):
            assert getattr(result_fill_value.dtype, attr) == getattr(
                expected_fill_value.dtype, attr
            )
    else:
        got_cls = type(result_fill_value)
        want_cls = type(expected_fill_value)
        # On some builds, type comparison fails, e.g. np.int32 != np.int32,
        # hence the fallback to comparing class names.
        assert got_cls == want_cls or got_cls.__name__ == want_cls.__name__

    same_value = result_fill_value == expected_fill_value
    # A comparison that yields pd.NA counts as "not equal" here.
    same_value = False if same_value is pd.NA else same_value

    # The type check above ensures both sides are the _same_ NA value.
    # None == None is already covered by same_value, but np.nan != np.nan and
    # pd.NaT != pd.NaT, so matching missing values are accepted separately.
    both_missing = isna(result_fill_value) and isna(expected_fill_value)

    assert same_value or both_missing
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
@pytest.mark.parametrize(
    "dtype, fill_value, expected_dtype",
    [
        # size 8
        ("int8", 1, "int8"),
        ("int8", np.iinfo("int8").max + 1, "int16"),
        ("int8", np.iinfo("int16").max + 1, "int32"),
        ("int8", np.iinfo("int32").max + 1, "int64"),
        ("int8", np.iinfo("int64").max + 1, "object"),
        ("int8", -1, "int8"),
        ("int8", np.iinfo("int8").min - 1, "int16"),
        ("int8", np.iinfo("int16").min - 1, "int32"),
        ("int8", np.iinfo("int32").min - 1, "int64"),
        ("int8", np.iinfo("int64").min - 1, "object"),
        # keep signed-ness as long as possible
        ("uint8", 1, "uint8"),
        ("uint8", np.iinfo("int8").max + 1, "uint8"),
        ("uint8", np.iinfo("uint8").max + 1, "uint16"),
        ("uint8", np.iinfo("int16").max + 1, "uint16"),
        ("uint8", np.iinfo("uint16").max + 1, "uint32"),
        ("uint8", np.iinfo("int32").max + 1, "uint32"),
        ("uint8", np.iinfo("uint32").max + 1, "uint64"),
        ("uint8", np.iinfo("int64").max + 1, "uint64"),
        ("uint8", np.iinfo("uint64").max + 1, "object"),
        # max of uint8 cannot be contained in int8
        ("uint8", -1, "int16"),
        ("uint8", np.iinfo("int8").min - 1, "int16"),
        ("uint8", np.iinfo("int16").min - 1, "int32"),
        ("uint8", np.iinfo("int32").min - 1, "int64"),
        ("uint8", np.iinfo("int64").min - 1, "object"),
        # size 16
        ("int16", 1, "int16"),
        ("int16", np.iinfo("int8").max + 1, "int16"),
        ("int16", np.iinfo("int16").max + 1, "int32"),
        ("int16", np.iinfo("int32").max + 1, "int64"),
        ("int16", np.iinfo("int64").max + 1, "object"),
        ("int16", -1, "int16"),
        ("int16", np.iinfo("int8").min - 1, "int16"),
        ("int16", np.iinfo("int16").min - 1, "int32"),
        ("int16", np.iinfo("int32").min - 1, "int64"),
        ("int16", np.iinfo("int64").min - 1, "object"),
        ("uint16", 1, "uint16"),
        ("uint16", np.iinfo("int8").max + 1, "uint16"),
        ("uint16", np.iinfo("uint8").max + 1, "uint16"),
        ("uint16", np.iinfo("int16").max + 1, "uint16"),
        ("uint16", np.iinfo("uint16").max + 1, "uint32"),
        ("uint16", np.iinfo("int32").max + 1, "uint32"),
        ("uint16", np.iinfo("uint32").max + 1, "uint64"),
        ("uint16", np.iinfo("int64").max + 1, "uint64"),
        ("uint16", np.iinfo("uint64").max + 1, "object"),
        ("uint16", -1, "int32"),
        ("uint16", np.iinfo("int8").min - 1, "int32"),
        ("uint16", np.iinfo("int16").min - 1, "int32"),
        ("uint16", np.iinfo("int32").min - 1, "int64"),
        ("uint16", np.iinfo("int64").min - 1, "object"),
        # size 32
        ("int32", 1, "int32"),
        ("int32", np.iinfo("int8").max + 1, "int32"),
        ("int32", np.iinfo("int16").max + 1, "int32"),
        ("int32", np.iinfo("int32").max + 1, "int64"),
        ("int32", np.iinfo("int64").max + 1, "object"),
        ("int32", -1, "int32"),
        ("int32", np.iinfo("int8").min - 1, "int32"),
        ("int32", np.iinfo("int16").min - 1, "int32"),
        ("int32", np.iinfo("int32").min - 1, "int64"),
        ("int32", np.iinfo("int64").min - 1, "object"),
        ("uint32", 1, "uint32"),
        ("uint32", np.iinfo("int8").max + 1, "uint32"),
        ("uint32", np.iinfo("uint8").max + 1, "uint32"),
        ("uint32", np.iinfo("int16").max + 1, "uint32"),
        ("uint32", np.iinfo("uint16").max + 1, "uint32"),
        ("uint32", np.iinfo("int32").max + 1, "uint32"),
        ("uint32", np.iinfo("uint32").max + 1, "uint64"),
        ("uint32", np.iinfo("int64").max + 1, "uint64"),
        ("uint32", np.iinfo("uint64").max + 1, "object"),
        ("uint32", -1, "int64"),
        ("uint32", np.iinfo("int8").min - 1, "int64"),
        ("uint32", np.iinfo("int16").min - 1, "int64"),
        ("uint32", np.iinfo("int32").min - 1, "int64"),
        ("uint32", np.iinfo("int64").min - 1, "object"),
        # size 64
        ("int64", 1, "int64"),
        ("int64", np.iinfo("int8").max + 1, "int64"),
        ("int64", np.iinfo("int16").max + 1, "int64"),
        ("int64", np.iinfo("int32").max + 1, "int64"),
        ("int64", np.iinfo("int64").max + 1, "object"),
        ("int64", -1, "int64"),
        ("int64", np.iinfo("int8").min - 1, "int64"),
        ("int64", np.iinfo("int16").min - 1, "int64"),
        ("int64", np.iinfo("int32").min - 1, "int64"),
        ("int64", np.iinfo("int64").min - 1, "object"),
        ("uint64", 1, "uint64"),
        ("uint64", np.iinfo("int8").max + 1, "uint64"),
        ("uint64", np.iinfo("uint8").max + 1, "uint64"),
        ("uint64", np.iinfo("int16").max + 1, "uint64"),
        ("uint64", np.iinfo("uint16").max + 1, "uint64"),
        ("uint64", np.iinfo("int32").max + 1, "uint64"),
        ("uint64", np.iinfo("uint32").max + 1, "uint64"),
        ("uint64", np.iinfo("int64").max + 1, "uint64"),
        ("uint64", np.iinfo("uint64").max + 1, "object"),
        ("uint64", -1, "object"),
        ("uint64", np.iinfo("int8").min - 1, "object"),
        ("uint64", np.iinfo("int16").min - 1, "object"),
        ("uint64", np.iinfo("int32").min - 1, "object"),
        ("uint64", np.iinfo("int64").min - 1, "object"),
    ],
)
def test_maybe_promote_int_with_int(dtype, fill_value, expected_dtype):
    source_dtype = np.dtype(dtype)
    target_dtype = np.dtype(expected_dtype)

    # The expected fill value is not a generic Python int: it is the scalar
    # obtained by storing fill_value in an array of the target dtype.
    expected_fill = np.array([fill_value], dtype=target_dtype)[0]

    _check_promote(source_dtype, fill_value, target_dtype, expected_fill)
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def test_maybe_promote_int_with_float(any_int_numpy_dtype, float_numpy_dtype):
    int_dtype = np.dtype(any_int_numpy_dtype)
    float_dtype = np.dtype(float_numpy_dtype)

    # a "1" scalar of the fixture's float dtype
    fill_value = np.array([1], dtype=float_dtype)[0]

    # Filling int with float is expected to give float64 whatever the width
    # of the incoming float, with the fill value converted to float64 too.
    _check_promote(int_dtype, fill_value, np.float64, np.float64(fill_value))
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
def test_maybe_promote_float_with_int(float_numpy_dtype, any_int_numpy_dtype):
    float_dtype = np.dtype(float_numpy_dtype)
    int_dtype = np.dtype(any_int_numpy_dtype)

    # a "1" scalar of the fixture's integer dtype
    fill_value = np.array([1], dtype=int_dtype)[0]

    # filling float with int always keeps float dtype
    # because: np.finfo('float32').max > np.iinfo('uint64').max
    # The expected fill value is the scalar obtained by storing fill_value in
    # an array of that float dtype.
    expected_fill = np.array([fill_value], dtype=float_dtype)[0]

    _check_promote(float_dtype, fill_value, float_dtype, expected_fill)
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
@pytest.mark.parametrize(
    "dtype, fill_value, expected_dtype",
    [
        # float filled with float
        ("float32", 1, "float32"),
        ("float32", float(np.finfo("float32").max) * 1.1, "float64"),
        ("float64", 1, "float64"),
        ("float64", float(np.finfo("float32").max) * 1.1, "float64"),
        # complex filled with float
        ("complex64", 1, "complex64"),
        ("complex64", float(np.finfo("float32").max) * 1.1, "complex128"),
        ("complex128", 1, "complex128"),
        ("complex128", float(np.finfo("float32").max) * 1.1, "complex128"),
        # float filled with complex
        ("float32", 1 + 1j, "complex64"),
        ("float32", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
        ("float64", 1 + 1j, "complex128"),
        ("float64", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
        # complex filled with complex
        ("complex64", 1 + 1j, "complex64"),
        ("complex64", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
        ("complex128", 1 + 1j, "complex128"),
        ("complex128", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
    ],
)
def test_maybe_promote_float_with_float(dtype, fill_value, expected_dtype):
    source_dtype = np.dtype(dtype)
    target_dtype = np.dtype(expected_dtype)

    # The expected fill value is not a generic Python number: it is the
    # scalar obtained by storing fill_value in an array of the target dtype.
    expected_fill = np.array([fill_value], dtype=target_dtype)[0]

    _check_promote(source_dtype, fill_value, target_dtype, expected_fill)
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
def test_maybe_promote_bool_with_any(any_numpy_dtype):
    bool_dtype = np.dtype(bool)
    fill_dtype = np.dtype(any_numpy_dtype)

    # a "1" scalar of the fixture's dtype
    fill_value = np.array([1], dtype=fill_dtype)[0]

    # Filling bool with anything but bool is expected to give object; the
    # fill value itself is expected back unchanged either way.
    if fill_dtype != bool:
        expected_dtype = np.dtype(object)
    else:
        expected_dtype = fill_dtype

    _check_promote(bool_dtype, fill_value, expected_dtype, fill_value)
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
def test_maybe_promote_any_with_bool(any_numpy_dtype):
    dtype = np.dtype(any_numpy_dtype)
    fill_value = True

    # Filling anything but bool with a bool is expected to give object.
    if dtype != bool:
        expected_dtype = np.dtype(object)
    else:
        expected_dtype = dtype

    # The expected fill value is the scalar obtained by storing True in an
    # array of the expected dtype.
    expected_fill = np.array([fill_value], dtype=expected_dtype)[0]

    _check_promote(dtype, fill_value, expected_dtype, expected_fill)
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
def test_maybe_promote_bytes_with_any(bytes_dtype, any_numpy_dtype):
    dtype = np.dtype(bytes_dtype)
    fill_dtype = np.dtype(any_numpy_dtype)

    # a "1" scalar of the fixture's dtype
    fill_value = np.array([1], dtype=fill_dtype)[0]

    # we never use bytes dtype internally, always promote to object;
    # the fill value is expected back unchanged
    _check_promote(dtype, fill_value, np.dtype(np.object_), fill_value)
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
def test_maybe_promote_any_with_bytes(any_numpy_dtype):
    dtype = np.dtype(any_numpy_dtype)
    fill_value = b"abc"

    # we never use bytes dtype internally, always promote to object
    object_dtype = np.dtype(np.object_)
    # The expected fill value is the element read back from an object array
    # holding the bytes.
    expected_fill = np.array([fill_value], dtype=object_dtype)[0]

    _check_promote(dtype, fill_value, object_dtype, expected_fill)
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
def test_maybe_promote_datetime64_with_any(datetime64_dtype, any_numpy_dtype):
    dtype = np.dtype(datetime64_dtype)
    fill_dtype = np.dtype(any_numpy_dtype)

    # a "1" scalar of the fixture's dtype
    fill_value = np.array([1], dtype=fill_dtype)[0]

    if fill_dtype.kind != "M":
        # filling datetime with anything but datetime casts to object
        expected_dtype = np.dtype(object)
        expected_fill = fill_value
    else:
        # A datetime fill keeps the dtype; the expected value is the fill
        # routed through Timestamp.to_datetime64().
        expected_dtype = dtype
        expected_fill = pd.Timestamp(fill_value).to_datetime64()

    _check_promote(dtype, fill_value, expected_dtype, expected_fill)
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
@pytest.mark.parametrize(
    "fill_value",
    [
        pd.Timestamp("now"),
        np.datetime64("now"),
        datetime.datetime.now(),
        datetime.date.today(),
    ],
    ids=["pd.Timestamp", "np.datetime64", "datetime.datetime", "datetime.date"],
)
def test_maybe_promote_any_with_datetime64(any_numpy_dtype, fill_value):
    dtype = np.dtype(any_numpy_dtype)

    # The datetime64 dtype is kept only when the target is datetime64 and the
    # fill is not a plain datetime.date.
    # Casting date to dt64 is deprecated, in 2.0 enforced to cast to object
    keeps_dt64 = dtype.kind == "M" and type(fill_value) is not datetime.date

    if keeps_dt64:
        expected_dtype = dtype
        # the expected value is the fill routed through
        # Timestamp.to_datetime64()
        expected_fill = pd.Timestamp(fill_value).to_datetime64()
    else:
        # every other combination is expected to give object, with the fill
        # value returned unchanged
        expected_dtype = np.dtype(object)
        expected_fill = fill_value

    _check_promote(dtype, fill_value, expected_dtype, expected_fill)
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
@pytest.mark.parametrize(
    "fill_value",
    [
        pd.Timestamp(2023, 1, 1),
        np.datetime64("2023-01-01"),
        datetime.datetime(2023, 1, 1),
        datetime.date(2023, 1, 1),
    ],
    ids=["pd.Timestamp", "np.datetime64", "datetime.datetime", "datetime.date"],
)
def test_maybe_promote_any_numpy_dtype_with_datetimetz(
    any_numpy_dtype, tz_aware_fixture, fill_value
):
    dtype = np.dtype(any_numpy_dtype)
    tz_dtype = DatetimeTZDtype(tz=tz_aware_fixture)

    # Round-trip through a Series of the tz-aware dtype and take its first
    # element as the actual fill value.
    tz_fill = pd.Series([fill_value], dtype=tz_dtype)[0]

    # filling any numpy dtype with datetimetz casts to object
    _check_promote(dtype, tz_fill, np.dtype(object), tz_fill)
|
| 388 |
+
|
| 389 |
+
|
| 390 |
+
def test_maybe_promote_timedelta64_with_any(timedelta64_dtype, any_numpy_dtype):
    dtype = np.dtype(timedelta64_dtype)
    fill_dtype = np.dtype(any_numpy_dtype)

    # a "1" scalar of the fixture's dtype
    fill_value = np.array([1], dtype=fill_dtype)[0]

    if fill_dtype.kind != "m":
        # filling timedelta with anything but timedelta casts to object
        expected_dtype = np.dtype(object)
        expected_fill = fill_value
    else:
        # A timedelta fill keeps the dtype; the expected value is the fill
        # routed through Timedelta.to_timedelta64().
        expected_dtype = dtype
        expected_fill = pd.Timedelta(fill_value).to_timedelta64()

    _check_promote(dtype, fill_value, expected_dtype, expected_fill)
|
| 407 |
+
|
| 408 |
+
|
| 409 |
+
@pytest.mark.parametrize(
    "fill_value",
    [pd.Timedelta(days=1), np.timedelta64(24, "h"), datetime.timedelta(1)],
    ids=["pd.Timedelta", "np.timedelta64", "datetime.timedelta"],
)
def test_maybe_promote_any_with_timedelta64(any_numpy_dtype, fill_value):
    dtype = np.dtype(any_numpy_dtype)

    if dtype.kind != "m":
        # filling anything but timedelta with timedelta casts to object
        expected_dtype = np.dtype(object)
        expected_fill = fill_value
    else:
        # A timedelta target keeps its dtype; the expected value is the fill
        # routed through Timedelta.to_timedelta64().
        expected_dtype = dtype
        expected_fill = pd.Timedelta(fill_value).to_timedelta64()

    _check_promote(dtype, fill_value, expected_dtype, expected_fill)
|
| 427 |
+
|
| 428 |
+
|
| 429 |
+
def test_maybe_promote_string_with_any(string_dtype, any_numpy_dtype):
    dtype = np.dtype(string_dtype)
    fill_dtype = np.dtype(any_numpy_dtype)

    # a "1" scalar of the fixture's dtype
    fill_value = np.array([1], dtype=fill_dtype)[0]

    # filling string with anything casts to object; the fill value is
    # expected back unchanged
    _check_promote(dtype, fill_value, np.dtype(object), fill_value)
|
| 441 |
+
|
| 442 |
+
|
| 443 |
+
def test_maybe_promote_any_with_string(any_numpy_dtype):
    dtype = np.dtype(any_numpy_dtype)
    fill_value = "abc"

    # filling anything with a string casts to object; the string is expected
    # back unchanged
    _check_promote(dtype, fill_value, np.dtype(object), fill_value)
|
| 454 |
+
|
| 455 |
+
|
| 456 |
+
def test_maybe_promote_object_with_any(object_dtype, any_numpy_dtype):
    dtype = np.dtype(object_dtype)
    fill_dtype = np.dtype(any_numpy_dtype)

    # a "1" scalar of the fixture's dtype
    fill_value = np.array([1], dtype=fill_dtype)[0]

    # filling object with anything stays object; the fill value is expected
    # back unchanged
    _check_promote(dtype, fill_value, np.dtype(object), fill_value)
|
| 468 |
+
|
| 469 |
+
|
| 470 |
+
def test_maybe_promote_any_with_object(any_numpy_dtype):
    dtype = np.dtype(any_numpy_dtype)

    # A value that passes dtypes.common.is_scalar but cannot be cast to
    # int/float etc.
    fill_value = pd.DateOffset(1)

    # Such a fill is expected to give object, with the value returned
    # unchanged.
    _check_promote(dtype, fill_value, np.dtype(object), fill_value)
|
| 482 |
+
|
| 483 |
+
|
| 484 |
+
def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype, nulls_fixture):
|
| 485 |
+
fill_value = nulls_fixture
|
| 486 |
+
dtype = np.dtype(any_numpy_dtype)
|
| 487 |
+
|
| 488 |
+
if isinstance(fill_value, Decimal):
|
| 489 |
+
# Subject to change, but ATM (When Decimal(NAN) is being added to nulls_fixture)
|
| 490 |
+
# this is the existing behavior in maybe_promote,
|
| 491 |
+
# hinges on is_valid_na_for_dtype
|
| 492 |
+
if dtype.kind in "iufc":
|
| 493 |
+
if dtype.kind in "iu":
|
| 494 |
+
expected_dtype = np.dtype(np.float64)
|
| 495 |
+
else:
|
| 496 |
+
expected_dtype = dtype
|
| 497 |
+
exp_val_for_scalar = np.nan
|
| 498 |
+
else:
|
| 499 |
+
expected_dtype = np.dtype(object)
|
| 500 |
+
exp_val_for_scalar = fill_value
|
| 501 |
+
elif dtype.kind in "iu" and fill_value is not NaT:
|
| 502 |
+
# integer + other missing value (np.nan / None) casts to float
|
| 503 |
+
expected_dtype = np.float64
|
| 504 |
+
exp_val_for_scalar = np.nan
|
| 505 |
+
elif dtype == object and fill_value is NaT:
|
| 506 |
+
# inserting into object does not cast the value
|
| 507 |
+
# but *does* cast None to np.nan
|
| 508 |
+
expected_dtype = np.dtype(object)
|
| 509 |
+
exp_val_for_scalar = fill_value
|
| 510 |
+
elif dtype.kind in "mM":
|
| 511 |
+
# datetime / timedelta cast all missing values to dtyped-NaT
|
| 512 |
+
expected_dtype = dtype
|
| 513 |
+
exp_val_for_scalar = dtype.type("NaT", "ns")
|
| 514 |
+
elif fill_value is NaT:
|
| 515 |
+
# NaT upcasts everything that's not datetime/timedelta to object
|
| 516 |
+
expected_dtype = np.dtype(object)
|
| 517 |
+
exp_val_for_scalar = NaT
|
| 518 |
+
elif dtype.kind in "fc":
|
| 519 |
+
# float / complex + missing value (!= NaT) stays the same
|
| 520 |
+
expected_dtype = dtype
|
| 521 |
+
exp_val_for_scalar = np.nan
|
| 522 |
+
else:
|
| 523 |
+
# all other cases cast to object; pd.NA is kept as-is, any other missing value becomes np.nan
|
| 524 |
+
expected_dtype = np.dtype(object)
|
| 525 |
+
if fill_value is pd.NA:
|
| 526 |
+
exp_val_for_scalar = pd.NA
|
| 527 |
+
else:
|
| 528 |
+
exp_val_for_scalar = np.nan
|
| 529 |
+
|
| 530 |
+
_check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
|
py311/lib/python3.11/site-packages/pandas/tests/extension/base/casting.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
import pandas.util._test_decorators as td
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import pandas._testing as tm
|
| 8 |
+
from pandas.core.internals.blocks import NumpyBlock
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class BaseCastingTests:
|
| 12 |
+
"""Casting to and from ExtensionDtypes"""
|
| 13 |
+
|
| 14 |
+
def test_astype_object_series(self, all_data):
|
| 15 |
+
ser = pd.Series(all_data, name="A")
|
| 16 |
+
result = ser.astype(object)
|
| 17 |
+
assert result.dtype == np.dtype(object)
|
| 18 |
+
if hasattr(result._mgr, "blocks"):
|
| 19 |
+
blk = result._mgr.blocks[0]
|
| 20 |
+
assert isinstance(blk, NumpyBlock)
|
| 21 |
+
assert blk.is_object
|
| 22 |
+
assert isinstance(result._mgr.array, np.ndarray)
|
| 23 |
+
assert result._mgr.array.dtype == np.dtype(object)
|
| 24 |
+
|
| 25 |
+
def test_astype_object_frame(self, all_data):
|
| 26 |
+
df = pd.DataFrame({"A": all_data})
|
| 27 |
+
|
| 28 |
+
result = df.astype(object)
|
| 29 |
+
if hasattr(result._mgr, "blocks"):
|
| 30 |
+
blk = result._mgr.blocks[0]
|
| 31 |
+
assert isinstance(blk, NumpyBlock), type(blk)
|
| 32 |
+
assert blk.is_object
|
| 33 |
+
assert isinstance(result._mgr.arrays[0], np.ndarray)
|
| 34 |
+
assert result._mgr.arrays[0].dtype == np.dtype(object)
|
| 35 |
+
|
| 36 |
+
# check that we can compare the dtypes
|
| 37 |
+
comp = result.dtypes == df.dtypes
|
| 38 |
+
assert not comp.any()
|
| 39 |
+
|
| 40 |
+
def test_tolist(self, data):
|
| 41 |
+
result = pd.Series(data).tolist()
|
| 42 |
+
expected = list(data)
|
| 43 |
+
assert result == expected
|
| 44 |
+
|
| 45 |
+
def test_astype_str(self, data):
|
| 46 |
+
result = pd.Series(data[:2]).astype(str)
|
| 47 |
+
expected = pd.Series([str(x) for x in data[:2]], dtype=str)
|
| 48 |
+
tm.assert_series_equal(result, expected)
|
| 49 |
+
|
| 50 |
+
@pytest.mark.parametrize(
|
| 51 |
+
"nullable_string_dtype",
|
| 52 |
+
[
|
| 53 |
+
"string[python]",
|
| 54 |
+
pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
|
| 55 |
+
],
|
| 56 |
+
)
|
| 57 |
+
def test_astype_string(self, data, nullable_string_dtype):
|
| 58 |
+
# GH-33465, GH#45326 as of 2.0 we decode bytes instead of calling str(obj)
|
| 59 |
+
result = pd.Series(data[:5]).astype(nullable_string_dtype)
|
| 60 |
+
expected = pd.Series(
|
| 61 |
+
[str(x) if not isinstance(x, bytes) else x.decode() for x in data[:5]],
|
| 62 |
+
dtype=nullable_string_dtype,
|
| 63 |
+
)
|
| 64 |
+
tm.assert_series_equal(result, expected)
|
| 65 |
+
|
| 66 |
+
def test_to_numpy(self, data):
|
| 67 |
+
expected = np.asarray(data)
|
| 68 |
+
|
| 69 |
+
result = data.to_numpy()
|
| 70 |
+
tm.assert_equal(result, expected)
|
| 71 |
+
|
| 72 |
+
result = pd.Series(data).to_numpy()
|
| 73 |
+
tm.assert_equal(result, expected)
|
| 74 |
+
|
| 75 |
+
def test_astype_empty_dataframe(self, dtype):
|
| 76 |
+
# https://github.com/pandas-dev/pandas/issues/33113
|
| 77 |
+
df = pd.DataFrame()
|
| 78 |
+
result = df.astype(dtype)
|
| 79 |
+
tm.assert_frame_equal(result, df)
|
| 80 |
+
|
| 81 |
+
@pytest.mark.parametrize("copy", [True, False])
|
| 82 |
+
def test_astype_own_type(self, data, copy):
|
| 83 |
+
# ensure that astype returns the original object for equal dtype and copy=False
|
| 84 |
+
# https://github.com/pandas-dev/pandas/issues/28488
|
| 85 |
+
result = data.astype(data.dtype, copy=copy)
|
| 86 |
+
assert (result is data) is (not copy)
|
| 87 |
+
tm.assert_extension_array_equal(result, data)
|
py311/lib/python3.11/site-packages/pandas/tests/extension/base/constructors.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import pandas._testing as tm
|
| 6 |
+
from pandas.api.extensions import ExtensionArray
|
| 7 |
+
from pandas.core.internals.blocks import EABackedBlock
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class BaseConstructorsTests:
|
| 11 |
+
def test_from_sequence_from_cls(self, data):
|
| 12 |
+
result = type(data)._from_sequence(data, dtype=data.dtype)
|
| 13 |
+
tm.assert_extension_array_equal(result, data)
|
| 14 |
+
|
| 15 |
+
data = data[:0]
|
| 16 |
+
result = type(data)._from_sequence(data, dtype=data.dtype)
|
| 17 |
+
tm.assert_extension_array_equal(result, data)
|
| 18 |
+
|
| 19 |
+
def test_array_from_scalars(self, data):
|
| 20 |
+
scalars = [data[0], data[1], data[2]]
|
| 21 |
+
result = data._from_sequence(scalars, dtype=data.dtype)
|
| 22 |
+
assert isinstance(result, type(data))
|
| 23 |
+
|
| 24 |
+
def test_series_constructor(self, data):
|
| 25 |
+
result = pd.Series(data, copy=False)
|
| 26 |
+
assert result.dtype == data.dtype
|
| 27 |
+
assert len(result) == len(data)
|
| 28 |
+
if hasattr(result._mgr, "blocks"):
|
| 29 |
+
assert isinstance(result._mgr.blocks[0], EABackedBlock)
|
| 30 |
+
assert result._mgr.array is data
|
| 31 |
+
|
| 32 |
+
# Series[EA] is unboxed / boxed correctly
|
| 33 |
+
result2 = pd.Series(result)
|
| 34 |
+
assert result2.dtype == data.dtype
|
| 35 |
+
if hasattr(result._mgr, "blocks"):
|
| 36 |
+
assert isinstance(result2._mgr.blocks[0], EABackedBlock)
|
| 37 |
+
|
| 38 |
+
def test_series_constructor_no_data_with_index(self, dtype, na_value):
|
| 39 |
+
result = pd.Series(index=[1, 2, 3], dtype=dtype)
|
| 40 |
+
expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype)
|
| 41 |
+
tm.assert_series_equal(result, expected)
|
| 42 |
+
|
| 43 |
+
# GH 33559 - empty index
|
| 44 |
+
result = pd.Series(index=[], dtype=dtype)
|
| 45 |
+
expected = pd.Series([], index=pd.Index([], dtype="object"), dtype=dtype)
|
| 46 |
+
tm.assert_series_equal(result, expected)
|
| 47 |
+
|
| 48 |
+
def test_series_constructor_scalar_na_with_index(self, dtype, na_value):
|
| 49 |
+
result = pd.Series(na_value, index=[1, 2, 3], dtype=dtype)
|
| 50 |
+
expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype)
|
| 51 |
+
tm.assert_series_equal(result, expected)
|
| 52 |
+
|
| 53 |
+
def test_series_constructor_scalar_with_index(self, data, dtype):
|
| 54 |
+
scalar = data[0]
|
| 55 |
+
result = pd.Series(scalar, index=[1, 2, 3], dtype=dtype)
|
| 56 |
+
expected = pd.Series([scalar] * 3, index=[1, 2, 3], dtype=dtype)
|
| 57 |
+
tm.assert_series_equal(result, expected)
|
| 58 |
+
|
| 59 |
+
result = pd.Series(scalar, index=["foo"], dtype=dtype)
|
| 60 |
+
expected = pd.Series([scalar], index=["foo"], dtype=dtype)
|
| 61 |
+
tm.assert_series_equal(result, expected)
|
| 62 |
+
|
| 63 |
+
@pytest.mark.parametrize("from_series", [True, False])
|
| 64 |
+
def test_dataframe_constructor_from_dict(self, data, from_series):
|
| 65 |
+
if from_series:
|
| 66 |
+
data = pd.Series(data)
|
| 67 |
+
result = pd.DataFrame({"A": data})
|
| 68 |
+
assert result.dtypes["A"] == data.dtype
|
| 69 |
+
assert result.shape == (len(data), 1)
|
| 70 |
+
if hasattr(result._mgr, "blocks"):
|
| 71 |
+
assert isinstance(result._mgr.blocks[0], EABackedBlock)
|
| 72 |
+
assert isinstance(result._mgr.arrays[0], ExtensionArray)
|
| 73 |
+
|
| 74 |
+
def test_dataframe_from_series(self, data):
|
| 75 |
+
result = pd.DataFrame(pd.Series(data))
|
| 76 |
+
assert result.dtypes[0] == data.dtype
|
| 77 |
+
assert result.shape == (len(data), 1)
|
| 78 |
+
if hasattr(result._mgr, "blocks"):
|
| 79 |
+
assert isinstance(result._mgr.blocks[0], EABackedBlock)
|
| 80 |
+
assert isinstance(result._mgr.arrays[0], ExtensionArray)
|
| 81 |
+
|
| 82 |
+
def test_series_given_mismatched_index_raises(self, data):
|
| 83 |
+
msg = r"Length of values \(3\) does not match length of index \(5\)"
|
| 84 |
+
with pytest.raises(ValueError, match=msg):
|
| 85 |
+
pd.Series(data[:3], index=[0, 1, 2, 3, 4])
|
| 86 |
+
|
| 87 |
+
def test_from_dtype(self, data):
|
| 88 |
+
# construct from our dtype & string dtype
|
| 89 |
+
dtype = data.dtype
|
| 90 |
+
|
| 91 |
+
expected = pd.Series(data)
|
| 92 |
+
result = pd.Series(list(data), dtype=dtype)
|
| 93 |
+
tm.assert_series_equal(result, expected)
|
| 94 |
+
|
| 95 |
+
result = pd.Series(list(data), dtype=str(dtype))
|
| 96 |
+
tm.assert_series_equal(result, expected)
|
| 97 |
+
|
| 98 |
+
# gh-30280
|
| 99 |
+
|
| 100 |
+
expected = pd.DataFrame(data).astype(dtype)
|
| 101 |
+
result = pd.DataFrame(list(data), dtype=dtype)
|
| 102 |
+
tm.assert_frame_equal(result, expected)
|
| 103 |
+
|
| 104 |
+
result = pd.DataFrame(list(data), dtype=str(dtype))
|
| 105 |
+
tm.assert_frame_equal(result, expected)
|
| 106 |
+
|
| 107 |
+
def test_pandas_array(self, data):
|
| 108 |
+
# pd.array(extension_array) should be idempotent...
|
| 109 |
+
result = pd.array(data)
|
| 110 |
+
tm.assert_extension_array_equal(result, data)
|
| 111 |
+
|
| 112 |
+
def test_pandas_array_dtype(self, data):
|
| 113 |
+
# ... but specifying dtype will override idempotency
|
| 114 |
+
result = pd.array(data, dtype=np.dtype(object))
|
| 115 |
+
expected = pd.arrays.NumpyExtensionArray(np.asarray(data, dtype=object))
|
| 116 |
+
tm.assert_equal(result, expected)
|
| 117 |
+
|
| 118 |
+
def test_construct_empty_dataframe(self, dtype):
|
| 119 |
+
# GH 33623
|
| 120 |
+
result = pd.DataFrame(columns=["a"], dtype=dtype)
|
| 121 |
+
expected = pd.DataFrame(
|
| 122 |
+
{"a": pd.array([], dtype=dtype)}, index=pd.RangeIndex(0)
|
| 123 |
+
)
|
| 124 |
+
tm.assert_frame_equal(result, expected)
|
| 125 |
+
|
| 126 |
+
def test_empty(self, dtype):
|
| 127 |
+
cls = dtype.construct_array_type()
|
| 128 |
+
result = cls._empty((4,), dtype=dtype)
|
| 129 |
+
assert isinstance(result, cls)
|
| 130 |
+
assert result.dtype == dtype
|
| 131 |
+
assert result.shape == (4,)
|
| 132 |
+
|
| 133 |
+
# GH#19600 method on ExtensionDtype
|
| 134 |
+
result2 = dtype.empty((4,))
|
| 135 |
+
assert isinstance(result2, cls)
|
| 136 |
+
assert result2.dtype == dtype
|
| 137 |
+
assert result2.shape == (4,)
|
| 138 |
+
|
| 139 |
+
result2 = dtype.empty(4)
|
| 140 |
+
assert isinstance(result2, cls)
|
| 141 |
+
assert result2.dtype == dtype
|
| 142 |
+
assert result2.shape == (4,)
|
py311/lib/python3.11/site-packages/pandas/tests/extension/base/dim2.py
ADDED
|
@@ -0,0 +1,345 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests for 2D compatibility.
|
| 3 |
+
"""
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pytest
|
| 6 |
+
|
| 7 |
+
from pandas._libs.missing import is_matching_na
|
| 8 |
+
|
| 9 |
+
from pandas.core.dtypes.common import (
|
| 10 |
+
is_bool_dtype,
|
| 11 |
+
is_integer_dtype,
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
import pandas as pd
|
| 15 |
+
import pandas._testing as tm
|
| 16 |
+
from pandas.core.arrays.integer import NUMPY_INT_TO_DTYPE
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class Dim2CompatTests:
|
| 20 |
+
# Note: these are ONLY for ExtensionArray subclasses that support 2D arrays.
|
| 21 |
+
# i.e. not for pyarrow-backed EAs.
|
| 22 |
+
|
| 23 |
+
@pytest.fixture(autouse=True)
|
| 24 |
+
def skip_if_doesnt_support_2d(self, dtype, request):
|
| 25 |
+
if not dtype._supports_2d:
|
| 26 |
+
node = request.node
|
| 27 |
+
# In cases where we are mixed in to ExtensionTests, we only want to
|
| 28 |
+
# skip tests that are defined in Dim2CompatTests
|
| 29 |
+
test_func = node._obj
|
| 30 |
+
if test_func.__qualname__.startswith("Dim2CompatTests"):
|
| 31 |
+
# TODO: is there a less hacky way of checking this?
|
| 32 |
+
pytest.skip(f"{dtype} does not support 2D.")
|
| 33 |
+
|
| 34 |
+
def test_transpose(self, data):
|
| 35 |
+
arr2d = data.repeat(2).reshape(-1, 2)
|
| 36 |
+
shape = arr2d.shape
|
| 37 |
+
assert shape[0] != shape[-1] # otherwise the rest of the test is useless
|
| 38 |
+
|
| 39 |
+
assert arr2d.T.shape == shape[::-1]
|
| 40 |
+
|
| 41 |
+
def test_frame_from_2d_array(self, data):
|
| 42 |
+
arr2d = data.repeat(2).reshape(-1, 2)
|
| 43 |
+
|
| 44 |
+
df = pd.DataFrame(arr2d)
|
| 45 |
+
expected = pd.DataFrame({0: arr2d[:, 0], 1: arr2d[:, 1]})
|
| 46 |
+
tm.assert_frame_equal(df, expected)
|
| 47 |
+
|
| 48 |
+
def test_swapaxes(self, data):
|
| 49 |
+
arr2d = data.repeat(2).reshape(-1, 2)
|
| 50 |
+
|
| 51 |
+
result = arr2d.swapaxes(0, 1)
|
| 52 |
+
expected = arr2d.T
|
| 53 |
+
tm.assert_extension_array_equal(result, expected)
|
| 54 |
+
|
| 55 |
+
def test_delete_2d(self, data):
|
| 56 |
+
arr2d = data.repeat(3).reshape(-1, 3)
|
| 57 |
+
|
| 58 |
+
# axis = 0
|
| 59 |
+
result = arr2d.delete(1, axis=0)
|
| 60 |
+
expected = data.delete(1).repeat(3).reshape(-1, 3)
|
| 61 |
+
tm.assert_extension_array_equal(result, expected)
|
| 62 |
+
|
| 63 |
+
# axis = 1
|
| 64 |
+
result = arr2d.delete(1, axis=1)
|
| 65 |
+
expected = data.repeat(2).reshape(-1, 2)
|
| 66 |
+
tm.assert_extension_array_equal(result, expected)
|
| 67 |
+
|
| 68 |
+
def test_take_2d(self, data):
|
| 69 |
+
arr2d = data.reshape(-1, 1)
|
| 70 |
+
|
| 71 |
+
result = arr2d.take([0, 0, -1], axis=0)
|
| 72 |
+
|
| 73 |
+
expected = data.take([0, 0, -1]).reshape(-1, 1)
|
| 74 |
+
tm.assert_extension_array_equal(result, expected)
|
| 75 |
+
|
| 76 |
+
def test_repr_2d(self, data):
|
| 77 |
+
# this could fail in a corner case where an element contained the name
|
| 78 |
+
res = repr(data.reshape(1, -1))
|
| 79 |
+
assert res.count(f"<{type(data).__name__}") == 1
|
| 80 |
+
|
| 81 |
+
res = repr(data.reshape(-1, 1))
|
| 82 |
+
assert res.count(f"<{type(data).__name__}") == 1
|
| 83 |
+
|
| 84 |
+
def test_reshape(self, data):
|
| 85 |
+
arr2d = data.reshape(-1, 1)
|
| 86 |
+
assert arr2d.shape == (data.size, 1)
|
| 87 |
+
assert len(arr2d) == len(data)
|
| 88 |
+
|
| 89 |
+
arr2d = data.reshape((-1, 1))
|
| 90 |
+
assert arr2d.shape == (data.size, 1)
|
| 91 |
+
assert len(arr2d) == len(data)
|
| 92 |
+
|
| 93 |
+
with pytest.raises(ValueError):
|
| 94 |
+
data.reshape((data.size, 2))
|
| 95 |
+
with pytest.raises(ValueError):
|
| 96 |
+
data.reshape(data.size, 2)
|
| 97 |
+
|
| 98 |
+
def test_getitem_2d(self, data):
|
| 99 |
+
arr2d = data.reshape(1, -1)
|
| 100 |
+
|
| 101 |
+
result = arr2d[0]
|
| 102 |
+
tm.assert_extension_array_equal(result, data)
|
| 103 |
+
|
| 104 |
+
with pytest.raises(IndexError):
|
| 105 |
+
arr2d[1]
|
| 106 |
+
|
| 107 |
+
with pytest.raises(IndexError):
|
| 108 |
+
arr2d[-2]
|
| 109 |
+
|
| 110 |
+
result = arr2d[:]
|
| 111 |
+
tm.assert_extension_array_equal(result, arr2d)
|
| 112 |
+
|
| 113 |
+
result = arr2d[:, :]
|
| 114 |
+
tm.assert_extension_array_equal(result, arr2d)
|
| 115 |
+
|
| 116 |
+
result = arr2d[:, 0]
|
| 117 |
+
expected = data[[0]]
|
| 118 |
+
tm.assert_extension_array_equal(result, expected)
|
| 119 |
+
|
| 120 |
+
# dimension-expanding getitem on 1D
|
| 121 |
+
result = data[:, np.newaxis]
|
| 122 |
+
tm.assert_extension_array_equal(result, arr2d.T)
|
| 123 |
+
|
| 124 |
+
def test_iter_2d(self, data):
|
| 125 |
+
arr2d = data.reshape(1, -1)
|
| 126 |
+
|
| 127 |
+
objs = list(iter(arr2d))
|
| 128 |
+
assert len(objs) == arr2d.shape[0]
|
| 129 |
+
|
| 130 |
+
for obj in objs:
|
| 131 |
+
assert isinstance(obj, type(data))
|
| 132 |
+
assert obj.dtype == data.dtype
|
| 133 |
+
assert obj.ndim == 1
|
| 134 |
+
assert len(obj) == arr2d.shape[1]
|
| 135 |
+
|
| 136 |
+
def test_tolist_2d(self, data):
|
| 137 |
+
arr2d = data.reshape(1, -1)
|
| 138 |
+
|
| 139 |
+
result = arr2d.tolist()
|
| 140 |
+
expected = [data.tolist()]
|
| 141 |
+
|
| 142 |
+
assert isinstance(result, list)
|
| 143 |
+
assert all(isinstance(x, list) for x in result)
|
| 144 |
+
|
| 145 |
+
assert result == expected
|
| 146 |
+
|
| 147 |
+
def test_concat_2d(self, data):
|
| 148 |
+
left = type(data)._concat_same_type([data, data]).reshape(-1, 2)
|
| 149 |
+
right = left.copy()
|
| 150 |
+
|
| 151 |
+
# axis=0
|
| 152 |
+
result = left._concat_same_type([left, right], axis=0)
|
| 153 |
+
expected = data._concat_same_type([data] * 4).reshape(-1, 2)
|
| 154 |
+
tm.assert_extension_array_equal(result, expected)
|
| 155 |
+
|
| 156 |
+
# axis=1
|
| 157 |
+
result = left._concat_same_type([left, right], axis=1)
|
| 158 |
+
assert result.shape == (len(data), 4)
|
| 159 |
+
tm.assert_extension_array_equal(result[:, :2], left)
|
| 160 |
+
tm.assert_extension_array_equal(result[:, 2:], right)
|
| 161 |
+
|
| 162 |
+
# axis > 1 -> invalid
|
| 163 |
+
msg = "axis 2 is out of bounds for array of dimension 2"
|
| 164 |
+
with pytest.raises(ValueError, match=msg):
|
| 165 |
+
left._concat_same_type([left, right], axis=2)
|
| 166 |
+
|
| 167 |
+
@pytest.mark.parametrize("method", ["backfill", "pad"])
|
| 168 |
+
def test_fillna_2d_method(self, data_missing, method):
|
| 169 |
+
# pad_or_backfill is always along axis=0
|
| 170 |
+
arr = data_missing.repeat(2).reshape(2, 2)
|
| 171 |
+
assert arr[0].isna().all()
|
| 172 |
+
assert not arr[1].isna().any()
|
| 173 |
+
|
| 174 |
+
result = arr._pad_or_backfill(method=method, limit=None)
|
| 175 |
+
|
| 176 |
+
expected = data_missing._pad_or_backfill(method=method).repeat(2).reshape(2, 2)
|
| 177 |
+
tm.assert_extension_array_equal(result, expected)
|
| 178 |
+
|
| 179 |
+
# Reverse so that backfill is not a no-op.
|
| 180 |
+
arr2 = arr[::-1]
|
| 181 |
+
assert not arr2[0].isna().any()
|
| 182 |
+
assert arr2[1].isna().all()
|
| 183 |
+
|
| 184 |
+
result2 = arr2._pad_or_backfill(method=method, limit=None)
|
| 185 |
+
|
| 186 |
+
expected2 = (
|
| 187 |
+
data_missing[::-1]._pad_or_backfill(method=method).repeat(2).reshape(2, 2)
|
| 188 |
+
)
|
| 189 |
+
tm.assert_extension_array_equal(result2, expected2)
|
| 190 |
+
|
| 191 |
+
@pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
|
| 192 |
+
def test_reductions_2d_axis_none(self, data, method):
|
| 193 |
+
arr2d = data.reshape(1, -1)
|
| 194 |
+
|
| 195 |
+
err_expected = None
|
| 196 |
+
err_result = None
|
| 197 |
+
try:
|
| 198 |
+
expected = getattr(data, method)()
|
| 199 |
+
except Exception as err:
|
| 200 |
+
# if the 1D reduction is invalid, the 2D reduction should be as well
|
| 201 |
+
err_expected = err
|
| 202 |
+
try:
|
| 203 |
+
result = getattr(arr2d, method)(axis=None)
|
| 204 |
+
except Exception as err2:
|
| 205 |
+
err_result = err2
|
| 206 |
+
|
| 207 |
+
else:
|
| 208 |
+
result = getattr(arr2d, method)(axis=None)
|
| 209 |
+
|
| 210 |
+
if err_result is not None or err_expected is not None:
|
| 211 |
+
assert type(err_result) == type(err_expected)
|
| 212 |
+
return
|
| 213 |
+
|
| 214 |
+
assert is_matching_na(result, expected) or result == expected
|
| 215 |
+
|
| 216 |
+
@pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
|
| 217 |
+
@pytest.mark.parametrize("min_count", [0, 1])
|
| 218 |
+
def test_reductions_2d_axis0(self, data, method, min_count):
|
| 219 |
+
if min_count == 1 and method not in ["sum", "prod"]:
|
| 220 |
+
pytest.skip(f"min_count not relevant for {method}")
|
| 221 |
+
|
| 222 |
+
arr2d = data.reshape(1, -1)
|
| 223 |
+
|
| 224 |
+
kwargs = {}
|
| 225 |
+
if method in ["std", "var"]:
|
| 226 |
+
# pass ddof=0 so we get all-zero std instead of all-NA std
|
| 227 |
+
kwargs["ddof"] = 0
|
| 228 |
+
elif method in ["prod", "sum"]:
|
| 229 |
+
kwargs["min_count"] = min_count
|
| 230 |
+
|
| 231 |
+
try:
|
| 232 |
+
result = getattr(arr2d, method)(axis=0, **kwargs)
|
| 233 |
+
except Exception as err:
|
| 234 |
+
try:
|
| 235 |
+
getattr(data, method)()
|
| 236 |
+
except Exception as err2:
|
| 237 |
+
assert type(err) == type(err2)
|
| 238 |
+
return
|
| 239 |
+
else:
|
| 240 |
+
raise AssertionError("Both reductions should raise or neither")
|
| 241 |
+
|
| 242 |
+
def get_reduction_result_dtype(dtype):
|
| 243 |
+
# windows and 32bit builds will in some cases have int32/uint32
|
| 244 |
+
# where other builds will have int64/uint64.
|
| 245 |
+
if dtype.itemsize == 8:
|
| 246 |
+
return dtype
|
| 247 |
+
elif dtype.kind in "ib":
|
| 248 |
+
return NUMPY_INT_TO_DTYPE[np.dtype(int)]
|
| 249 |
+
else:
|
| 250 |
+
# i.e. dtype.kind == "u"
|
| 251 |
+
return NUMPY_INT_TO_DTYPE[np.dtype("uint")]
|
| 252 |
+
|
| 253 |
+
if method in ["sum", "prod"]:
|
| 254 |
+
# sum and prod keep the dtype (up to integer/bool widening), unlike std and var
|
| 255 |
+
expected = data
|
| 256 |
+
if data.dtype.kind in "iub":
|
| 257 |
+
dtype = get_reduction_result_dtype(data.dtype)
|
| 258 |
+
expected = data.astype(dtype)
|
| 259 |
+
assert dtype == expected.dtype
|
| 260 |
+
|
| 261 |
+
if min_count == 0:
|
| 262 |
+
fill_value = 1 if method == "prod" else 0
|
| 263 |
+
expected = expected.fillna(fill_value)
|
| 264 |
+
|
| 265 |
+
tm.assert_extension_array_equal(result, expected)
|
| 266 |
+
elif method == "median":
|
| 267 |
+
# with a single row, the median along axis 0 should equal the original data
|
| 268 |
+
expected = data
|
| 269 |
+
tm.assert_extension_array_equal(result, expected)
|
| 270 |
+
elif method in ["mean", "std", "var"]:
|
| 271 |
+
if is_integer_dtype(data) or is_bool_dtype(data):
|
| 272 |
+
data = data.astype("Float64")
|
| 273 |
+
if method == "mean":
|
| 274 |
+
tm.assert_extension_array_equal(result, data)
|
| 275 |
+
else:
|
| 276 |
+
tm.assert_extension_array_equal(result, data - data)
|
| 277 |
+
|
| 278 |
+
@pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
|
| 279 |
+
def test_reductions_2d_axis1(self, data, method):
|
| 280 |
+
arr2d = data.reshape(1, -1)
|
| 281 |
+
|
| 282 |
+
try:
|
| 283 |
+
result = getattr(arr2d, method)(axis=1)
|
| 284 |
+
except Exception as err:
|
| 285 |
+
try:
|
| 286 |
+
getattr(data, method)()
|
| 287 |
+
except Exception as err2:
|
| 288 |
+
assert type(err) == type(err2)
|
| 289 |
+
return
|
| 290 |
+
else:
|
| 291 |
+
raise AssertionError("Both reductions should raise or neither")
|
| 292 |
+
|
| 293 |
+
# not necessarily type/dtype-preserving, so weaker assertions
|
| 294 |
+
assert result.shape == (1,)
|
| 295 |
+
expected_scalar = getattr(data, method)()
|
| 296 |
+
res = result[0]
|
| 297 |
+
assert is_matching_na(res, expected_scalar) or res == expected_scalar
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
class NDArrayBacked2DTests(Dim2CompatTests):
|
| 301 |
+
# More specific tests for NDArrayBackedExtensionArray subclasses
|
| 302 |
+
|
| 303 |
+
def test_copy_order(self, data):
|
| 304 |
+
# We should be matching numpy semantics for the "order" keyword in 'copy'
|
| 305 |
+
arr2d = data.repeat(2).reshape(-1, 2)
|
| 306 |
+
assert arr2d._ndarray.flags["C_CONTIGUOUS"]
|
| 307 |
+
|
| 308 |
+
res = arr2d.copy()
|
| 309 |
+
assert res._ndarray.flags["C_CONTIGUOUS"]
|
| 310 |
+
|
| 311 |
+
res = arr2d[::2, ::2].copy()
|
| 312 |
+
assert res._ndarray.flags["C_CONTIGUOUS"]
|
| 313 |
+
|
| 314 |
+
res = arr2d.copy("F")
|
| 315 |
+
assert not res._ndarray.flags["C_CONTIGUOUS"]
|
| 316 |
+
assert res._ndarray.flags["F_CONTIGUOUS"]
|
| 317 |
+
|
| 318 |
+
res = arr2d.copy("K")
|
| 319 |
+
assert res._ndarray.flags["C_CONTIGUOUS"]
|
| 320 |
+
|
| 321 |
+
res = arr2d.T.copy("K")
|
| 322 |
+
assert not res._ndarray.flags["C_CONTIGUOUS"]
|
| 323 |
+
assert res._ndarray.flags["F_CONTIGUOUS"]
|
| 324 |
+
|
| 325 |
+
# order not accepted by numpy
|
| 326 |
+
msg = r"order must be one of 'C', 'F', 'A', or 'K' \(got 'Q'\)"
|
| 327 |
+
with pytest.raises(ValueError, match=msg):
|
| 328 |
+
arr2d.copy("Q")
|
| 329 |
+
|
| 330 |
+
# neither contiguity
|
| 331 |
+
arr_nc = arr2d[::2]
|
| 332 |
+
assert not arr_nc._ndarray.flags["C_CONTIGUOUS"]
|
| 333 |
+
assert not arr_nc._ndarray.flags["F_CONTIGUOUS"]
|
| 334 |
+
|
| 335 |
+
assert arr_nc.copy()._ndarray.flags["C_CONTIGUOUS"]
|
| 336 |
+
assert not arr_nc.copy()._ndarray.flags["F_CONTIGUOUS"]
|
| 337 |
+
|
| 338 |
+
assert arr_nc.copy("C")._ndarray.flags["C_CONTIGUOUS"]
|
| 339 |
+
assert not arr_nc.copy("C")._ndarray.flags["F_CONTIGUOUS"]
|
| 340 |
+
|
| 341 |
+
assert not arr_nc.copy("F")._ndarray.flags["C_CONTIGUOUS"]
|
| 342 |
+
assert arr_nc.copy("F")._ndarray.flags["F_CONTIGUOUS"]
|
| 343 |
+
|
| 344 |
+
assert arr_nc.copy("K")._ndarray.flags["C_CONTIGUOUS"]
|
| 345 |
+
assert not arr_nc.copy("K")._ndarray.flags["F_CONTIGUOUS"]
|
py311/lib/python3.11/site-packages/pandas/tests/extension/base/setitem.py
ADDED
|
@@ -0,0 +1,451 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import pandas._testing as tm
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class BaseSetitemTests:
    """
    Reusable ``__setitem__`` tests for ExtensionArray implementations.

    The tests request fixtures (``data``, ``dtype``, ``box_in_series``,
    ``as_array``, ``na_value``, ``invalid_scalar``) that are not defined in
    this class; presumably they are supplied by the extension test suite's
    conftest -- verify against the subclassing test module.
    """

    @pytest.fixture(
        params=[
            lambda x: x.index,
            lambda x: list(x.index),
            lambda x: slice(None),
            lambda x: slice(0, len(x)),
            lambda x: range(len(x)),
            lambda x: list(range(len(x))),
            lambda x: np.ones(len(x), dtype=bool),
        ],
        ids=[
            "index",
            "list[index]",
            "null_slice",
            "full_slice",
            "range",
            "list(range)",
            "mask",
        ],
    )
    def full_indexer(self, request):
        """
        Fixture for an indexer to pass to obj.loc to get/set the full length of the
        object.

        Each parameter is a callable that takes the object and returns the
        indexer. In some cases, assumes that obj.index is the default RangeIndex.
        """
        return request.param

    @pytest.fixture(autouse=True)
    def skip_if_immutable(self, dtype, request):
        """Skip the tests defined in this class when ``dtype`` is immutable."""
        if dtype._is_immutable:
            node = request.node
            if node.name.split("[")[0] == "test_is_immutable":
                # This fixture is auto-used, but we want to not-skip
                # test_is_immutable.
                return

            # When BaseSetitemTests is mixed into ExtensionTests, we only
            # want this fixture to operate on the tests defined in this
            # class/file.
            defined_in = node.function.__qualname__.split(".")[0]
            if defined_in == "BaseSetitemTests":
                pytest.skip("__setitem__ test not applicable with immutable dtype")

    def test_is_immutable(self, data):
        """Immutable dtypes raise TypeError on setitem; mutable ones accept it."""
        if data.dtype._is_immutable:
            with pytest.raises(TypeError):
                data[0] = data[0]
        else:
            data[0] = data[1]
            assert data[0] == data[1]

    def test_setitem_scalar_series(self, data, box_in_series):
        """Setting a single position with a scalar stores that scalar."""
        if box_in_series:
            data = pd.Series(data)
        data[0] = data[1]
        assert data[0] == data[1]

    def test_setitem_sequence(self, data, box_in_series):
        """Setting two positions from a two-element list swaps the values."""
        if box_in_series:
            data = pd.Series(data)
        original = data.copy()

        data[[0, 1]] = [data[1], data[0]]
        assert data[0] == original[1]
        assert data[1] == original[0]

    def test_setitem_sequence_mismatched_length_raises(self, data, as_array):
        """A value whose length differs from the indexer raises ValueError."""
        ser = pd.Series(data)
        original = ser.copy()
        value = [data[0]]
        if as_array:
            value = data._from_sequence(value, dtype=data.dtype)

        xpr = "cannot set using a {} indexer with a different length"
        with pytest.raises(ValueError, match=xpr.format("list-like")):
            ser[[0, 1]] = value
        # Ensure no modifications made before the exception
        tm.assert_series_equal(ser, original)

        with pytest.raises(ValueError, match=xpr.format("slice")):
            ser[slice(3)] = value
        tm.assert_series_equal(ser, original)

    def test_setitem_empty_indexer(self, data, box_in_series):
        """Setting with an empty integer indexer leaves the object unchanged."""
        if box_in_series:
            data = pd.Series(data)
        original = data.copy()
        data[np.array([], dtype=int)] = []
        tm.assert_equal(data, original)

    def test_setitem_sequence_broadcasts(self, data, box_in_series):
        """A scalar value is broadcast to every position of a list indexer."""
        if box_in_series:
            data = pd.Series(data)
        data[[0, 1]] = data[2]
        assert data[0] == data[2]
        assert data[1] == data[2]

    @pytest.mark.parametrize("setter", ["loc", "iloc"])
    def test_setitem_scalar(self, data, setter):
        """Scalar assignment through ``Series.loc`` / ``Series.iloc``."""
        arr = pd.Series(data)
        setter = getattr(arr, setter)
        setter[0] = data[1]
        assert arr[0] == data[1]

    def test_setitem_loc_scalar_mixed(self, data):
        """``DataFrame.loc`` scalar set in a frame that also has a numpy column."""
        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
        df.loc[0, "B"] = data[1]
        assert df.loc[0, "B"] == data[1]

    def test_setitem_loc_scalar_single(self, data):
        """``DataFrame.loc`` scalar set in a single-column frame."""
        df = pd.DataFrame({"B": data})
        df.loc[10, "B"] = data[1]
        assert df.loc[10, "B"] == data[1]

    def test_setitem_loc_scalar_multiple_homogoneous(self, data):
        """``DataFrame.loc`` scalar set when both columns hold ``data``."""
        df = pd.DataFrame({"A": data, "B": data})
        df.loc[10, "B"] = data[1]
        assert df.loc[10, "B"] == data[1]

    def test_setitem_iloc_scalar_mixed(self, data):
        """``DataFrame.iloc`` scalar set in a frame that also has a numpy column."""
        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
        df.iloc[0, 1] = data[1]
        assert df.loc[0, "B"] == data[1]

    def test_setitem_iloc_scalar_single(self, data):
        """``DataFrame.iloc`` scalar set in a single-column frame."""
        df = pd.DataFrame({"B": data})
        df.iloc[10, 0] = data[1]
        assert df.loc[10, "B"] == data[1]

    def test_setitem_iloc_scalar_multiple_homogoneous(self, data):
        """``DataFrame.iloc`` scalar set when both columns hold ``data``."""
        df = pd.DataFrame({"A": data, "B": data})
        df.iloc[10, 1] = data[1]
        assert df.loc[10, "B"] == data[1]

    @pytest.mark.parametrize(
        "mask",
        [
            np.array([True, True, True, False, False]),
            pd.array([True, True, True, False, False], dtype="boolean"),
            pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"),
        ],
        ids=["numpy-array", "boolean-array", "boolean-array-na"],
    )
    def test_setitem_mask(self, data, mask, box_in_series):
        """Boolean-mask assignment overwrites only the positions that are True."""
        arr = data[:5].copy()
        expected = arr.take([0, 0, 0, 3, 4])
        if box_in_series:
            arr = pd.Series(arr)
            expected = pd.Series(expected)
        arr[mask] = data[0]
        tm.assert_equal(expected, arr)

    def test_setitem_mask_raises(self, data, box_in_series):
        """A boolean mask of the wrong length raises IndexError."""
        # wrong length
        mask = np.array([True, False])

        if box_in_series:
            data = pd.Series(data)

        with pytest.raises(IndexError, match="wrong length"):
            data[mask] = data[0]

        mask = pd.array(mask, dtype="boolean")
        with pytest.raises(IndexError, match="wrong length"):
            data[mask] = data[0]

    def test_setitem_mask_boolean_array_with_na(self, data, box_in_series):
        """Masked assignment with a nullable boolean mask that contains NA."""
        mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean")
        mask[:3] = True
        mask[3:5] = pd.NA

        if box_in_series:
            data = pd.Series(data)

        data[mask] = data[0]

        assert (data[:3] == data[0]).all()

    @pytest.mark.parametrize(
        "idx",
        [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
        ids=["list", "integer-array", "numpy-array"],
    )
    def test_setitem_integer_array(self, data, idx, box_in_series):
        """Assignment with a list / Int64 array / ndarray of positions."""
        arr = data[:5].copy()
        expected = data.take([0, 0, 0, 3, 4])

        if box_in_series:
            arr = pd.Series(arr)
            expected = pd.Series(expected)

        arr[idx] = arr[0]
        tm.assert_equal(arr, expected)

    # NOTE(review): the last two parameter sets below are identical (both pass
    # box_in_series=False) although the final id reads "integer-array-True".
    # Left unchanged because switching it to True changes which code path is
    # exercised -- confirm the intended coverage before editing.
    @pytest.mark.parametrize(
        "idx, box_in_series",
        [
            ([0, 1, 2, pd.NA], False),
            pytest.param(
                [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948")
            ),
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
        ],
        ids=["list-False", "list-True", "integer-array-False", "integer-array-True"],
    )
    def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
        """An integer indexer that contains NA raises ValueError."""
        arr = data.copy()

        # TODO(xfail) this raises KeyError about labels not found (it tries label-based)
        # for list of labels with Series
        if box_in_series:
            arr = pd.Series(data, index=[chr(100 + i) for i in range(len(data))])

        msg = "Cannot index with an integer indexer containing NA values"
        with pytest.raises(ValueError, match=msg):
            arr[idx] = arr[0]

    @pytest.mark.parametrize("as_callable", [True, False])
    @pytest.mark.parametrize("setter", ["loc", None])
    def test_setitem_mask_aligned(self, data, as_callable, setter):
        """Masked assignment of an array value via ``loc`` or ``__setitem__``."""
        ser = pd.Series(data)
        mask = np.zeros(len(data), dtype=bool)
        mask[:2] = True

        if as_callable:
            mask2 = lambda x: mask
        else:
            mask2 = mask

        if setter:
            # loc
            target = getattr(ser, setter)
        else:
            # Series.__setitem__
            target = ser

        target[mask2] = data[5:7]

        # The assertions check the write made through ``target`` only; a
        # second unconditional ``ser[mask2] = ...`` here would hide a setter
        # that silently did nothing.
        assert ser[0] == data[5]
        assert ser[1] == data[6]

    @pytest.mark.parametrize("setter", ["loc", None])
    def test_setitem_mask_broadcast(self, data, setter):
        """Masked assignment of a scalar via ``loc`` or ``__setitem__``."""
        ser = pd.Series(data)
        mask = np.zeros(len(data), dtype=bool)
        mask[:2] = True

        if setter:  # loc
            target = getattr(ser, setter)
        else:  # __setitem__
            target = ser

        target[mask] = data[10]
        assert ser[0] == data[10]
        assert ser[1] == data[10]

    def test_setitem_expand_columns(self, data):
        """Adding a new column to, then overwriting it in, a frame holding ``data``."""
        df = pd.DataFrame({"A": data})
        result = df.copy()
        result["B"] = 1
        expected = pd.DataFrame({"A": data, "B": [1] * len(data)})
        tm.assert_frame_equal(result, expected)

        result = df.copy()
        result.loc[:, "B"] = 1
        tm.assert_frame_equal(result, expected)

        # overwrite with new type
        result["B"] = data
        expected = pd.DataFrame({"A": data, "B": data})
        tm.assert_frame_equal(result, expected)

    def test_setitem_expand_with_extension(self, data):
        """Adding ``data`` as a new column to a frame of integers."""
        df = pd.DataFrame({"A": [1] * len(data)})
        result = df.copy()
        result["B"] = data
        expected = pd.DataFrame({"A": [1] * len(data), "B": data})
        tm.assert_frame_equal(result, expected)

        result = df.copy()
        result.loc[:, "B"] = data
        tm.assert_frame_equal(result, expected)

    def test_setitem_frame_invalid_length(self, data):
        """Assigning a column whose length differs from the index raises."""
        df = pd.DataFrame({"A": [1] * len(data)})
        xpr = (
            rf"Length of values \({len(data[:5])}\) "
            rf"does not match length of index \({len(df)}\)"
        )
        with pytest.raises(ValueError, match=xpr):
            df["B"] = data[:5]

    def test_setitem_tuple_index(self, data):
        """Setting by a tuple label on a Series whose index holds tuples."""
        ser = pd.Series(data[:2], index=[(0, 0), (0, 1)])
        expected = pd.Series(data.take([1, 1]), index=ser.index)
        ser[(0, 0)] = data[1]
        tm.assert_series_equal(ser, expected)

    def test_setitem_slice(self, data, box_in_series):
        """Slice assignment with a scalar value."""
        arr = data[:5].copy()
        expected = data.take([0, 0, 0, 3, 4])
        if box_in_series:
            arr = pd.Series(arr)
            expected = pd.Series(expected)

        arr[:3] = data[0]
        tm.assert_equal(arr, expected)

    def test_setitem_loc_iloc_slice(self, data):
        """Slice assignment through ``Series.iloc`` and label-based ``Series.loc``."""
        arr = data[:5].copy()
        s = pd.Series(arr, index=["a", "b", "c", "d", "e"])
        expected = pd.Series(data.take([0, 0, 0, 3, 4]), index=s.index)

        result = s.copy()
        result.iloc[:3] = data[0]
        tm.assert_equal(result, expected)

        result = s.copy()
        result.loc[:"c"] = data[0]
        tm.assert_equal(result, expected)

    def test_setitem_slice_mismatch_length_raises(self, data):
        """Assigning two values to a one-element slice raises ValueError."""
        arr = data[:5]
        with pytest.raises(ValueError):
            arr[:1] = arr[:2]

    def test_setitem_slice_array(self, data):
        """Slice assignment with an array value of matching length."""
        arr = data[:5].copy()
        arr[:5] = data[-5:]
        tm.assert_extension_array_equal(arr, data[-5:])

    def test_setitem_scalar_key_sequence_raise(self, data):
        """Assigning a sequence to a scalar position raises ValueError."""
        arr = data[:5].copy()
        with pytest.raises(ValueError):
            arr[0] = arr[[0, 1]]

    def test_setitem_preserves_views(self, data):
        """Writes through the array are visible in views taken beforehand."""
        # GH#28150 setitem shouldn't swap the underlying data
        view1 = data.view()
        view2 = data[:]

        data[0] = data[1]
        assert view1[0] == data[1]
        assert view2[0] == data[1]

    def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
        """``loc`` assignment that creates a new column in an empty frame."""
        # https://github.com/pandas-dev/pandas/issues/32395
        df = expected = pd.DataFrame({0: pd.Series(data)})
        result = pd.DataFrame(index=df.index)

        key = full_indexer(df)
        result.loc[key, 0] = df[0]

        tm.assert_frame_equal(result, expected)

    def test_setitem_with_expansion_row(self, data, na_value):
        """``loc`` assignment that appends new rows, including an NA row."""
        df = pd.DataFrame({"data": data[:1]})

        df.loc[1, "data"] = data[1]
        expected = pd.DataFrame({"data": data[:2]})
        tm.assert_frame_equal(df, expected)

        # https://github.com/pandas-dev/pandas/issues/47284
        df.loc[2, "data"] = na_value
        expected = pd.DataFrame(
            {"data": pd.Series([data[0], data[1], na_value], dtype=data.dtype)}
        )
        tm.assert_frame_equal(df, expected)

    def test_setitem_series(self, data, full_indexer):
        """Full-length ``loc`` assignment into an object-dtype Series."""
        # https://github.com/pandas-dev/pandas/issues/32395
        ser = pd.Series(data, name="data")
        result = pd.Series(index=ser.index, dtype=object, name="data")

        # because result has object dtype, the attempt to do setting inplace
        # is successful, and object dtype is retained
        key = full_indexer(ser)
        result.loc[key] = ser

        expected = pd.Series(
            data.astype(object), index=ser.index, name="data", dtype=object
        )
        tm.assert_series_equal(result, expected)

    def test_setitem_frame_2d_values(self, data):
        """``iloc`` assignment of frame / 2D values leaves the frame unchanged."""
        # GH#44514
        df = pd.DataFrame({"A": data})

        # Avoiding using_array_manager fixture
        # https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
        using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
        using_copy_on_write = pd.options.mode.copy_on_write

        blk_data = df._mgr.arrays[0]

        orig = df.copy()

        df.iloc[:] = df.copy()
        tm.assert_frame_equal(df, orig)

        df.iloc[:-1] = df.iloc[:-1].copy()
        tm.assert_frame_equal(df, orig)

        df.iloc[:] = df.values
        tm.assert_frame_equal(df, orig)
        if not using_array_manager and not using_copy_on_write:
            # GH#33457 Check that this setting occurred in-place
            # FIXME(ArrayManager): this should work there too
            assert df._mgr.arrays[0] is blk_data

        df.iloc[:-1] = df.values[:-1]
        tm.assert_frame_equal(df, orig)

    def test_delitem_series(self, data):
        """``del ser[label]`` removes exactly that entry."""
        # GH#40763
        ser = pd.Series(data, name="data")

        taker = np.arange(len(ser))
        taker = np.delete(taker, 1)

        expected = ser[taker]
        del ser[1]
        tm.assert_series_equal(ser, expected)

    def test_setitem_invalid(self, data, invalid_scalar):
        """Setting an invalid scalar raises ValueError or TypeError."""
        msg = ""  # messages vary by subclass, so we do not test it
        with pytest.raises((ValueError, TypeError), match=msg):
            data[0] = invalid_scalar

        with pytest.raises((ValueError, TypeError), match=msg):
            data[:] = invalid_scalar

    def test_setitem_2d_values(self, data):
        """``loc`` assignment of a 2D values block swaps the first two rows."""
        # GH50085
        original = data.copy()
        df = pd.DataFrame({"a": data, "b": data})
        df.loc[[0, 1], :] = df.loc[[1, 0], :].values
        assert (df.loc[0, :] == original[1]).all()
        assert (df.loc[1, :] == original[0]).all()
|
py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pandas.tests.extension.decimal.array import (
|
| 2 |
+
DecimalArray,
|
| 3 |
+
DecimalDtype,
|
| 4 |
+
make_data,
|
| 5 |
+
to_decimal,
|
| 6 |
+
)
|
| 7 |
+
|
| 8 |
+
# Names re-exported by this package; all four come from the ``array`` module.
__all__ = ["DecimalArray", "DecimalDtype", "to_decimal", "make_data"]
|
py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/array.py
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import decimal
|
| 4 |
+
import numbers
|
| 5 |
+
import sys
|
| 6 |
+
from typing import TYPE_CHECKING
|
| 7 |
+
|
| 8 |
+
import numpy as np
|
| 9 |
+
|
| 10 |
+
from pandas.core.dtypes.base import ExtensionDtype
|
| 11 |
+
from pandas.core.dtypes.common import (
|
| 12 |
+
is_dtype_equal,
|
| 13 |
+
is_float,
|
| 14 |
+
is_integer,
|
| 15 |
+
pandas_dtype,
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
import pandas as pd
|
| 19 |
+
from pandas.api.extensions import (
|
| 20 |
+
no_default,
|
| 21 |
+
register_extension_dtype,
|
| 22 |
+
)
|
| 23 |
+
from pandas.api.types import (
|
| 24 |
+
is_list_like,
|
| 25 |
+
is_scalar,
|
| 26 |
+
)
|
| 27 |
+
from pandas.core import arraylike
|
| 28 |
+
from pandas.core.algorithms import value_counts_internal as value_counts
|
| 29 |
+
from pandas.core.arraylike import OpsMixin
|
| 30 |
+
from pandas.core.arrays import (
|
| 31 |
+
ExtensionArray,
|
| 32 |
+
ExtensionScalarOpsMixin,
|
| 33 |
+
)
|
| 34 |
+
from pandas.core.indexers import check_array_indexer
|
| 35 |
+
|
| 36 |
+
if TYPE_CHECKING:
|
| 37 |
+
from pandas._typing import type_t
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
@register_extension_dtype
class DecimalDtype(ExtensionDtype):
    """Extension dtype whose scalars are ``decimal.Decimal`` objects."""

    name = "decimal"
    type = decimal.Decimal
    na_value = decimal.Decimal("NaN")
    # Attributes listed in ``_metadata``; here only the decimal context.
    _metadata = ("context",)

    def __init__(self, context=None) -> None:
        """Store ``context``, or the current decimal context if it is falsy."""
        if not context:
            context = decimal.getcontext()
        self.context = context

    def __repr__(self) -> str:
        """Return a representation that includes the stored context."""
        ctx = self.context
        return f"DecimalDtype(context={ctx})"

    @classmethod
    def construct_array_type(cls) -> type_t[DecimalArray]:
        """
        Return the array class that pairs with this dtype.

        Returns
        -------
        type
        """
        array_type = DecimalArray
        return array_type

    @property
    def _is_numeric(self) -> bool:
        """Always True for this dtype."""
        return True
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
class DecimalArray(OpsMixin, ExtensionScalarOpsMixin, ExtensionArray):
|
| 70 |
+
__array_priority__ = 1000
|
| 71 |
+
|
| 72 |
+
def __init__(self, values, dtype=None, copy=False, context=None) -> None:
|
| 73 |
+
for i, val in enumerate(values):
|
| 74 |
+
if is_float(val) or is_integer(val):
|
| 75 |
+
if np.isnan(val):
|
| 76 |
+
values[i] = DecimalDtype.na_value
|
| 77 |
+
else:
|
| 78 |
+
# error: Argument 1 has incompatible type "float | int |
|
| 79 |
+
# integer[Any]"; expected "Decimal | float | str | tuple[int,
|
| 80 |
+
# Sequence[int], int]"
|
| 81 |
+
values[i] = DecimalDtype.type(val) # type: ignore[arg-type]
|
| 82 |
+
elif not isinstance(val, decimal.Decimal):
|
| 83 |
+
raise TypeError("All values must be of type " + str(decimal.Decimal))
|
| 84 |
+
values = np.asarray(values, dtype=object)
|
| 85 |
+
|
| 86 |
+
self._data = values
|
| 87 |
+
# Some aliases for common attribute names to ensure pandas supports
|
| 88 |
+
# these
|
| 89 |
+
self._items = self.data = self._data
|
| 90 |
+
# those aliases are currently not working due to assumptions
|
| 91 |
+
# in internal code (GH-20735)
|
| 92 |
+
# self._values = self.values = self.data
|
| 93 |
+
self._dtype = DecimalDtype(context)
|
| 94 |
+
|
| 95 |
+
@property
|
| 96 |
+
def dtype(self):
|
| 97 |
+
return self._dtype
|
| 98 |
+
|
| 99 |
+
@classmethod
|
| 100 |
+
def _from_sequence(cls, scalars, *, dtype=None, copy=False):
|
| 101 |
+
return cls(scalars)
|
| 102 |
+
|
| 103 |
+
@classmethod
|
| 104 |
+
def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
|
| 105 |
+
return cls._from_sequence(
|
| 106 |
+
[decimal.Decimal(x) for x in strings], dtype=dtype, copy=copy
|
| 107 |
+
)
|
| 108 |
+
|
| 109 |
+
@classmethod
|
| 110 |
+
def _from_factorized(cls, values, original):
|
| 111 |
+
return cls(values)
|
| 112 |
+
|
| 113 |
+
_HANDLED_TYPES = (decimal.Decimal, numbers.Number, np.ndarray)
|
| 114 |
+
|
| 115 |
+
def to_numpy(
|
| 116 |
+
self,
|
| 117 |
+
dtype=None,
|
| 118 |
+
copy: bool = False,
|
| 119 |
+
na_value: object = no_default,
|
| 120 |
+
decimals=None,
|
| 121 |
+
) -> np.ndarray:
|
| 122 |
+
result = np.asarray(self, dtype=dtype)
|
| 123 |
+
if decimals is not None:
|
| 124 |
+
result = np.asarray([round(x, decimals) for x in result])
|
| 125 |
+
return result
|
| 126 |
+
|
| 127 |
+
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
|
| 128 |
+
#
|
| 129 |
+
if not all(
|
| 130 |
+
isinstance(t, self._HANDLED_TYPES + (DecimalArray,)) for t in inputs
|
| 131 |
+
):
|
| 132 |
+
return NotImplemented
|
| 133 |
+
|
| 134 |
+
result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
|
| 135 |
+
self, ufunc, method, *inputs, **kwargs
|
| 136 |
+
)
|
| 137 |
+
if result is not NotImplemented:
|
| 138 |
+
# e.g. test_array_ufunc_series_scalar_other
|
| 139 |
+
return result
|
| 140 |
+
|
| 141 |
+
if "out" in kwargs:
|
| 142 |
+
return arraylike.dispatch_ufunc_with_out(
|
| 143 |
+
self, ufunc, method, *inputs, **kwargs
|
| 144 |
+
)
|
| 145 |
+
|
| 146 |
+
inputs = tuple(x._data if isinstance(x, DecimalArray) else x for x in inputs)
|
| 147 |
+
result = getattr(ufunc, method)(*inputs, **kwargs)
|
| 148 |
+
|
| 149 |
+
if method == "reduce":
|
| 150 |
+
result = arraylike.dispatch_reduction_ufunc(
|
| 151 |
+
self, ufunc, method, *inputs, **kwargs
|
| 152 |
+
)
|
| 153 |
+
if result is not NotImplemented:
|
| 154 |
+
return result
|
| 155 |
+
|
| 156 |
+
def reconstruct(x):
|
| 157 |
+
if isinstance(x, (decimal.Decimal, numbers.Number)):
|
| 158 |
+
return x
|
| 159 |
+
else:
|
| 160 |
+
return type(self)._from_sequence(x, dtype=self.dtype)
|
| 161 |
+
|
| 162 |
+
if ufunc.nout > 1:
|
| 163 |
+
return tuple(reconstruct(x) for x in result)
|
| 164 |
+
else:
|
| 165 |
+
return reconstruct(result)
|
| 166 |
+
|
| 167 |
+
def __getitem__(self, item):
|
| 168 |
+
if isinstance(item, numbers.Integral):
|
| 169 |
+
return self._data[item]
|
| 170 |
+
else:
|
| 171 |
+
# array, slice.
|
| 172 |
+
item = pd.api.indexers.check_array_indexer(self, item)
|
| 173 |
+
return type(self)(self._data[item])
|
| 174 |
+
|
| 175 |
+
def take(self, indexer, allow_fill=False, fill_value=None):
|
| 176 |
+
from pandas.api.extensions import take
|
| 177 |
+
|
| 178 |
+
data = self._data
|
| 179 |
+
if allow_fill and fill_value is None:
|
| 180 |
+
fill_value = self.dtype.na_value
|
| 181 |
+
|
| 182 |
+
result = take(data, indexer, fill_value=fill_value, allow_fill=allow_fill)
|
| 183 |
+
return self._from_sequence(result, dtype=self.dtype)
|
| 184 |
+
|
| 185 |
+
def copy(self):
|
| 186 |
+
return type(self)(self._data.copy(), dtype=self.dtype)
|
| 187 |
+
|
| 188 |
+
def astype(self, dtype, copy=True):
|
| 189 |
+
if is_dtype_equal(dtype, self._dtype):
|
| 190 |
+
if not copy:
|
| 191 |
+
return self
|
| 192 |
+
dtype = pandas_dtype(dtype)
|
| 193 |
+
if isinstance(dtype, type(self.dtype)):
|
| 194 |
+
return type(self)(self._data, copy=copy, context=dtype.context)
|
| 195 |
+
|
| 196 |
+
return super().astype(dtype, copy=copy)
|
| 197 |
+
|
| 198 |
+
def __setitem__(self, key, value) -> None:
|
| 199 |
+
if is_list_like(value):
|
| 200 |
+
if is_scalar(key):
|
| 201 |
+
raise ValueError("setting an array element with a sequence.")
|
| 202 |
+
value = [decimal.Decimal(v) for v in value]
|
| 203 |
+
else:
|
| 204 |
+
value = decimal.Decimal(value)
|
| 205 |
+
|
| 206 |
+
key = check_array_indexer(self, key)
|
| 207 |
+
self._data[key] = value
|
| 208 |
+
|
| 209 |
+
def __len__(self) -> int:
|
| 210 |
+
return len(self._data)
|
| 211 |
+
|
| 212 |
+
def __contains__(self, item) -> bool | np.bool_:
|
| 213 |
+
if not isinstance(item, decimal.Decimal):
|
| 214 |
+
return False
|
| 215 |
+
elif item.is_nan():
|
| 216 |
+
return self.isna().any()
|
| 217 |
+
else:
|
| 218 |
+
return super().__contains__(item)
|
| 219 |
+
|
| 220 |
+
@property
|
| 221 |
+
def nbytes(self) -> int:
|
| 222 |
+
n = len(self)
|
| 223 |
+
if n:
|
| 224 |
+
return n * sys.getsizeof(self[0])
|
| 225 |
+
return 0
|
| 226 |
+
|
| 227 |
+
def isna(self):
|
| 228 |
+
return np.array([x.is_nan() for x in self._data], dtype=bool)
|
| 229 |
+
|
| 230 |
+
@property
|
| 231 |
+
def _na_value(self):
|
| 232 |
+
return decimal.Decimal("NaN")
|
| 233 |
+
|
| 234 |
+
def _formatter(self, boxed=False):
|
| 235 |
+
if boxed:
|
| 236 |
+
return "Decimal: {}".format
|
| 237 |
+
return repr
|
| 238 |
+
|
| 239 |
+
@classmethod
|
| 240 |
+
def _concat_same_type(cls, to_concat):
|
| 241 |
+
return cls(np.concatenate([x._data for x in to_concat]))
|
| 242 |
+
|
| 243 |
+
def _reduce(
|
| 244 |
+
self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
|
| 245 |
+
):
|
| 246 |
+
if skipna and self.isna().any():
|
| 247 |
+
# If we don't have any NAs, we can ignore skipna
|
| 248 |
+
other = self[~self.isna()]
|
| 249 |
+
result = other._reduce(name, **kwargs)
|
| 250 |
+
elif name == "sum" and len(self) == 0:
|
| 251 |
+
# GH#29630 avoid returning int 0 or np.bool_(False) on old numpy
|
| 252 |
+
result = decimal.Decimal(0)
|
| 253 |
+
else:
|
| 254 |
+
try:
|
| 255 |
+
op = getattr(self.data, name)
|
| 256 |
+
except AttributeError as err:
|
| 257 |
+
raise NotImplementedError(
|
| 258 |
+
f"decimal does not support the {name} operation"
|
| 259 |
+
) from err
|
| 260 |
+
result = op(axis=0)
|
| 261 |
+
|
| 262 |
+
if keepdims:
|
| 263 |
+
return type(self)([result])
|
| 264 |
+
else:
|
| 265 |
+
return result
|
| 266 |
+
|
| 267 |
+
def _cmp_method(self, other, op):
|
| 268 |
+
# For use with OpsMixin
|
| 269 |
+
def convert_values(param):
|
| 270 |
+
if isinstance(param, ExtensionArray) or is_list_like(param):
|
| 271 |
+
ovalues = param
|
| 272 |
+
else:
|
| 273 |
+
# Assume it's an object
|
| 274 |
+
ovalues = [param] * len(self)
|
| 275 |
+
return ovalues
|
| 276 |
+
|
| 277 |
+
lvalues = self
|
| 278 |
+
rvalues = convert_values(other)
|
| 279 |
+
|
| 280 |
+
# If the operator is not defined for the underlying objects,
|
| 281 |
+
# a TypeError should be raised
|
| 282 |
+
res = [op(a, b) for (a, b) in zip(lvalues, rvalues)]
|
| 283 |
+
|
| 284 |
+
return np.asarray(res, dtype=bool)
|
| 285 |
+
|
| 286 |
+
def value_counts(self, dropna: bool = True):
|
| 287 |
+
return value_counts(self.to_numpy(), dropna=dropna)
|
| 288 |
+
|
| 289 |
+
# We override fillna here to simulate a 3rd party EA that has done so. This
|
| 290 |
+
# lets us test the deprecation telling authors to implement _pad_or_backfill
|
| 291 |
+
# Simulate a 3rd-party EA that has not yet updated to include a "copy"
|
| 292 |
+
# keyword in its fillna method.
|
| 293 |
+
# error: Signature of "fillna" incompatible with supertype "ExtensionArray"
|
| 294 |
+
def fillna( # type: ignore[override]
|
| 295 |
+
self,
|
| 296 |
+
value=None,
|
| 297 |
+
method=None,
|
| 298 |
+
limit: int | None = None,
|
| 299 |
+
):
|
| 300 |
+
return super().fillna(value=value, method=method, limit=limit, copy=True)
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
def to_decimal(values, context=None):
    """Convert each item of ``values`` to ``Decimal`` and wrap in a DecimalArray."""
    converted = [decimal.Decimal(item) for item in values]
    return DecimalArray(converted, context=context)
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
def make_data():
    """Return 100 ``Decimal`` values drawn from a generator seeded with 2."""
    rng = np.random.default_rng(2)
    samples = rng.random(100)
    return list(map(decimal.Decimal, samples))
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
# Runs once at import time; presumably installs the arithmetic operator
# methods on DecimalArray (helper inherited, likely from
# ExtensionScalarOpsMixin -- confirm).
DecimalArray._add_arithmetic_ops()
|
py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/test_decimal.py
ADDED
|
@@ -0,0 +1,587 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import decimal
|
| 4 |
+
import operator
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pytest
|
| 8 |
+
|
| 9 |
+
from pandas.compat.numpy import np_version_gt2
|
| 10 |
+
|
| 11 |
+
import pandas as pd
|
| 12 |
+
import pandas._testing as tm
|
| 13 |
+
from pandas.tests.extension import base
|
| 14 |
+
from pandas.tests.extension.decimal.array import (
|
| 15 |
+
DecimalArray,
|
| 16 |
+
DecimalDtype,
|
| 17 |
+
make_data,
|
| 18 |
+
to_decimal,
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
@pytest.fixture
def dtype():
    """Fixture: a fresh ``DecimalDtype`` instance."""
    decimal_dtype = DecimalDtype()
    return decimal_dtype
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@pytest.fixture
def data():
    """Fixture: a ``DecimalArray`` wrapping the values from ``make_data()``."""
    values = make_data()
    return DecimalArray(values)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
@pytest.fixture
def data_for_twos():
    """Fixture: a ``DecimalArray`` of one hundred ``Decimal(2)`` values."""
    twos = [decimal.Decimal(2) for _ in range(100)]
    return DecimalArray(twos)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@pytest.fixture
def data_missing():
    """Fixture: a two-element ``DecimalArray`` of ``[NaN, 1]``."""
    missing = decimal.Decimal("NaN")
    present = decimal.Decimal(1)
    return DecimalArray([missing, present])
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
@pytest.fixture
def data_for_sorting():
    """Fixture: a ``DecimalArray`` holding ``1, 2, 0`` in that order."""
    unsorted = [decimal.Decimal(text) for text in ("1", "2", "0")]
    return DecimalArray(unsorted)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
@pytest.fixture
def data_missing_for_sorting():
    """Fixture: a ``DecimalArray`` holding ``1, NaN, 0`` in that order."""
    unsorted = [decimal.Decimal(text) for text in ("1", "NaN", "0")]
    return DecimalArray(unsorted)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@pytest.fixture
def na_cmp():
    """Fixture: a two-argument predicate that is true when both values are NaN."""

    def both_nan(x, y):
        # Relies on ``is_nan()`` being available on both operands.
        return x.is_nan() and y.is_nan()

    return both_nan
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
@pytest.fixture
def data_for_grouping():
    """
    Fixture: eight values laid out as ``[B, B, NA, NA, A, A, B, C]``.

    Here ``A = 0.0``, ``B = 1.0``, ``C = 2.0`` and ``NA`` is ``Decimal("NaN")``.
    """
    low, mid, high, missing = (
        decimal.Decimal(text) for text in ("0.0", "1.0", "2.0", "NaN")
    )
    layout = [mid, mid, missing, missing, low, low, mid, high]
    return DecimalArray(layout)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
class TestDecimalArray(base.ExtensionTests):
    """
    Run the shared extension-array tests against ``DecimalArray``.

    The overrides below adapt the inherited tests to this array: they mark
    unsupported reductions as expected failures, assert the deprecation
    warnings emitted by its ``fillna``, and relax decimal traps where
    NaN/zero arithmetic would otherwise raise.
    """

    def _get_expected_exception(
        self, op_name: str, obj, other
    ) -> type[Exception] | tuple[type[Exception], ...] | None:
        """Return ``None``: no arithmetic op is expected to raise here."""
        return None

    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
        """Report every reduction as supported."""
        return True

    def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
        """Compare a Series reduction with the same method on its ndarray."""
        if op_name == "count":
            return super().check_reduce(ser, op_name, skipna)
        else:
            result = getattr(ser, op_name)(skipna=skipna)
            expected = getattr(np.asarray(ser), op_name)()
            tm.assert_almost_equal(result, expected)

    def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, request):
        """Run the base test, xfailing reductions that raise NotImplementedError."""
        if all_numeric_reductions in ["kurt", "skew", "sem", "median"]:
            mark = pytest.mark.xfail(raises=NotImplementedError)
            request.applymarker(mark)
        super().test_reduce_series_numeric(data, all_numeric_reductions, skipna)

    def test_reduce_frame(self, data, all_numeric_reductions, skipna, request):
        """Run the base frame test, xfailing ``skew`` and ``median``."""
        op_name = all_numeric_reductions
        if op_name in ["skew", "median"]:
            mark = pytest.mark.xfail(raises=NotImplementedError)
            request.applymarker(mark)

        return super().test_reduce_frame(data, all_numeric_reductions, skipna)

    def test_compare_scalar(self, data, comparison_op):
        """Compare the Series against the scalar ``0.5``."""
        ser = pd.Series(data)
        self._compare_other(ser, data, comparison_op, 0.5)

    def test_compare_array(self, data, comparison_op):
        """Compare the Series against a scaled copy of itself."""
        ser = pd.Series(data)

        alter = np.random.default_rng(2).choice([-1, 0, 1], len(data))
        # Randomly double, halve or keep same value
        other = pd.Series(data) * [decimal.Decimal(pow(2.0, i)) for i in alter]
        self._compare_other(ser, data, comparison_op, other)

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        """
        Check arithmetic between the Series and int Series, itself, 0 and 5.

        The DivisionByZero and InvalidOperation traps are disabled for the
        duration of the checks.
        """
        op_name = all_arithmetic_operators
        ser = pd.Series(data)

        # localcontext() works on a copy of the current context and restores
        # the previous one on exit, so the traps are reinstated even when a
        # check below raises.
        with decimal.localcontext() as ctx:
            ctx.traps[decimal.DivisionByZero] = False
            ctx.traps[decimal.InvalidOperation] = False

            # Decimal supports ops with int, but not float
            other = pd.Series([int(d * 100) for d in data])
            self.check_opname(ser, op_name, other)

            if "mod" not in op_name:
                self.check_opname(ser, op_name, ser * 2)

            self.check_opname(ser, op_name, 0)
            self.check_opname(ser, op_name, 5)

    def test_fillna_frame(self, data_missing):
        """Base test must emit the 'copy' keyword DeprecationWarning."""
        msg = "ExtensionArray.fillna added a 'copy' keyword"
        with tm.assert_produces_warning(
            DeprecationWarning, match=msg, check_stacklevel=False
        ):
            super().test_fillna_frame(data_missing)

    def test_fillna_limit_pad(self, data_missing):
        """Base test must emit both 'method' deprecation warnings."""
        msg = "ExtensionArray.fillna 'method' keyword is deprecated"
        with tm.assert_produces_warning(
            DeprecationWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            super().test_fillna_limit_pad(data_missing)

        msg = "The 'method' keyword in DecimalArray.fillna is deprecated"
        with tm.assert_produces_warning(
            FutureWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            super().test_fillna_limit_pad(data_missing)

    @pytest.mark.parametrize(
        "limit_area, input_ilocs, expected_ilocs",
        [
            ("outside", [1, 0, 0, 0, 1], [1, 0, 0, 0, 1]),
            ("outside", [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]),
            ("outside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 1]),
            ("outside", [0, 1, 0, 1, 0], [0, 1, 0, 1, 1]),
            ("inside", [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]),
            ("inside", [1, 0, 1, 0, 1], [1, 1, 1, 1, 1]),
            ("inside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 0]),
            ("inside", [0, 1, 0, 1, 0], [0, 1, 1, 1, 0]),
        ],
    )
    def test_ffill_limit_area(
        self, data_missing, limit_area, input_ilocs, expected_ilocs
    ):
        """``limit_area`` is unsupported: expect NotImplementedError plus a warning."""
        # GH#56616
        msg = "ExtensionArray.fillna 'method' keyword is deprecated"
        with tm.assert_produces_warning(
            DeprecationWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            msg = "DecimalArray does not implement limit_area"
            with pytest.raises(NotImplementedError, match=msg):
                super().test_ffill_limit_area(
                    data_missing, limit_area, input_ilocs, expected_ilocs
                )

    def test_fillna_limit_backfill(self, data_missing):
        """Base test must emit each of the three 'method' deprecation warnings."""
        msg = "Series.fillna with 'method' is deprecated"
        with tm.assert_produces_warning(
            FutureWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            super().test_fillna_limit_backfill(data_missing)

        msg = "ExtensionArray.fillna 'method' keyword is deprecated"
        with tm.assert_produces_warning(
            DeprecationWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            super().test_fillna_limit_backfill(data_missing)

        msg = "The 'method' keyword in DecimalArray.fillna is deprecated"
        with tm.assert_produces_warning(
            FutureWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            super().test_fillna_limit_backfill(data_missing)

    def test_fillna_no_op_returns_copy(self, data):
        """Base test must emit one of the 'method' deprecation warnings."""
        msg = "|".join(
            [
                "ExtensionArray.fillna 'method' keyword is deprecated",
                "The 'method' keyword in DecimalArray.fillna is deprecated",
            ]
        )
        with tm.assert_produces_warning(
            (FutureWarning, DeprecationWarning), match=msg, check_stacklevel=False
        ):
            super().test_fillna_no_op_returns_copy(data)

    def test_fillna_series(self, data_missing):
        """Base test must emit the 'copy' keyword DeprecationWarning."""
        msg = "ExtensionArray.fillna added a 'copy' keyword"
        with tm.assert_produces_warning(
            DeprecationWarning, match=msg, check_stacklevel=False
        ):
            super().test_fillna_series(data_missing)

    def test_fillna_series_method(self, data_missing, fillna_method):
        """Base test must emit one of the 'method' deprecation warnings."""
        msg = "|".join(
            [
                "ExtensionArray.fillna 'method' keyword is deprecated",
                "The 'method' keyword in DecimalArray.fillna is deprecated",
            ]
        )
        with tm.assert_produces_warning(
            (FutureWarning, DeprecationWarning), match=msg, check_stacklevel=False
        ):
            super().test_fillna_series_method(data_missing, fillna_method)

    def test_fillna_copy_frame(self, data_missing, using_copy_on_write):
        """The 'copy' DeprecationWarning is expected only without copy-on-write."""
        warn = DeprecationWarning if not using_copy_on_write else None
        msg = "ExtensionArray.fillna added a 'copy' keyword"
        with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
            super().test_fillna_copy_frame(data_missing)

    def test_fillna_copy_series(self, data_missing, using_copy_on_write):
        """The 'copy' DeprecationWarning is expected only without copy-on-write."""
        warn = DeprecationWarning if not using_copy_on_write else None
        msg = "ExtensionArray.fillna added a 'copy' keyword"
        with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
            super().test_fillna_copy_series(data_missing)

    @pytest.mark.parametrize("dropna", [True, False])
    def test_value_counts(self, all_data, dropna, request):
        """``value_counts`` on the array matches ``value_counts`` on its values."""
        all_data = all_data[:10]
        if dropna:
            other = np.array(all_data[~all_data.isna()])
        else:
            other = all_data

        vcs = pd.Series(all_data).value_counts(dropna=dropna)
        vcs_ex = pd.Series(other).value_counts(dropna=dropna)

        with decimal.localcontext() as ctx:
            # avoid raising when comparing Decimal("NAN") < Decimal(2)
            ctx.traps[decimal.InvalidOperation] = False

            result = vcs.sort_index()
            expected = vcs_ex.sort_index()

        tm.assert_series_equal(result, expected)

    def test_series_repr(self, data):
        """The Series repr shows the dtype name and the custom formatter output."""
        # Overriding this base test to explicitly test that
        # the custom _formatter is used
        ser = pd.Series(data)
        assert data.dtype.name in repr(ser)
        assert "Decimal: " in repr(ser)

    @pytest.mark.xfail(reason="Inconsistent array-vs-scalar behavior")
    @pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs])
    def test_unary_ufunc_dunder_equivalence(self, data, ufunc):
        """Base test, marked as an expected failure for this array."""
        super().test_unary_ufunc_dunder_equivalence(data, ufunc)

    def test_array_interface_copy(self, data):
        """``copy=True`` yields independent arrays; ``copy=False`` shares or raises."""
        result_copy1 = np.array(data, copy=True)
        result_copy2 = np.array(data, copy=True)
        assert not np.may_share_memory(result_copy1, result_copy2)
        if not np_version_gt2:
            # copy=False semantics are only supported in NumPy>=2.
            return

        try:
            result_nocopy1 = np.array(data, copy=False)
        except ValueError:
            # An error is always acceptable for `copy=False`
            return

        result_nocopy2 = np.array(data, copy=False)
        # If copy=False was given and did not raise, these must share the same data
        assert np.may_share_memory(result_nocopy1, result_nocopy2)
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
def test_take_na_value_other_decimal():
    """``take`` with ``allow_fill`` puts the given Decimal fill value at index -1."""
    one, two, fill = (decimal.Decimal(text) for text in ("1.0", "2.0", "-1.0"))
    source = DecimalArray([one, two])
    taken = source.take([0, -1], allow_fill=True, fill_value=fill)
    wanted = DecimalArray([one, fill])
    tm.assert_extension_array_equal(taken, wanted)
|
| 318 |
+
|
| 319 |
+
|
| 320 |
+
def test_series_constructor_coerce_data_to_extension_dtype():
|
| 321 |
+
dtype = DecimalDtype()
|
| 322 |
+
ser = pd.Series([0, 1, 2], dtype=dtype)
|
| 323 |
+
|
| 324 |
+
arr = DecimalArray(
|
| 325 |
+
[decimal.Decimal(0), decimal.Decimal(1), decimal.Decimal(2)],
|
| 326 |
+
dtype=dtype,
|
| 327 |
+
)
|
| 328 |
+
exp = pd.Series(arr)
|
| 329 |
+
tm.assert_series_equal(ser, exp)
|
| 330 |
+
|
| 331 |
+
|
| 332 |
+
def test_series_constructor_with_dtype():
    """The Series constructor honours ``dtype`` for a DecimalArray input."""
    ten = DecimalArray([decimal.Decimal("10.0")])

    # Same extension dtype: identical to constructing without a dtype.
    same_dtype = pd.Series(ten, dtype=DecimalDtype())
    tm.assert_series_equal(same_dtype, pd.Series(ten))

    # int64: the values are converted to integers.
    as_int = pd.Series(ten, dtype="int64")
    tm.assert_series_equal(as_int, pd.Series([10]))
|
| 341 |
+
|
| 342 |
+
|
| 343 |
+
def test_dataframe_constructor_with_dtype():
    """The DataFrame constructor honours ``dtype`` for a DecimalArray column."""
    decimal_column = DecimalArray([decimal.Decimal("10.0")])
    same_dtype = pd.DataFrame({"A": decimal_column}, dtype=DecimalDtype())
    tm.assert_frame_equal(same_dtype, pd.DataFrame({"A": decimal_column}))

    # A fresh array is built for the int64 case, as in the first case.
    int_source = DecimalArray([decimal.Decimal("10.0")])
    as_int = pd.DataFrame({"A": int_source}, dtype="int64")
    tm.assert_frame_equal(as_int, pd.DataFrame({"A": [10]}))
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
@pytest.mark.parametrize("frame", [True, False])
def test_astype_dispatches(frame):
    """
    ``astype`` on a Series/DataFrame reaches ``ExtensionArray.astype``.

    This is a dtype-specific test: designing a reliable smoke test that
    works for arbitrary data types is difficult. The check is that the
    context carried by the target dtype survives the conversion.
    """
    target_ctx = decimal.Context()
    target_ctx.prec = 5

    obj = pd.Series(DecimalArray([decimal.Decimal(2)]), name="a")
    if frame:
        obj = obj.to_frame()

    converted = obj.astype(DecimalDtype(target_ctx))
    column = converted["a"] if frame else converted

    assert column.dtype.context.prec == target_ctx.prec
|
| 375 |
+
|
| 376 |
+
|
| 377 |
+
class DecimalArrayWithoutFromSequence(DecimalArray):
    """
    Helper class for testing error handling in _from_sequence.

    Its ``_from_sequence`` always fails, so callers that try to rebuild an
    array of this type from scalars hit the error path.
    """

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
        # Unconditional failure; the arguments are never inspected.
        raise KeyError("For the test")
|
| 383 |
+
|
| 384 |
+
|
| 385 |
+
class DecimalArrayWithoutCoercion(DecimalArrayWithoutFromSequence):
    """Variant whose arithmetic methods are created with ``coerce_to_dtype=False``."""

    @classmethod
    def _create_arithmetic_method(cls, op):
        method = cls._create_method(op, coerce_to_dtype=False)
        return method


# Re-run the registration on this subclass after overriding the factory above.
DecimalArrayWithoutCoercion._add_arithmetic_ops()
|
| 392 |
+
|
| 393 |
+
|
| 394 |
+
def test_combine_from_sequence_raises(monkeypatch):
    """``Series.combine`` yields object dtype when ``_from_sequence`` raises."""
    # https://github.com/pandas-dev/pandas/issues/22850
    failing_cls = DecimalArrayWithoutFromSequence

    # Point the dtype at the array type whose _from_sequence always raises.
    monkeypatch.setattr(
        DecimalDtype,
        "construct_array_type",
        classmethod(lambda cls: DecimalArrayWithoutFromSequence),
    )

    values = failing_cls([decimal.Decimal("1.0"), decimal.Decimal("2.0")])
    ser = pd.Series(values)
    combined = ser.combine(ser, operator.add)

    # note: object dtype
    doubled = [decimal.Decimal("2.0"), decimal.Decimal("4.0")]
    tm.assert_series_equal(combined, pd.Series(doubled, dtype="object"))
|
| 413 |
+
|
| 414 |
+
|
| 415 |
+
@pytest.mark.parametrize(
    "class_", [DecimalArrayWithoutFromSequence, DecimalArrayWithoutCoercion]
)
def test_scalar_ops_from_sequence_raises(class_):
    """
    Adding two arrays yields an object ndarray for both helper classes.

    op(EA, EA) should return an EA, or an ndarray if it's not possible
    to return an EA with the return values.
    """
    operand = class_([decimal.Decimal("1.0"), decimal.Decimal("2.0")])
    summed = operand + operand

    doubled = [decimal.Decimal("2.0"), decimal.Decimal("4.0")]
    tm.assert_numpy_array_equal(summed, np.array(doubled, dtype="object"))
|
| 427 |
+
|
| 428 |
+
|
| 429 |
+
@pytest.mark.parametrize(
    "reverse, expected_div, expected_mod",
    [(False, [0, 1, 1, 2], [1, 0, 1, 0]), (True, [2, 1, 0, 0], [0, 0, 2, 2])],
)
def test_divmod_array(reverse, expected_div, expected_mod):
    """``divmod`` works with the array on either side of the scalar 2."""
    # https://github.com/pandas-dev/pandas/issues/22930
    arr = to_decimal([1, 2, 3, 4])
    left, right = (2, arr) if reverse else (arr, 2)
    quotient, remainder = divmod(left, right)

    wanted_div = to_decimal(expected_div)
    wanted_mod = to_decimal(expected_mod)

    tm.assert_extension_array_equal(quotient, wanted_div)
    tm.assert_extension_array_equal(remainder, wanted_mod)
|
| 445 |
+
|
| 446 |
+
|
| 447 |
+
def test_ufunc_fallback(data):
    """``np.abs`` on a Series equals ``np.abs`` on the array, index preserved."""
    head = data[:5]
    ser = pd.Series(head, index=range(3, 8))
    absolute = np.abs(ser)
    wanted = pd.Series(np.abs(head), index=range(3, 8))
    tm.assert_series_equal(absolute, wanted)
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
def test_array_ufunc():
    """``np.exp`` on the array equals ``np.exp`` on its ``_data``, re-wrapped."""
    arr = to_decimal([1, 2, 3])
    exponentiated = np.exp(arr)
    wanted = to_decimal(np.exp(arr._data))
    tm.assert_extension_array_equal(exponentiated, wanted)
|
| 460 |
+
|
| 461 |
+
|
| 462 |
+
def test_array_ufunc_series():
    """``np.exp`` on a decimal Series returns a Series of the re-wrapped results."""
    arr = to_decimal([1, 2, 3])
    exponentiated = np.exp(pd.Series(arr))
    wanted = pd.Series(to_decimal(np.exp(arr._data)))
    tm.assert_series_equal(exponentiated, wanted)
|
| 468 |
+
|
| 469 |
+
|
| 470 |
+
def test_array_ufunc_series_scalar_other():
    """``np.add`` of a Series and a Decimal scalar matches the array-level result."""
    # check _HANDLED_TYPES
    arr = to_decimal([1, 2, 3])
    one = decimal.Decimal(1)
    via_series = np.add(pd.Series(arr), one)
    via_array = pd.Series(np.add(arr, decimal.Decimal(1)))
    tm.assert_series_equal(via_series, via_array)
|
| 477 |
+
|
| 478 |
+
|
| 479 |
+
def test_array_ufunc_series_defer():
    """``np.add`` of a Series and its array gives a Series in either argument order."""
    arr = to_decimal([1, 2, 3])
    ser = pd.Series(arr)
    wanted = pd.Series(to_decimal([2, 4, 6]))

    series_first = np.add(ser, arr)
    array_first = np.add(arr, ser)

    for outcome in (series_first, array_first):
        tm.assert_series_equal(outcome, wanted)
|
| 489 |
+
|
| 490 |
+
|
| 491 |
+
def test_groupby_agg():
    """
    Ensure that the result of agg is inferred to be decimal dtype.

    https://github.com/pandas-dev/pandas/issues/29141
    """
    data = make_data()[:5]
    df = pd.DataFrame(
        {"id1": [0, 0, 0, 1, 1], "id2": [0, 1, 0, 1, 1], "decimals": DecimalArray(data)}
    )

    # single key, selected column
    single_key = pd.Series(to_decimal([data[0], data[3]]))
    via_frame = df.groupby("id1")["decimals"].agg(lambda x: x.iloc[0])
    tm.assert_series_equal(via_frame, single_key, check_names=False)
    via_series = df["decimals"].groupby(df["id1"]).agg(lambda x: x.iloc[0])
    tm.assert_series_equal(via_series, single_key, check_names=False)

    # multiple keys, selected column
    multi_key = pd.Series(
        to_decimal([data[0], data[1], data[3]]),
        index=pd.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 1)]),
    )
    via_frame = df.groupby(["id1", "id2"])["decimals"].agg(lambda x: x.iloc[0])
    tm.assert_series_equal(via_frame, multi_key, check_names=False)
    via_series = df["decimals"].groupby([df["id1"], df["id2"]]).agg(lambda x: x.iloc[0])
    tm.assert_series_equal(via_series, multi_key, check_names=False)

    # multiple columns
    whole_frame = pd.DataFrame({"id2": [0, 1], "decimals": to_decimal([data[0], data[3]])})
    aggregated = df.groupby("id1").agg(lambda x: x.iloc[0])
    tm.assert_frame_equal(aggregated, whole_frame, check_names=False)
|
| 521 |
+
|
| 522 |
+
|
| 523 |
+
def test_groupby_agg_ea_method(monkeypatch):
    """
    Ensure that the result of agg is inferred to be decimal dtype.

    The aggregation calls a method patched onto ``DecimalArray`` itself.
    https://github.com/pandas-dev/pandas/issues/29141
    """

    def DecimalArray__my_sum(self):
        return np.sum(np.array(self))

    monkeypatch.setattr(DecimalArray, "my_sum", DecimalArray__my_sum, raising=False)

    data = make_data()[:5]
    df = pd.DataFrame({"id": [0, 0, 0, 1, 1], "decimals": DecimalArray(data)})
    group_totals = [data[0] + data[1] + data[2], data[3] + data[4]]
    wanted = pd.Series(to_decimal(group_totals))

    by_column = df.groupby("id")["decimals"].agg(lambda x: x.values.my_sum())
    tm.assert_series_equal(by_column, wanted, check_names=False)

    ser = pd.Series(DecimalArray(data))
    keys = np.array([0, 0, 0, 1, 1], dtype=np.int64)
    by_array = ser.groupby(keys).agg(lambda x: x.values.my_sum())
    tm.assert_series_equal(by_array, wanted, check_names=False)
|
| 542 |
+
|
| 543 |
+
|
| 544 |
+
def test_indexing_no_materialize(monkeypatch):
    """
    Indexing must not convert the ExtensionArray to a numpy array.

    See https://github.com/pandas-dev/pandas/issues/29708
    ``__array__`` is patched to raise, so any unnecessary materialization
    during the indexing operations below fails the test.
    """

    def DecimalArray__array__(self, dtype=None):
        raise Exception("tried to convert a DecimalArray to a numpy array")

    monkeypatch.setattr(DecimalArray, "__array__", DecimalArray__array__, raising=False)

    ser = pd.Series(DecimalArray(make_data()))
    frame = pd.DataFrame({"a": ser, "b": range(len(ser))})

    # ensure the following operations do not raise an error
    ser[ser > 0.5]
    frame[ser > 0.5]
    ser.at[0]
    frame.at[0, "a"]
|
| 563 |
+
|
| 564 |
+
|
| 565 |
+
def test_to_numpy_keyword():
    """The extra ``decimals`` keyword of ``to_numpy`` works on array and Series."""
    # test the extra keyword
    raw = [decimal.Decimal("1.1111"), decimal.Decimal("2.2222")]
    rounded = [decimal.Decimal("1.11"), decimal.Decimal("2.22")]
    wanted = np.array(rounded, dtype="object")

    arr = pd.array(raw, dtype="decimal")
    tm.assert_numpy_array_equal(arr.to_numpy(decimals=2), wanted)

    from_series = pd.Series(arr).to_numpy(decimals=2)
    tm.assert_numpy_array_equal(from_series, wanted)
|
| 577 |
+
|
| 578 |
+
|
| 579 |
+
def test_array_copy_on_write(using_copy_on_write):
    """Under copy-on-write, writing to the source leaves the ``astype`` result intact."""
    original = [decimal.Decimal(2), decimal.Decimal(3)]
    df = pd.DataFrame({"a": original}, dtype="object")
    df2 = df.astype(DecimalDtype())
    df.iloc[0, 0] = 0

    # Nothing is asserted when copy-on-write is disabled.
    if not using_copy_on_write:
        return

    expected = pd.DataFrame(
        {"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype=DecimalDtype()
    )
    tm.assert_equal(df2.values, expected.values)
|
py311/lib/python3.11/site-packages/pandas/tests/extension/list/__init__.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pandas.tests.extension.list.array import (
|
| 2 |
+
ListArray,
|
| 3 |
+
ListDtype,
|
| 4 |
+
make_data,
|
| 5 |
+
)
|
| 6 |
+
|
| 7 |
+
__all__ = ["ListArray", "ListDtype", "make_data"]
|
py311/lib/python3.11/site-packages/pandas/tests/extension/list/array.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Test extension array for storing nested data in a pandas container.
|
| 3 |
+
|
| 4 |
+
The ListArray stores an ndarray of lists.
|
| 5 |
+
"""
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import numbers
|
| 9 |
+
import string
|
| 10 |
+
from typing import TYPE_CHECKING
|
| 11 |
+
|
| 12 |
+
import numpy as np
|
| 13 |
+
|
| 14 |
+
from pandas.core.dtypes.base import ExtensionDtype
|
| 15 |
+
|
| 16 |
+
import pandas as pd
|
| 17 |
+
from pandas.api.types import (
|
| 18 |
+
is_object_dtype,
|
| 19 |
+
is_string_dtype,
|
| 20 |
+
)
|
| 21 |
+
from pandas.core.arrays import ExtensionArray
|
| 22 |
+
|
| 23 |
+
if TYPE_CHECKING:
|
| 24 |
+
from pandas._typing import type_t
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class ListDtype(ExtensionDtype):
    """Extension dtype whose scalar type is the built-in ``list``."""

    # Scalar type, registered name, and missing-value marker for this dtype.
    type = list
    name = "list"
    na_value = np.nan

    @classmethod
    def construct_array_type(cls) -> type_t[ListArray]:
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
            The ``ListArray`` class.
        """
        array_type = ListArray
        return array_type
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class ListArray(ExtensionArray):
    """
    Extension array that stores one Python ``list`` (or NaN) per element.

    The values live in ``self.data``, a numpy ndarray that must be passed
    to the constructor; every element is a ``list`` or a missing value.
    """

    dtype = ListDtype()
    __array_priority__ = 1000

    def __init__(self, values, dtype=None, copy=False) -> None:
        """
        Wrap ``values`` without copying it.

        Raises
        ------
        TypeError
            If ``values`` is not a numpy array, or any element is neither a
            ``list`` nor a missing value.
        """
        if not isinstance(values, np.ndarray):
            raise TypeError("Need to pass a numpy array as values")
        for val in values:
            if not isinstance(val, self.dtype.type) and not pd.isna(val):
                raise TypeError("All values must be of type " + str(self.dtype.type))
        self.data = values

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
        """Build an array holding each item of ``scalars`` in its own slot."""
        data = np.empty(len(scalars), dtype=object)
        # Assign slot by slot: ``data[:] = scalars`` lets NumPy turn
        # equal-length nested lists into a 2-D array, which then fails to
        # broadcast into this 1-D buffer.
        for position, scalar in enumerate(scalars):
            data[position] = scalar
        return cls(data)

    def __getitem__(self, item):
        """Return the element for an integer, else a new array of the selection."""
        if isinstance(item, numbers.Integral):
            return self.data[item]
        else:
            # slice, list-like, mask
            return type(self)(self.data[item])

    def __len__(self) -> int:
        """Return the number of elements."""
        return len(self.data)

    def isna(self):
        """Return a boolean ndarray that is True where the element is NaN."""
        return np.array(
            [not isinstance(x, list) and np.isnan(x) for x in self.data], dtype=bool
        )

    def take(self, indexer, allow_fill=False, fill_value=None):
        """
        Select elements by position.

        With ``allow_fill=True``, ``-1`` marks a slot filled with
        ``fill_value`` (the dtype's ``na_value`` when ``None``) and any index
        below ``-1`` raises ``ValueError``. An out-of-bounds index raises
        ``IndexError``.
        """
        # re-implement here, since NumPy has trouble setting
        # sized objects like UserDicts into scalar slots of
        # an ndarray.
        indexer = np.asarray(indexer)
        msg = (
            "Index is out of bounds or cannot do a "
            "non-empty take from an empty array."
        )

        if allow_fill:
            if fill_value is None:
                fill_value = self.dtype.na_value
            # bounds check
            if (indexer < -1).any():
                raise ValueError
            try:
                output = [
                    self.data[loc] if loc != -1 else fill_value for loc in indexer
                ]
            except IndexError as err:
                raise IndexError(msg) from err
        else:
            try:
                output = [self.data[loc] for loc in indexer]
            except IndexError as err:
                raise IndexError(msg) from err

        return self._from_sequence(output)

    def copy(self):
        """
        Return a new array backed by its own buffer.

        The copy is shallow: the element lists themselves are shared, but
        reassigning a slot in one array does not affect the other.
        """
        # ``self.data[:]`` would only be a view onto the same buffer.
        return type(self)(self.data.copy())

    def astype(self, dtype, copy=True):
        """
        Convert to ``dtype``.

        The array's own dtype returns ``self`` (or a copy when ``copy`` is
        true); non-object string dtypes stringify each element; anything
        else is handed to NumPy.
        """
        if isinstance(dtype, type(self.dtype)) and dtype == self.dtype:
            if copy:
                return self.copy()
            return self
        elif is_string_dtype(dtype) and not is_object_dtype(dtype):
            # numpy has problems with astype(str) for nested elements
            return np.array([str(x) for x in self.data], dtype=dtype)
        elif not copy:
            return np.asarray(self.data, dtype=dtype)
        else:
            return np.array(self.data, dtype=dtype, copy=copy)

    @classmethod
    def _concat_same_type(cls, to_concat):
        """Concatenate the ``data`` buffers of ``to_concat`` into one array."""
        data = np.concatenate([x.data for x in to_concat])
        return cls(data)
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def make_data():
    """
    Return a length-100 object ndarray of lists of random ASCII letters.

    Each list holds between 0 and 9 letters. The generator is seeded with a
    constant, so every call returns the same contents.
    """
    # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer
    # NOTE(review): the TODO above mentions dicts although this helper only
    # builds lists -- confirm whether it still applies.
    rng = np.random.default_rng(2)
    # Build the alphabet once instead of once per drawn character.
    letters = list(string.ascii_letters)
    data = np.empty(100, dtype=object)
    # Assign slot by slot so the result never depends on the lists being
    # ragged (a bulk assignment of equal-length lists is treated as 2-D).
    for pos in range(100):
        data[pos] = [rng.choice(letters) for _ in range(rng.integers(0, 10))]
    return data
|
py311/lib/python3.11/site-packages/pandas/tests/extension/list/test_list.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from pandas.tests.extension.list.array import (
|
| 5 |
+
ListArray,
|
| 6 |
+
ListDtype,
|
| 7 |
+
make_data,
|
| 8 |
+
)
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@pytest.fixture
def dtype():
    """Provide a fresh ``ListDtype`` instance."""
    list_dtype = ListDtype()
    return list_dtype
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@pytest.fixture
def data():
    """Length-100 ListArray for semantics test."""
    data = make_data()

    # The first two elements must differ in length. make_data() seeds its
    # generator with a constant, so drawing again would return the very same
    # lists and a retry loop could never terminate; swap in a later element
    # of a different length instead.
    if len(data[0]) == len(data[1]):
        first_len = len(data[0])
        for pos in range(2, len(data)):
            if len(data[pos]) != first_len:
                data[1], data[pos] = data[pos], data[1]
                break

    return ListArray(data)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def test_to_csv(data):
    """The CSV text of a frame holding a ListArray contains ``str`` of its first list."""
    # https://github.com/pandas-dev/pandas/issues/28840
    # array with list-likes fail when doing astype(str) on the numpy array
    # which was done in get_values_for_csv
    frame = pd.DataFrame({"a": data})
    csv_text = frame.to_csv()
    first_as_text = str(data[0])
    assert first_as_text in csv_text
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/__init__.py
ADDED
|
File without changes
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_append.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
from pandas import (
|
| 4 |
+
CategoricalIndex,
|
| 5 |
+
Index,
|
| 6 |
+
)
|
| 7 |
+
import pandas._testing as tm
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class TestAppend:
    """Tests for ``append`` when a ``CategoricalIndex`` is involved."""

    @pytest.fixture
    def ci(self):
        """Unordered CategoricalIndex of "aabbca" with categories c, a, b."""
        return CategoricalIndex(
            list("aabbca"), categories=list("cab"), ordered=False
        )

    def test_append(self, ci):
        """Appending slices of the same index rebuilds the original."""
        # append cats with the same categories
        head, tail = ci[:3], ci[3:]
        tm.assert_index_equal(head.append(tail), ci, exact=True)

        first, *rest = [ci[:1], ci[1:3], ci[3:]]
        tm.assert_index_equal(first.append(rest), ci, exact=True)

    def test_append_empty(self, ci):
        """Appending an empty list leaves the index unchanged."""
        # empty
        tm.assert_index_equal(ci.append([]), ci, exact=True)

    def test_append_mismatched_categories(self, ci):
        """Appending the underlying values (not an Index) raises TypeError."""
        # appending with different categories or reordered is not ok
        err = "all inputs must be Index"
        with pytest.raises(TypeError, match=err):
            ci.append(ci.values.set_categories(list("abcd")))
        with pytest.raises(TypeError, match=err):
            ci.append(ci.values.reorder_categories(list("abc")))

    def test_append_category_objects(self, ci):
        """Appending an Index whose values are all categories stays categorical."""
        # with objects
        appended = ci.append(Index(["c", "a"]))
        wanted = CategoricalIndex(list("aabbcaca"), categories=ci.categories)
        tm.assert_index_equal(appended, wanted, exact=True)

    def test_append_non_categories(self, ci):
        """Appending a value outside the categories yields a plain Index."""
        # invalid objects -> cast to object via concat_compat
        appended = ci.append(Index(["a", "d"]))
        wanted = Index(["a", "a", "b", "b", "c", "a", "a", "d"])
        tm.assert_index_equal(appended, wanted, exact=True)

    def test_append_object(self, ci):
        """Appending a CategoricalIndex to a plain Index yields a plain Index."""
        # GH#14298 - if base object is not categorical -> coerce to object
        appended = Index(["c", "a"]).append(ci)
        wanted = Index(list("caaabbca"))
        tm.assert_index_equal(appended, wanted, exact=True)

    def test_append_to_another(self):
        """A plain Index followed by a CategoricalIndex concatenates to a plain Index."""
        # hits Index._concat
        left = Index(["a", "b"])
        right = CategoricalIndex(["d", "e"])
        wanted = Index(["a", "b", "d", "e"])
        tm.assert_index_equal(left.append(right), wanted)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_category.py
ADDED
|
@@ -0,0 +1,391 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas._libs import index as libindex
|
| 5 |
+
from pandas._libs.arrays import NDArrayBacked
|
| 6 |
+
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from pandas import (
|
| 9 |
+
Categorical,
|
| 10 |
+
CategoricalDtype,
|
| 11 |
+
)
|
| 12 |
+
import pandas._testing as tm
|
| 13 |
+
from pandas.core.indexes.api import (
|
| 14 |
+
CategoricalIndex,
|
| 15 |
+
Index,
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class TestCategoricalIndex:
    """Behavioural tests for ``CategoricalIndex`` (insert, delete, uniqueness, isin, ...)."""

    @pytest.fixture
    def simple_index(self) -> CategoricalIndex:
        """Unordered CategoricalIndex of "aabbca" with categories c, a, b."""
        return CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

    def test_can_hold_identifiers(self):
        """``_can_hold_identifiers_and_holds_name`` is True for an element of the index."""
        idx = CategoricalIndex(list("aabbca"), categories=None, ordered=False)
        key = idx[0]
        assert idx._can_hold_identifiers_and_holds_name(key) is True

    def test_insert(self, simple_index):
        """Check ``insert`` at the front, at -1, into an empty index, with a non-category value and with NA values."""
        ci = simple_index
        categories = ci.categories

        # test 0th element
        result = ci.insert(0, "a")
        expected = CategoricalIndex(list("aaabbca"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        # test Nth element that follows Python list behavior
        result = ci.insert(-1, "a")
        expected = CategoricalIndex(list("aabbcaa"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        # test empty
        result = CategoricalIndex([], categories=categories).insert(0, "a")
        expected = CategoricalIndex(["a"], categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        # invalid -> cast to object
        expected = ci.astype(object).insert(0, "d")
        result = ci.insert(0, "d").astype(object)
        tm.assert_index_equal(result, expected, exact=True)

        # GH 18295 (test missing)
        expected = CategoricalIndex(["a", np.nan, "a", "b", "c", "b"])
        for na in (np.nan, pd.NaT, None):
            result = CategoricalIndex(list("aabcb")).insert(1, na)
            tm.assert_index_equal(result, expected)

    def test_insert_na_mismatched_dtype(self):
        """Inserting ``pd.NaT`` into an integer-valued index gives an object Index."""
        ci = CategoricalIndex([0, 1, 1])
        result = ci.insert(0, pd.NaT)
        expected = Index([pd.NaT, 0, 1, 1], dtype=object)
        tm.assert_index_equal(result, expected)

    def test_delete(self, simple_index):
        """Check ``delete`` for the first, last and an out-of-range position."""
        ci = simple_index
        categories = ci.categories

        result = ci.delete(0)
        expected = CategoricalIndex(list("abbca"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        result = ci.delete(-1)
        expected = CategoricalIndex(list("aabbc"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        with tm.external_error_raised((IndexError, ValueError)):
            # Either depending on NumPy version
            ci.delete(10)

    @pytest.mark.parametrize(
        "data, non_lexsorted_data",
        [[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]],
    )
    def test_is_monotonic(self, data, non_lexsorted_data):
        """Monotonicity is asserted for default, reversed and non-lexsorted category orders."""
        c = CategoricalIndex(data)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        c = CategoricalIndex(data, ordered=True)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        c = CategoricalIndex(data, categories=reversed(data))
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is True

        c = CategoricalIndex(data, categories=reversed(data), ordered=True)
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is True

        # test when data is neither monotonic increasing nor decreasing
        reordered_data = [data[0], data[2], data[1]]
        c = CategoricalIndex(reordered_data, categories=reversed(data))
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is False

        # non lexsorted categories
        categories = non_lexsorted_data

        c = CategoricalIndex(categories[:2], categories=categories)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        c = CategoricalIndex(categories[1:3], categories=categories)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

    def test_has_duplicates(self):
        """``is_unique`` and ``has_duplicates`` are asserted to be opposites in three cases."""
        idx = CategoricalIndex([0, 0, 0], name="foo")
        assert idx.is_unique is False
        assert idx.has_duplicates is True

        # neither 0 nor 1 appears in the categories given here
        idx = CategoricalIndex([0, 1], categories=[2, 3], name="foo")
        assert idx.is_unique is False
        assert idx.has_duplicates is True

        # only the value 0 is absent from the categories given here
        idx = CategoricalIndex([0, 1, 2, 3], categories=[1, 2, 3], name="foo")
        assert idx.is_unique is True
        assert idx.has_duplicates is False

    @pytest.mark.parametrize(
        "data, categories, expected",
        [
            (
                [1, 1, 1],
                [1, 2, 3],
                {
                    "first": np.array([False, True, True]),
                    "last": np.array([True, True, False]),
                    False: np.array([True, True, True]),
                },
            ),
            (
                [1, 1, 1],
                list("abc"),
                {
                    "first": np.array([False, True, True]),
                    "last": np.array([True, True, False]),
                    False: np.array([True, True, True]),
                },
            ),
            (
                [2, "a", "b"],
                list("abc"),
                {
                    "first": np.zeros(shape=(3), dtype=np.bool_),
                    "last": np.zeros(shape=(3), dtype=np.bool_),
                    False: np.zeros(shape=(3), dtype=np.bool_),
                },
            ),
            (
                list("abb"),
                list("abc"),
                {
                    "first": np.array([False, False, True]),
                    "last": np.array([False, True, False]),
                    False: np.array([False, True, True]),
                },
            ),
        ],
    )
    def test_drop_duplicates(self, data, categories, expected):
        """``duplicated`` and ``drop_duplicates`` agree with the per-``keep`` masks in ``expected``."""
        idx = CategoricalIndex(data, categories=categories, name="foo")
        for keep, e in expected.items():
            tm.assert_numpy_array_equal(idx.duplicated(keep=keep), e)
            # ``e`` is rebound from the duplicate mask to the expected
            # de-duplicated index (the entries not flagged as duplicates)
            e = idx[~e]
            result = idx.drop_duplicates(keep=keep)
            tm.assert_index_equal(result, e)

    @pytest.mark.parametrize(
        "data, categories, expected_data",
        [
            ([1, 1, 1], [1, 2, 3], [1]),
            ([1, 1, 1], list("abc"), [np.nan]),
            ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan]),
            ([2, "a", "b"], list("abc"), [np.nan, "a", "b"]),
        ],
    )
    def test_unique(self, data, categories, expected_data, ordered):
        """``unique`` keeps the dtype and returns ``expected_data``.

        ``ordered`` is a fixture that is not defined in this class --
        presumably supplied by a conftest; confirm.
        """
        dtype = CategoricalDtype(categories, ordered=ordered)

        idx = CategoricalIndex(data, dtype=dtype)
        expected = CategoricalIndex(expected_data, dtype=dtype)
        tm.assert_index_equal(idx.unique(), expected)

    def test_repr_roundtrip(self):
        """``eval(repr(ci))`` reproduces a small index; ``str`` works on a long one."""
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        str(ci)
        tm.assert_index_equal(eval(repr(ci)), ci, exact=True)

        # formatting
        str(ci)

        # long format
        # this is not reprable
        ci = CategoricalIndex(np.random.default_rng(2).integers(0, 5, size=100))
        str(ci)

    def test_isin(self):
        """``isin`` against lists (with and without NaN) and against re-categorised indexes."""
        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
        tm.assert_numpy_array_equal(
            ci.isin(["c"]), np.array([False, False, False, True, False, False])
        )
        tm.assert_numpy_array_equal(
            ci.isin(["c", "a", "b"]), np.array([True] * 5 + [False])
        )
        tm.assert_numpy_array_equal(
            ci.isin(["c", "a", "b", np.nan]), np.array([True] * 6)
        )

        # mismatched categorical -> coerced to ndarray so doesn't matter
        result = ci.isin(ci.set_categories(list("abcdefghi")))
        expected = np.array([True] * 6)
        tm.assert_numpy_array_equal(result, expected)

        result = ci.isin(ci.set_categories(list("defghi")))
        expected = np.array([False] * 5 + [True])
        tm.assert_numpy_array_equal(result, expected)

    def test_isin_overlapping_intervals(self):
        """``isin`` is True for every element when the categories are overlapping intervals."""
        # GH 34974
        idx = pd.IntervalIndex([pd.Interval(0, 2), pd.Interval(0, 1)])
        result = CategoricalIndex(idx).isin(idx)
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(result, expected)

    def test_identical(self):
        """``identical`` holds for self and a copy, not for an index with different categories."""
        ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)
        assert ci1.identical(ci1)
        assert ci1.identical(ci1.copy())
        assert not ci1.identical(ci2)

    def test_ensure_copied_data(self):
        """The ``copy`` argument of the constructor controls whether the codes are shared."""
        # gh-12309: Check the "copy" argument of each
        # Index.__new__ is honored.
        #
        # Must be tested separately from other indexes because
        # self.values is not an ndarray.
        index = CategoricalIndex(list("ab") * 5)

        result = CategoricalIndex(index.values, copy=True)
        tm.assert_index_equal(index, result)
        assert not np.shares_memory(result._data._codes, index._data._codes)

        result = CategoricalIndex(index.values, copy=False)
        assert result._data._codes is index._data._codes
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
class TestCategoricalIndex2:
    """Further ``CategoricalIndex`` tests: views, engine selection, arithmetic and delegated methods."""

    def test_view_i8(self):
        """``view("i8")`` raises for a length-100 index and matches the viewed codes once four entries are dropped."""
        # GH#25464
        ci = CategoricalIndex(list("ab") * 50)
        msg = "When changing to a larger dtype, its size must be a divisor"
        with pytest.raises(ValueError, match=msg):
            ci.view("i8")
        with pytest.raises(ValueError, match=msg):
            ci._data.view("i8")

        ci = ci[:-4]  # length divisible by 8

        res = ci.view("i8")
        expected = ci._data.codes.view("i8")
        tm.assert_numpy_array_equal(res, expected)

        cat = ci._data
        tm.assert_numpy_array_equal(cat.view("i8"), expected)

    @pytest.mark.parametrize(
        "dtype, engine_type",
        [
            (np.int8, libindex.Int8Engine),
            (np.int16, libindex.Int16Engine),
            (np.int32, libindex.Int32Engine),
            (np.int64, libindex.Int64Engine),
        ],
    )
    def test_engine_type(self, dtype, engine_type):
        """The index engine class matches the integer dtype of the codes."""
        if dtype != np.int64:
            # num. of uniques required to push CategoricalIndex.codes to a
            # dtype (128 categories required for .codes dtype to be int16 etc.)
            num_uniques = {np.int8: 1, np.int16: 128, np.int32: 32768}[dtype]
            ci = CategoricalIndex(range(num_uniques))
        else:
            # having 2**32 - 2**31 categories would be very memory-intensive,
            # so we cheat a bit with the dtype
            ci = CategoricalIndex(range(32768))  # == 2**16 - 2**(16 - 1)
            arr = ci.values._ndarray.astype("int64")
            # re-initialise the backing array in place with the int64 codes;
            # this must happen before ``ci._engine`` is first accessed below
            NDArrayBacked.__init__(ci._data, arr, ci.dtype)
        assert np.issubdtype(ci.codes.dtype, dtype)
        assert isinstance(ci._engine, engine_type)

    @pytest.mark.parametrize(
        "func,op_name",
        [
            (lambda idx: idx - idx, "__sub__"),
            (lambda idx: idx + idx, "__add__"),
            (lambda idx: idx - ["a", "b"], "__sub__"),
            (lambda idx: idx + ["a", "b"], "__add__"),
            (lambda idx: ["a", "b"] - idx, "__rsub__"),
            (lambda idx: ["a", "b"] + idx, "__radd__"),
        ],
    )
    def test_disallow_addsub_ops(self, func, op_name):
        """``+`` and ``-`` involving a categorical Index raise TypeError."""
        # GH 10039
        # set ops (+/-) raise TypeError
        idx = Index(Categorical(["a", "b"]))
        cat_or_list = "'(Categorical|list)' and '(Categorical|list)'"
        # any one of these messages is accepted
        msg = "|".join(
            [
                f"cannot perform {op_name} with this index type: CategoricalIndex",
                "can only concatenate list",
                rf"unsupported operand type\(s\) for [\+-]: {cat_or_list}",
            ]
        )
        with pytest.raises(TypeError, match=msg):
            func(idx)

    def test_method_delegation(self):
        """Category-manipulation methods called on the index return the expected CategoricalIndex."""
        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.set_categories(list("cab"))
        tm.assert_index_equal(
            result, CategoricalIndex(list("aabbca"), categories=list("cab"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.rename_categories(list("efg"))
        tm.assert_index_equal(
            result, CategoricalIndex(list("ffggef"), categories=list("efg"))
        )

        # GH18862 (let rename_categories take callables)
        result = ci.rename_categories(lambda x: x.upper())
        tm.assert_index_equal(
            result, CategoricalIndex(list("AABBCA"), categories=list("CAB"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.add_categories(["d"])
        tm.assert_index_equal(
            result, CategoricalIndex(list("aabbca"), categories=list("cabd"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.remove_categories(["c"])
        tm.assert_index_equal(
            result,
            CategoricalIndex(list("aabb") + [np.nan] + ["a"], categories=list("ab")),
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.as_unordered()
        tm.assert_index_equal(result, ci)

        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.as_ordered()
        tm.assert_index_equal(
            result,
            CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=True),
        )

        # invalid
        msg = "cannot use inplace with CategoricalIndex"
        with pytest.raises(ValueError, match=msg):
            ci.set_categories(list("cab"), inplace=True)

    def test_remove_maintains_order(self):
        """``remove_categories`` after ``reorder_categories`` keeps the reordered, ordered categories."""
        ci = CategoricalIndex(list("abcdda"), categories=list("abcd"))
        result = ci.reorder_categories(["d", "c", "b", "a"], ordered=True)
        tm.assert_index_equal(
            result,
            CategoricalIndex(list("abcdda"), categories=list("dcba"), ordered=True),
        )
        result = result.remove_categories(["c"])
        tm.assert_index_equal(
            result,
            CategoricalIndex(
                ["a", "b", np.nan, "d", "d", "a"], categories=list("dba"), ordered=True
            ),
        )
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_constructors.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import (
|
| 5 |
+
Categorical,
|
| 6 |
+
CategoricalDtype,
|
| 7 |
+
CategoricalIndex,
|
| 8 |
+
Index,
|
| 9 |
+
)
|
| 10 |
+
import pandas._testing as tm
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class TestCategoricalIndexConstructors:
    """Constructor tests for ``CategoricalIndex``."""

    def test_construction_disallows_scalar(self):
        """Scalar or missing data is rejected with a TypeError."""
        err = "must be called with a collection of some kind"
        with pytest.raises(TypeError, match=err):
            CategoricalIndex(data=1, categories=list("abcd"), ordered=False)
        with pytest.raises(TypeError, match=err):
            CategoricalIndex(categories=list("abcd"), ordered=False)

    def test_construction(self):
        """Construct from lists, Categoricals and other CategoricalIndexes."""
        ci = CategoricalIndex(list("aabbca"), categories=list("abcd"), ordered=False)
        categories = ci.categories

        res = Index(ci)
        tm.assert_index_equal(res, ci, exact=True)
        assert not res.ordered

        res = Index(ci.values)
        tm.assert_index_equal(res, ci, exact=True)
        assert not res.ordered

        # empty
        res = CategoricalIndex([], categories=categories)
        tm.assert_index_equal(res.categories, Index(categories))
        tm.assert_numpy_array_equal(res.codes, np.array([], dtype="int8"))
        assert not res.ordered

        # passing categories
        res = CategoricalIndex(list("aabbca"), categories=categories)
        tm.assert_index_equal(res.categories, Index(categories))
        tm.assert_numpy_array_equal(
            res.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )

        cat = Categorical(list("aabbca"))
        res = CategoricalIndex(cat)
        tm.assert_index_equal(res.categories, Index(list("abc")))
        tm.assert_numpy_array_equal(
            res.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not res.ordered

        res = CategoricalIndex(cat, categories=categories)
        tm.assert_index_equal(res.categories, Index(categories))
        tm.assert_numpy_array_equal(
            res.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not res.ordered

        ci = CategoricalIndex(cat, categories=list("abcd"))
        res = CategoricalIndex(ci)
        tm.assert_index_equal(res.categories, Index(categories))
        tm.assert_numpy_array_equal(
            res.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not res.ordered

        res = CategoricalIndex(ci, categories=list("ab"))
        tm.assert_index_equal(res.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(
            res.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
        )
        assert not res.ordered

        res = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        tm.assert_index_equal(res.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(
            res.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
        )
        assert res.ordered

        res = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        wanted = CategoricalIndex(
            ci, categories=list("ab"), ordered=True, dtype="category"
        )
        tm.assert_index_equal(res, wanted, exact=True)

        # turn me to an Index
        res = Index(np.array(ci))
        assert isinstance(res, Index)
        assert not isinstance(res, CategoricalIndex)

    def test_construction_with_dtype(self):
        """``Index(..., dtype="category")`` rebuilds an equal CategoricalIndex."""
        # specify dtype
        ci = CategoricalIndex(list("aabbca"), categories=list("abc"), ordered=False)

        res = Index(np.array(ci), dtype="category")
        tm.assert_index_equal(res, ci, exact=True)

        res = Index(np.array(ci).tolist(), dtype="category")
        tm.assert_index_equal(res, ci, exact=True)

        # these are generally only equal when the categories are reordered
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

        res = Index(np.array(ci), dtype="category").reorder_categories(ci.categories)
        tm.assert_index_equal(res, ci, exact=True)

        # make sure indexes are handled
        idx = Index(range(3))
        wanted = CategoricalIndex([0, 1, 2], categories=idx, ordered=True)
        res = CategoricalIndex(idx, categories=idx, ordered=True)
        tm.assert_index_equal(res, wanted, exact=True)

    def test_construction_empty_with_bool_categories(self):
        """An empty index with boolean categories keeps both categories."""
        # see GH#22702
        empty_ci = CategoricalIndex([], categories=[True, False])
        sorted_cats = sorted(empty_ci.categories.tolist())
        assert sorted_cats == [False, True]

    def test_construction_with_categorical_dtype(self):
        """A ``CategoricalDtype`` can replace, but not accompany, ``categories``/``ordered``."""
        # construction with CategoricalDtype
        # GH#18109
        data = "a a b b".split()
        cats = "c b a".split()
        ordered = True
        dtype = CategoricalDtype(categories=cats, ordered=ordered)

        wanted = CategoricalIndex(data, categories=cats, ordered=ordered)
        res = CategoricalIndex(data, dtype=dtype)
        tm.assert_index_equal(res, wanted, exact=True)

        # GH#19032
        res = Index(data, dtype=dtype)
        tm.assert_index_equal(res, wanted, exact=True)

        # error when combining categories/ordered and dtype kwargs
        err = "Cannot specify `categories` or `ordered` together with `dtype`."
        with pytest.raises(ValueError, match=err):
            CategoricalIndex(data, categories=cats, dtype=dtype)

        with pytest.raises(ValueError, match=err):
            CategoricalIndex(data, ordered=ordered, dtype=dtype)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_equals.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import (
|
| 5 |
+
Categorical,
|
| 6 |
+
CategoricalIndex,
|
| 7 |
+
Index,
|
| 8 |
+
MultiIndex,
|
| 9 |
+
)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class TestEquals:
|
| 13 |
+
def test_equals_categorical(self):
|
| 14 |
+
ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
|
| 15 |
+
ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)
|
| 16 |
+
|
| 17 |
+
assert ci1.equals(ci1)
|
| 18 |
+
assert not ci1.equals(ci2)
|
| 19 |
+
assert ci1.equals(ci1.astype(object))
|
| 20 |
+
assert ci1.astype(object).equals(ci1)
|
| 21 |
+
|
| 22 |
+
assert (ci1 == ci1).all()
|
| 23 |
+
assert not (ci1 != ci1).all()
|
| 24 |
+
assert not (ci1 > ci1).all()
|
| 25 |
+
assert not (ci1 < ci1).all()
|
| 26 |
+
assert (ci1 <= ci1).all()
|
| 27 |
+
assert (ci1 >= ci1).all()
|
| 28 |
+
|
| 29 |
+
assert not (ci1 == 1).all()
|
| 30 |
+
assert (ci1 == Index(["a", "b"])).all()
|
| 31 |
+
assert (ci1 == ci1.values).all()
|
| 32 |
+
|
| 33 |
+
# invalid comparisons
|
| 34 |
+
with pytest.raises(ValueError, match="Lengths must match"):
|
| 35 |
+
ci1 == Index(["a", "b", "c"])
|
| 36 |
+
|
| 37 |
+
msg = "Categoricals can only be compared if 'categories' are the same"
|
| 38 |
+
with pytest.raises(TypeError, match=msg):
|
| 39 |
+
ci1 == ci2
|
| 40 |
+
with pytest.raises(TypeError, match=msg):
|
| 41 |
+
ci1 == Categorical(ci1.values, ordered=False)
|
| 42 |
+
with pytest.raises(TypeError, match=msg):
|
| 43 |
+
ci1 == Categorical(ci1.values, categories=list("abc"))
|
| 44 |
+
|
| 45 |
+
# tests
|
| 46 |
+
# make sure that we are testing for category inclusion properly
|
| 47 |
+
ci = CategoricalIndex(list("aabca"), categories=["c", "a", "b"])
|
| 48 |
+
assert not ci.equals(list("aabca"))
|
| 49 |
+
# Same categories, but different order
|
| 50 |
+
# Unordered
|
| 51 |
+
assert ci.equals(CategoricalIndex(list("aabca")))
|
| 52 |
+
# Ordered
|
| 53 |
+
assert not ci.equals(CategoricalIndex(list("aabca"), ordered=True))
|
| 54 |
+
assert ci.equals(ci.copy())
|
| 55 |
+
|
| 56 |
+
ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
|
| 57 |
+
assert not ci.equals(list("aabca"))
|
| 58 |
+
assert not ci.equals(CategoricalIndex(list("aabca")))
|
| 59 |
+
assert ci.equals(ci.copy())
|
| 60 |
+
|
| 61 |
+
ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
|
| 62 |
+
assert not ci.equals(list("aabca") + [np.nan])
|
| 63 |
+
assert ci.equals(CategoricalIndex(list("aabca") + [np.nan]))
|
| 64 |
+
assert not ci.equals(CategoricalIndex(list("aabca") + [np.nan], ordered=True))
|
| 65 |
+
assert ci.equals(ci.copy())
|
| 66 |
+
|
| 67 |
+
def test_equals_categorical_unordered(self):
|
| 68 |
+
# https://github.com/pandas-dev/pandas/issues/16603
|
| 69 |
+
a = CategoricalIndex(["A"], categories=["A", "B"])
|
| 70 |
+
b = CategoricalIndex(["A"], categories=["B", "A"])
|
| 71 |
+
c = CategoricalIndex(["C"], categories=["B", "A"])
|
| 72 |
+
assert a.equals(b)
|
| 73 |
+
assert not a.equals(c)
|
| 74 |
+
assert not b.equals(c)
|
| 75 |
+
|
| 76 |
+
def test_equals_non_category(self):
|
| 77 |
+
# GH#37667 Case where other contains a value not among ci's
|
| 78 |
+
# categories ("D") and also contains np.nan
|
| 79 |
+
ci = CategoricalIndex(["A", "B", np.nan, np.nan])
|
| 80 |
+
other = Index(["A", "B", "D", np.nan])
|
| 81 |
+
|
| 82 |
+
assert not ci.equals(other)
|
| 83 |
+
|
| 84 |
+
def test_equals_multiindex(self):
|
| 85 |
+
# dont raise NotImplementedError when calling is_dtype_compat
|
| 86 |
+
|
| 87 |
+
mi = MultiIndex.from_arrays([["A", "B", "C", "D"], range(4)])
|
| 88 |
+
ci = mi.to_flat_index().astype("category")
|
| 89 |
+
|
| 90 |
+
assert not ci.equals(mi)
|
| 91 |
+
|
| 92 |
+
def test_equals_string_dtype(self, any_string_dtype):
|
| 93 |
+
# GH#55364
|
| 94 |
+
idx = CategoricalIndex(list("abc"), name="B")
|
| 95 |
+
other = Index(["a", "b", "c"], name="B", dtype=any_string_dtype)
|
| 96 |
+
assert idx.equals(other)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_fillna.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import CategoricalIndex
|
| 5 |
+
import pandas._testing as tm
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class TestFillNA:
|
| 9 |
+
def test_fillna_categorical(self):
|
| 10 |
+
# GH#11343
|
| 11 |
+
idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x")
|
| 12 |
+
# fill by value in categories
|
| 13 |
+
exp = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name="x")
|
| 14 |
+
tm.assert_index_equal(idx.fillna(1.0), exp)
|
| 15 |
+
|
| 16 |
+
cat = idx._data
|
| 17 |
+
|
| 18 |
+
# fill by value not in categories raises TypeError on EA, casts on CI
|
| 19 |
+
msg = "Cannot setitem on a Categorical with a new category"
|
| 20 |
+
with pytest.raises(TypeError, match=msg):
|
| 21 |
+
cat.fillna(2.0)
|
| 22 |
+
|
| 23 |
+
result = idx.fillna(2.0)
|
| 24 |
+
expected = idx.astype(object).fillna(2.0)
|
| 25 |
+
tm.assert_index_equal(result, expected)
|
| 26 |
+
|
| 27 |
+
def test_fillna_copies_with_no_nas(self):
|
| 28 |
+
# Nothing to fill, should still get a copy for the Categorical method,
|
| 29 |
+
# but OK to get a view on CategoricalIndex method
|
| 30 |
+
ci = CategoricalIndex([0, 1, 1])
|
| 31 |
+
result = ci.fillna(0)
|
| 32 |
+
assert result is not ci
|
| 33 |
+
assert tm.shares_memory(result, ci)
|
| 34 |
+
|
| 35 |
+
# But at the EA level we always get a copy.
|
| 36 |
+
cat = ci._data
|
| 37 |
+
result = cat.fillna(0)
|
| 38 |
+
assert result._ndarray is not cat._ndarray
|
| 39 |
+
assert result._ndarray.base is None
|
| 40 |
+
assert not tm.shares_memory(result, cat)
|
| 41 |
+
|
| 42 |
+
def test_fillna_validates_with_no_nas(self):
|
| 43 |
+
# We validate the fill value even if fillna is a no-op
|
| 44 |
+
ci = CategoricalIndex([2, 3, 3])
|
| 45 |
+
cat = ci._data
|
| 46 |
+
|
| 47 |
+
msg = "Cannot setitem on a Categorical with a new category"
|
| 48 |
+
res = ci.fillna(False)
|
| 49 |
+
# nothing to fill, so we dont cast
|
| 50 |
+
tm.assert_index_equal(res, ci)
|
| 51 |
+
|
| 52 |
+
# Same check directly on the Categorical
|
| 53 |
+
with pytest.raises(TypeError, match=msg):
|
| 54 |
+
cat.fillna(False)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_formats.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests for CategoricalIndex.__repr__ and related methods.
|
| 3 |
+
"""
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
from pandas._config import using_string_dtype
|
| 7 |
+
import pandas._config.config as cf
|
| 8 |
+
|
| 9 |
+
from pandas import CategoricalIndex
|
| 10 |
+
import pandas._testing as tm
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class TestCategoricalIndexRepr:
|
| 14 |
+
def test_format_different_scalar_lengths(self):
|
| 15 |
+
# GH#35439
|
| 16 |
+
idx = CategoricalIndex(["aaaaaaaaa", "b"])
|
| 17 |
+
expected = ["aaaaaaaaa", "b"]
|
| 18 |
+
msg = r"CategoricalIndex\.format is deprecated"
|
| 19 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 20 |
+
assert idx.format() == expected
|
| 21 |
+
|
| 22 |
+
@pytest.mark.xfail(using_string_dtype(), reason="repr different")
|
| 23 |
+
def test_string_categorical_index_repr(self):
|
| 24 |
+
# short
|
| 25 |
+
idx = CategoricalIndex(["a", "bb", "ccc"])
|
| 26 |
+
expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa: E501
|
| 27 |
+
assert repr(idx) == expected
|
| 28 |
+
|
| 29 |
+
# multiple lines
|
| 30 |
+
idx = CategoricalIndex(["a", "bb", "ccc"] * 10)
|
| 31 |
+
expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
|
| 32 |
+
'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb',
|
| 33 |
+
'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
|
| 34 |
+
categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa: E501
|
| 35 |
+
|
| 36 |
+
assert repr(idx) == expected
|
| 37 |
+
|
| 38 |
+
# truncated
|
| 39 |
+
idx = CategoricalIndex(["a", "bb", "ccc"] * 100)
|
| 40 |
+
expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
|
| 41 |
+
...
|
| 42 |
+
'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
|
| 43 |
+
categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" # noqa: E501
|
| 44 |
+
|
| 45 |
+
assert repr(idx) == expected
|
| 46 |
+
|
| 47 |
+
# larger categories
|
| 48 |
+
idx = CategoricalIndex(list("abcdefghijklmmo"))
|
| 49 |
+
expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
|
| 50 |
+
'm', 'm', 'o'],
|
| 51 |
+
categories=['a', 'b', 'c', 'd', ..., 'k', 'l', 'm', 'o'], ordered=False, dtype='category')""" # noqa: E501
|
| 52 |
+
|
| 53 |
+
assert repr(idx) == expected
|
| 54 |
+
|
| 55 |
+
# short
|
| 56 |
+
idx = CategoricalIndex(["あ", "いい", "ううう"])
|
| 57 |
+
expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
|
| 58 |
+
assert repr(idx) == expected
|
| 59 |
+
|
| 60 |
+
# multiple lines
|
| 61 |
+
idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
|
| 62 |
+
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
|
| 63 |
+
'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
|
| 64 |
+
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
|
| 65 |
+
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
|
| 66 |
+
|
| 67 |
+
assert repr(idx) == expected
|
| 68 |
+
|
| 69 |
+
# truncated
|
| 70 |
+
idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
|
| 71 |
+
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
|
| 72 |
+
...
|
| 73 |
+
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
|
| 74 |
+
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa: E501
|
| 75 |
+
|
| 76 |
+
assert repr(idx) == expected
|
| 77 |
+
|
| 78 |
+
# larger categories
|
| 79 |
+
idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
|
| 80 |
+
expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し',
|
| 81 |
+
'す', 'せ', 'そ'],
|
| 82 |
+
categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')""" # noqa: E501
|
| 83 |
+
|
| 84 |
+
assert repr(idx) == expected
|
| 85 |
+
|
| 86 |
+
# Enable Unicode option -----------------------------------------
|
| 87 |
+
with cf.option_context("display.unicode.east_asian_width", True):
|
| 88 |
+
# short
|
| 89 |
+
idx = CategoricalIndex(["あ", "いい", "ううう"])
|
| 90 |
+
expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
|
| 91 |
+
assert repr(idx) == expected
|
| 92 |
+
|
| 93 |
+
# multiple lines
|
| 94 |
+
idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
|
| 95 |
+
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
|
| 96 |
+
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
|
| 97 |
+
'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
|
| 98 |
+
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
|
| 99 |
+
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
|
| 100 |
+
|
| 101 |
+
assert repr(idx) == expected
|
| 102 |
+
|
| 103 |
+
# truncated
|
| 104 |
+
idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
|
| 105 |
+
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
|
| 106 |
+
'ううう', 'あ',
|
| 107 |
+
...
|
| 108 |
+
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
|
| 109 |
+
'あ', 'いい', 'ううう'],
|
| 110 |
+
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa: E501
|
| 111 |
+
|
| 112 |
+
assert repr(idx) == expected
|
| 113 |
+
|
| 114 |
+
# larger categories
|
| 115 |
+
idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
|
| 116 |
+
expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ',
|
| 117 |
+
'さ', 'し', 'す', 'せ', 'そ'],
|
| 118 |
+
categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')""" # noqa: E501
|
| 119 |
+
|
| 120 |
+
assert repr(idx) == expected
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_indexing.py
ADDED
|
@@ -0,0 +1,420 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas.errors import InvalidIndexError
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from pandas import (
|
| 8 |
+
CategoricalIndex,
|
| 9 |
+
Index,
|
| 10 |
+
IntervalIndex,
|
| 11 |
+
Timestamp,
|
| 12 |
+
)
|
| 13 |
+
import pandas._testing as tm
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class TestTake:
|
| 17 |
+
def test_take_fill_value(self):
|
| 18 |
+
# GH 12631
|
| 19 |
+
|
| 20 |
+
# numeric category
|
| 21 |
+
idx = CategoricalIndex([1, 2, 3], name="xxx")
|
| 22 |
+
result = idx.take(np.array([1, 0, -1]))
|
| 23 |
+
expected = CategoricalIndex([2, 1, 3], name="xxx")
|
| 24 |
+
tm.assert_index_equal(result, expected)
|
| 25 |
+
tm.assert_categorical_equal(result.values, expected.values)
|
| 26 |
+
|
| 27 |
+
# fill_value
|
| 28 |
+
result = idx.take(np.array([1, 0, -1]), fill_value=True)
|
| 29 |
+
expected = CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx")
|
| 30 |
+
tm.assert_index_equal(result, expected)
|
| 31 |
+
tm.assert_categorical_equal(result.values, expected.values)
|
| 32 |
+
|
| 33 |
+
# allow_fill=False
|
| 34 |
+
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
|
| 35 |
+
expected = CategoricalIndex([2, 1, 3], name="xxx")
|
| 36 |
+
tm.assert_index_equal(result, expected)
|
| 37 |
+
tm.assert_categorical_equal(result.values, expected.values)
|
| 38 |
+
|
| 39 |
+
# object category
|
| 40 |
+
idx = CategoricalIndex(
|
| 41 |
+
list("CBA"), categories=list("ABC"), ordered=True, name="xxx"
|
| 42 |
+
)
|
| 43 |
+
result = idx.take(np.array([1, 0, -1]))
|
| 44 |
+
expected = CategoricalIndex(
|
| 45 |
+
list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
|
| 46 |
+
)
|
| 47 |
+
tm.assert_index_equal(result, expected)
|
| 48 |
+
tm.assert_categorical_equal(result.values, expected.values)
|
| 49 |
+
|
| 50 |
+
# fill_value
|
| 51 |
+
result = idx.take(np.array([1, 0, -1]), fill_value=True)
|
| 52 |
+
expected = CategoricalIndex(
|
| 53 |
+
["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx"
|
| 54 |
+
)
|
| 55 |
+
tm.assert_index_equal(result, expected)
|
| 56 |
+
tm.assert_categorical_equal(result.values, expected.values)
|
| 57 |
+
|
| 58 |
+
# allow_fill=False
|
| 59 |
+
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
|
| 60 |
+
expected = CategoricalIndex(
|
| 61 |
+
list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
|
| 62 |
+
)
|
| 63 |
+
tm.assert_index_equal(result, expected)
|
| 64 |
+
tm.assert_categorical_equal(result.values, expected.values)
|
| 65 |
+
|
| 66 |
+
msg = (
|
| 67 |
+
"When allow_fill=True and fill_value is not None, "
|
| 68 |
+
"all indices must be >= -1"
|
| 69 |
+
)
|
| 70 |
+
with pytest.raises(ValueError, match=msg):
|
| 71 |
+
idx.take(np.array([1, 0, -2]), fill_value=True)
|
| 72 |
+
with pytest.raises(ValueError, match=msg):
|
| 73 |
+
idx.take(np.array([1, 0, -5]), fill_value=True)
|
| 74 |
+
|
| 75 |
+
msg = "index -5 is out of bounds for (axis 0 with )?size 3"
|
| 76 |
+
with pytest.raises(IndexError, match=msg):
|
| 77 |
+
idx.take(np.array([1, -5]))
|
| 78 |
+
|
| 79 |
+
def test_take_fill_value_datetime(self):
|
| 80 |
+
# datetime category
|
| 81 |
+
idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
|
| 82 |
+
idx = CategoricalIndex(idx)
|
| 83 |
+
result = idx.take(np.array([1, 0, -1]))
|
| 84 |
+
expected = pd.DatetimeIndex(
|
| 85 |
+
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
|
| 86 |
+
)
|
| 87 |
+
expected = CategoricalIndex(expected)
|
| 88 |
+
tm.assert_index_equal(result, expected)
|
| 89 |
+
|
| 90 |
+
# fill_value
|
| 91 |
+
result = idx.take(np.array([1, 0, -1]), fill_value=True)
|
| 92 |
+
expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
|
| 93 |
+
exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"])
|
| 94 |
+
expected = CategoricalIndex(expected, categories=exp_cats)
|
| 95 |
+
tm.assert_index_equal(result, expected)
|
| 96 |
+
|
| 97 |
+
# allow_fill=False
|
| 98 |
+
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
|
| 99 |
+
expected = pd.DatetimeIndex(
|
| 100 |
+
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
|
| 101 |
+
)
|
| 102 |
+
expected = CategoricalIndex(expected)
|
| 103 |
+
tm.assert_index_equal(result, expected)
|
| 104 |
+
|
| 105 |
+
msg = (
|
| 106 |
+
"When allow_fill=True and fill_value is not None, "
|
| 107 |
+
"all indices must be >= -1"
|
| 108 |
+
)
|
| 109 |
+
with pytest.raises(ValueError, match=msg):
|
| 110 |
+
idx.take(np.array([1, 0, -2]), fill_value=True)
|
| 111 |
+
with pytest.raises(ValueError, match=msg):
|
| 112 |
+
idx.take(np.array([1, 0, -5]), fill_value=True)
|
| 113 |
+
|
| 114 |
+
msg = "index -5 is out of bounds for (axis 0 with )?size 3"
|
| 115 |
+
with pytest.raises(IndexError, match=msg):
|
| 116 |
+
idx.take(np.array([1, -5]))
|
| 117 |
+
|
| 118 |
+
def test_take_invalid_kwargs(self):
|
| 119 |
+
idx = CategoricalIndex([1, 2, 3], name="foo")
|
| 120 |
+
indices = [1, 0, -1]
|
| 121 |
+
|
| 122 |
+
msg = r"take\(\) got an unexpected keyword argument 'foo'"
|
| 123 |
+
with pytest.raises(TypeError, match=msg):
|
| 124 |
+
idx.take(indices, foo=2)
|
| 125 |
+
|
| 126 |
+
msg = "the 'out' parameter is not supported"
|
| 127 |
+
with pytest.raises(ValueError, match=msg):
|
| 128 |
+
idx.take(indices, out=indices)
|
| 129 |
+
|
| 130 |
+
msg = "the 'mode' parameter is not supported"
|
| 131 |
+
with pytest.raises(ValueError, match=msg):
|
| 132 |
+
idx.take(indices, mode="clip")
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
class TestGetLoc:
|
| 136 |
+
def test_get_loc(self):
|
| 137 |
+
# GH 12531
|
| 138 |
+
cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc"))
|
| 139 |
+
idx1 = Index(list("abcde"))
|
| 140 |
+
assert cidx1.get_loc("a") == idx1.get_loc("a")
|
| 141 |
+
assert cidx1.get_loc("e") == idx1.get_loc("e")
|
| 142 |
+
|
| 143 |
+
for i in [cidx1, idx1]:
|
| 144 |
+
with pytest.raises(KeyError, match="'NOT-EXIST'"):
|
| 145 |
+
i.get_loc("NOT-EXIST")
|
| 146 |
+
|
| 147 |
+
# non-unique
|
| 148 |
+
cidx2 = CategoricalIndex(list("aacded"), categories=list("edabc"))
|
| 149 |
+
idx2 = Index(list("aacded"))
|
| 150 |
+
|
| 151 |
+
# results in bool array
|
| 152 |
+
res = cidx2.get_loc("d")
|
| 153 |
+
tm.assert_numpy_array_equal(res, idx2.get_loc("d"))
|
| 154 |
+
tm.assert_numpy_array_equal(
|
| 155 |
+
res, np.array([False, False, False, True, False, True])
|
| 156 |
+
)
|
| 157 |
+
# unique element results in scalar
|
| 158 |
+
res = cidx2.get_loc("e")
|
| 159 |
+
assert res == idx2.get_loc("e")
|
| 160 |
+
assert res == 4
|
| 161 |
+
|
| 162 |
+
for i in [cidx2, idx2]:
|
| 163 |
+
with pytest.raises(KeyError, match="'NOT-EXIST'"):
|
| 164 |
+
i.get_loc("NOT-EXIST")
|
| 165 |
+
|
| 166 |
+
# non-unique, sliceable
|
| 167 |
+
cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc"))
|
| 168 |
+
idx3 = Index(list("aabbb"))
|
| 169 |
+
|
| 170 |
+
# results in slice
|
| 171 |
+
res = cidx3.get_loc("a")
|
| 172 |
+
assert res == idx3.get_loc("a")
|
| 173 |
+
assert res == slice(0, 2, None)
|
| 174 |
+
|
| 175 |
+
res = cidx3.get_loc("b")
|
| 176 |
+
assert res == idx3.get_loc("b")
|
| 177 |
+
assert res == slice(2, 5, None)
|
| 178 |
+
|
| 179 |
+
for i in [cidx3, idx3]:
|
| 180 |
+
with pytest.raises(KeyError, match="'c'"):
|
| 181 |
+
i.get_loc("c")
|
| 182 |
+
|
| 183 |
+
def test_get_loc_unique(self):
|
| 184 |
+
cidx = CategoricalIndex(list("abc"))
|
| 185 |
+
result = cidx.get_loc("b")
|
| 186 |
+
assert result == 1
|
| 187 |
+
|
| 188 |
+
def test_get_loc_monotonic_nonunique(self):
|
| 189 |
+
cidx = CategoricalIndex(list("abbc"))
|
| 190 |
+
result = cidx.get_loc("b")
|
| 191 |
+
expected = slice(1, 3, None)
|
| 192 |
+
assert result == expected
|
| 193 |
+
|
| 194 |
+
def test_get_loc_nonmonotonic_nonunique(self):
|
| 195 |
+
cidx = CategoricalIndex(list("abcb"))
|
| 196 |
+
result = cidx.get_loc("b")
|
| 197 |
+
expected = np.array([False, True, False, True], dtype=bool)
|
| 198 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 199 |
+
|
| 200 |
+
def test_get_loc_nan(self):
|
| 201 |
+
# GH#41933
|
| 202 |
+
ci = CategoricalIndex(["A", "B", np.nan])
|
| 203 |
+
res = ci.get_loc(np.nan)
|
| 204 |
+
|
| 205 |
+
assert res == 2
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
class TestGetIndexer:
|
| 209 |
+
def test_get_indexer_base(self):
|
| 210 |
+
# Determined by cat ordering.
|
| 211 |
+
idx = CategoricalIndex(list("cab"), categories=list("cab"))
|
| 212 |
+
expected = np.arange(len(idx), dtype=np.intp)
|
| 213 |
+
|
| 214 |
+
actual = idx.get_indexer(idx)
|
| 215 |
+
tm.assert_numpy_array_equal(expected, actual)
|
| 216 |
+
|
| 217 |
+
with pytest.raises(ValueError, match="Invalid fill method"):
|
| 218 |
+
idx.get_indexer(idx, method="invalid")
|
| 219 |
+
|
| 220 |
+
def test_get_indexer_requires_unique(self):
|
| 221 |
+
ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
|
| 222 |
+
oidx = Index(np.array(ci))
|
| 223 |
+
|
| 224 |
+
msg = "Reindexing only valid with uniquely valued Index objects"
|
| 225 |
+
|
| 226 |
+
for n in [1, 2, 5, len(ci)]:
|
| 227 |
+
finder = oidx[np.random.default_rng(2).integers(0, len(ci), size=n)]
|
| 228 |
+
|
| 229 |
+
with pytest.raises(InvalidIndexError, match=msg):
|
| 230 |
+
ci.get_indexer(finder)
|
| 231 |
+
|
| 232 |
+
# see gh-17323
|
| 233 |
+
#
|
| 234 |
+
# Even when indexer is equal to the
|
| 235 |
+
# members in the index, we should
|
| 236 |
+
# respect duplicates instead of taking
|
| 237 |
+
# the fast-track path.
|
| 238 |
+
for finder in [list("aabbca"), list("aababca")]:
|
| 239 |
+
with pytest.raises(InvalidIndexError, match=msg):
|
| 240 |
+
ci.get_indexer(finder)
|
| 241 |
+
|
| 242 |
+
def test_get_indexer_non_unique(self):
|
| 243 |
+
idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
|
| 244 |
+
idx2 = CategoricalIndex(list("abf"))
|
| 245 |
+
|
| 246 |
+
for indexer in [idx2, list("abf"), Index(list("abf"))]:
|
| 247 |
+
msg = "Reindexing only valid with uniquely valued Index objects"
|
| 248 |
+
with pytest.raises(InvalidIndexError, match=msg):
|
| 249 |
+
idx1.get_indexer(indexer)
|
| 250 |
+
|
| 251 |
+
r1, _ = idx1.get_indexer_non_unique(indexer)
|
| 252 |
+
expected = np.array([0, 1, 2, -1], dtype=np.intp)
|
| 253 |
+
tm.assert_almost_equal(r1, expected)
|
| 254 |
+
|
| 255 |
+
def test_get_indexer_method(self):
|
| 256 |
+
idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
|
| 257 |
+
idx2 = CategoricalIndex(list("abf"))
|
| 258 |
+
|
| 259 |
+
msg = "method pad not yet implemented for CategoricalIndex"
|
| 260 |
+
with pytest.raises(NotImplementedError, match=msg):
|
| 261 |
+
idx2.get_indexer(idx1, method="pad")
|
| 262 |
+
msg = "method backfill not yet implemented for CategoricalIndex"
|
| 263 |
+
with pytest.raises(NotImplementedError, match=msg):
|
| 264 |
+
idx2.get_indexer(idx1, method="backfill")
|
| 265 |
+
|
| 266 |
+
msg = "method nearest not yet implemented for CategoricalIndex"
|
| 267 |
+
with pytest.raises(NotImplementedError, match=msg):
|
| 268 |
+
idx2.get_indexer(idx1, method="nearest")
|
| 269 |
+
|
| 270 |
+
def test_get_indexer_array(self):
|
| 271 |
+
arr = np.array(
|
| 272 |
+
[Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")],
|
| 273 |
+
dtype=object,
|
| 274 |
+
)
|
| 275 |
+
cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")]
|
| 276 |
+
ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category")
|
| 277 |
+
result = ci.get_indexer(arr)
|
| 278 |
+
expected = np.array([0, 1], dtype="intp")
|
| 279 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 280 |
+
|
| 281 |
+
def test_get_indexer_same_categories_same_order(self):
|
| 282 |
+
ci = CategoricalIndex(["a", "b"], categories=["a", "b"])
|
| 283 |
+
|
| 284 |
+
result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["a", "b"]))
|
| 285 |
+
expected = np.array([1, 1], dtype="intp")
|
| 286 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 287 |
+
|
| 288 |
+
def test_get_indexer_same_categories_different_order(self):
|
| 289 |
+
# https://github.com/pandas-dev/pandas/issues/19551
|
| 290 |
+
ci = CategoricalIndex(["a", "b"], categories=["a", "b"])
|
| 291 |
+
|
| 292 |
+
result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["b", "a"]))
|
| 293 |
+
expected = np.array([1, 1], dtype="intp")
|
| 294 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 295 |
+
|
| 296 |
+
def test_get_indexer_nans_in_index_and_target(self):
|
| 297 |
+
# GH 45361
|
| 298 |
+
ci = CategoricalIndex([1, 2, np.nan, 3])
|
| 299 |
+
other1 = [2, 3, 4, np.nan]
|
| 300 |
+
res1 = ci.get_indexer(other1)
|
| 301 |
+
expected1 = np.array([1, 3, -1, 2], dtype=np.intp)
|
| 302 |
+
tm.assert_numpy_array_equal(res1, expected1)
|
| 303 |
+
other2 = [1, 4, 2, 3]
|
| 304 |
+
res2 = ci.get_indexer(other2)
|
| 305 |
+
expected2 = np.array([0, -1, 1, 3], dtype=np.intp)
|
| 306 |
+
tm.assert_numpy_array_equal(res2, expected2)
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
class TestWhere:
|
| 310 |
+
def test_where(self, listlike_box):
|
| 311 |
+
klass = listlike_box
|
| 312 |
+
|
| 313 |
+
i = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
|
| 314 |
+
cond = [True] * len(i)
|
| 315 |
+
expected = i
|
| 316 |
+
result = i.where(klass(cond))
|
| 317 |
+
tm.assert_index_equal(result, expected)
|
| 318 |
+
|
| 319 |
+
cond = [False] + [True] * (len(i) - 1)
|
| 320 |
+
expected = CategoricalIndex([np.nan] + i[1:].tolist(), categories=i.categories)
|
| 321 |
+
result = i.where(klass(cond))
|
| 322 |
+
tm.assert_index_equal(result, expected)
|
| 323 |
+
|
| 324 |
+
def test_where_non_categories(self):
|
| 325 |
+
ci = CategoricalIndex(["a", "b", "c", "d"])
|
| 326 |
+
mask = np.array([True, False, True, False])
|
| 327 |
+
|
| 328 |
+
result = ci.where(mask, 2)
|
| 329 |
+
expected = Index(["a", 2, "c", 2], dtype=object)
|
| 330 |
+
tm.assert_index_equal(result, expected)
|
| 331 |
+
|
| 332 |
+
msg = "Cannot setitem on a Categorical with a new category"
|
| 333 |
+
with pytest.raises(TypeError, match=msg):
|
| 334 |
+
# Test the Categorical method directly
|
| 335 |
+
ci._data._where(mask, 2)
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
class TestContains:
|
| 339 |
+
def test_contains(self):
|
| 340 |
+
ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=False)
|
| 341 |
+
|
| 342 |
+
assert "a" in ci
|
| 343 |
+
assert "z" not in ci
|
| 344 |
+
assert "e" not in ci
|
| 345 |
+
assert np.nan not in ci
|
| 346 |
+
|
| 347 |
+
# assert codes NOT in index
|
| 348 |
+
assert 0 not in ci
|
| 349 |
+
assert 1 not in ci
|
| 350 |
+
|
| 351 |
+
def test_contains_nan(self):
|
| 352 |
+
ci = CategoricalIndex(list("aabbca") + [np.nan], categories=list("cabdef"))
|
| 353 |
+
assert np.nan in ci
|
| 354 |
+
|
| 355 |
+
@pytest.mark.parametrize("unwrap", [True, False])
|
| 356 |
+
def test_contains_na_dtype(self, unwrap):
|
| 357 |
+
dti = pd.date_range("2016-01-01", periods=100).insert(0, pd.NaT)
|
| 358 |
+
pi = dti.to_period("D")
|
| 359 |
+
tdi = dti - dti[-1]
|
| 360 |
+
ci = CategoricalIndex(dti)
|
| 361 |
+
|
| 362 |
+
obj = ci
|
| 363 |
+
if unwrap:
|
| 364 |
+
obj = ci._data
|
| 365 |
+
|
| 366 |
+
assert np.nan in obj
|
| 367 |
+
assert None in obj
|
| 368 |
+
assert pd.NaT in obj
|
| 369 |
+
assert np.datetime64("NaT") in obj
|
| 370 |
+
assert np.timedelta64("NaT") not in obj
|
| 371 |
+
|
| 372 |
+
obj2 = CategoricalIndex(tdi)
|
| 373 |
+
if unwrap:
|
| 374 |
+
obj2 = obj2._data
|
| 375 |
+
|
| 376 |
+
assert np.nan in obj2
|
| 377 |
+
assert None in obj2
|
| 378 |
+
assert pd.NaT in obj2
|
| 379 |
+
assert np.datetime64("NaT") not in obj2
|
| 380 |
+
assert np.timedelta64("NaT") in obj2
|
| 381 |
+
|
| 382 |
+
obj3 = CategoricalIndex(pi)
|
| 383 |
+
if unwrap:
|
| 384 |
+
obj3 = obj3._data
|
| 385 |
+
|
| 386 |
+
assert np.nan in obj3
|
| 387 |
+
assert None in obj3
|
| 388 |
+
assert pd.NaT in obj3
|
| 389 |
+
assert np.datetime64("NaT") not in obj3
|
| 390 |
+
assert np.timedelta64("NaT") not in obj3
|
| 391 |
+
|
| 392 |
+
@pytest.mark.parametrize(
|
| 393 |
+
"item, expected",
|
| 394 |
+
[
|
| 395 |
+
(pd.Interval(0, 1), True),
|
| 396 |
+
(1.5, True),
|
| 397 |
+
(pd.Interval(0.5, 1.5), False),
|
| 398 |
+
("a", False),
|
| 399 |
+
(Timestamp(1), False),
|
| 400 |
+
(pd.Timedelta(1), False),
|
| 401 |
+
],
|
| 402 |
+
ids=str,
|
| 403 |
+
)
|
| 404 |
+
def test_contains_interval(self, item, expected):
|
| 405 |
+
# GH 23705
|
| 406 |
+
ci = CategoricalIndex(IntervalIndex.from_breaks(range(3)))
|
| 407 |
+
result = item in ci
|
| 408 |
+
assert result is expected
|
| 409 |
+
|
| 410 |
+
def test_contains_list(self):
|
| 411 |
+
# GH#21729
|
| 412 |
+
idx = CategoricalIndex([1, 2, 3])
|
| 413 |
+
|
| 414 |
+
assert "a" not in idx
|
| 415 |
+
|
| 416 |
+
with pytest.raises(TypeError, match="unhashable type"):
|
| 417 |
+
["a"] in idx
|
| 418 |
+
|
| 419 |
+
with pytest.raises(TypeError, match="unhashable type"):
|
| 420 |
+
["a", "b"] in idx
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_map.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from pandas import (
|
| 6 |
+
CategoricalIndex,
|
| 7 |
+
Index,
|
| 8 |
+
Series,
|
| 9 |
+
)
|
| 10 |
+
import pandas._testing as tm
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@pytest.mark.parametrize(
|
| 14 |
+
"data, categories",
|
| 15 |
+
[
|
| 16 |
+
(list("abcbca"), list("cab")),
|
| 17 |
+
(pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)),
|
| 18 |
+
],
|
| 19 |
+
ids=["string", "interval"],
|
| 20 |
+
)
|
| 21 |
+
def test_map_str(data, categories, ordered):
|
| 22 |
+
# GH 31202 - override base class since we want to maintain categorical/ordered
|
| 23 |
+
index = CategoricalIndex(data, categories=categories, ordered=ordered)
|
| 24 |
+
result = index.map(str)
|
| 25 |
+
expected = CategoricalIndex(
|
| 26 |
+
map(str, data), categories=map(str, categories), ordered=ordered
|
| 27 |
+
)
|
| 28 |
+
tm.assert_index_equal(result, expected)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def test_map():
|
| 32 |
+
ci = CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True)
|
| 33 |
+
result = ci.map(lambda x: x.lower())
|
| 34 |
+
exp = CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True)
|
| 35 |
+
tm.assert_index_equal(result, exp)
|
| 36 |
+
|
| 37 |
+
ci = CategoricalIndex(
|
| 38 |
+
list("ABABC"), categories=list("BAC"), ordered=False, name="XXX"
|
| 39 |
+
)
|
| 40 |
+
result = ci.map(lambda x: x.lower())
|
| 41 |
+
exp = CategoricalIndex(
|
| 42 |
+
list("ababc"), categories=list("bac"), ordered=False, name="XXX"
|
| 43 |
+
)
|
| 44 |
+
tm.assert_index_equal(result, exp)
|
| 45 |
+
|
| 46 |
+
# GH 12766: Return an index not an array
|
| 47 |
+
tm.assert_index_equal(
|
| 48 |
+
ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX")
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
# change categories dtype
|
| 52 |
+
ci = CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False)
|
| 53 |
+
|
| 54 |
+
def f(x):
|
| 55 |
+
return {"A": 10, "B": 20, "C": 30}.get(x)
|
| 56 |
+
|
| 57 |
+
result = ci.map(f)
|
| 58 |
+
exp = CategoricalIndex([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False)
|
| 59 |
+
tm.assert_index_equal(result, exp)
|
| 60 |
+
|
| 61 |
+
result = ci.map(Series([10, 20, 30], index=["A", "B", "C"]))
|
| 62 |
+
tm.assert_index_equal(result, exp)
|
| 63 |
+
|
| 64 |
+
result = ci.map({"A": 10, "B": 20, "C": 30})
|
| 65 |
+
tm.assert_index_equal(result, exp)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def test_map_with_categorical_series():
|
| 69 |
+
# GH 12756
|
| 70 |
+
a = Index([1, 2, 3, 4])
|
| 71 |
+
b = Series(["even", "odd", "even", "odd"], dtype="category")
|
| 72 |
+
c = Series(["even", "odd", "even", "odd"])
|
| 73 |
+
|
| 74 |
+
exp = CategoricalIndex(["odd", "even", "odd", np.nan])
|
| 75 |
+
tm.assert_index_equal(a.map(b), exp)
|
| 76 |
+
exp = Index(["odd", "even", "odd", np.nan])
|
| 77 |
+
tm.assert_index_equal(a.map(c), exp)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
@pytest.mark.parametrize(
|
| 81 |
+
("data", "f", "expected"),
|
| 82 |
+
(
|
| 83 |
+
([1, 1, np.nan], pd.isna, CategoricalIndex([False, False, np.nan])),
|
| 84 |
+
([1, 2, np.nan], pd.isna, Index([False, False, np.nan])),
|
| 85 |
+
([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
|
| 86 |
+
([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
|
| 87 |
+
(
|
| 88 |
+
[1, 1, np.nan],
|
| 89 |
+
Series([False, False]),
|
| 90 |
+
CategoricalIndex([False, False, np.nan]),
|
| 91 |
+
),
|
| 92 |
+
(
|
| 93 |
+
[1, 2, np.nan],
|
| 94 |
+
Series([False, False, False]),
|
| 95 |
+
Index([False, False, np.nan]),
|
| 96 |
+
),
|
| 97 |
+
),
|
| 98 |
+
)
|
| 99 |
+
def test_map_with_nan_ignore(data, f, expected): # GH 24241
|
| 100 |
+
values = CategoricalIndex(data)
|
| 101 |
+
result = values.map(f, na_action="ignore")
|
| 102 |
+
tm.assert_index_equal(result, expected)
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
@pytest.mark.parametrize(
|
| 106 |
+
("data", "f", "expected"),
|
| 107 |
+
(
|
| 108 |
+
([1, 1, np.nan], pd.isna, Index([False, False, True])),
|
| 109 |
+
([1, 2, np.nan], pd.isna, Index([False, False, True])),
|
| 110 |
+
([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
|
| 111 |
+
([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
|
| 112 |
+
(
|
| 113 |
+
[1, 1, np.nan],
|
| 114 |
+
Series([False, False]),
|
| 115 |
+
CategoricalIndex([False, False, np.nan]),
|
| 116 |
+
),
|
| 117 |
+
(
|
| 118 |
+
[1, 2, np.nan],
|
| 119 |
+
Series([False, False, False]),
|
| 120 |
+
Index([False, False, np.nan]),
|
| 121 |
+
),
|
| 122 |
+
),
|
| 123 |
+
)
|
| 124 |
+
def test_map_with_nan_none(data, f, expected): # GH 24241
|
| 125 |
+
values = CategoricalIndex(data)
|
| 126 |
+
result = values.map(f, na_action=None)
|
| 127 |
+
tm.assert_index_equal(result, expected)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def test_map_with_dict_or_series():
|
| 131 |
+
orig_values = ["a", "B", 1, "a"]
|
| 132 |
+
new_values = ["one", 2, 3.0, "one"]
|
| 133 |
+
cur_index = CategoricalIndex(orig_values, name="XXX")
|
| 134 |
+
expected = CategoricalIndex(new_values, name="XXX", categories=[3.0, 2, "one"])
|
| 135 |
+
|
| 136 |
+
mapper = Series(new_values[:-1], index=orig_values[:-1])
|
| 137 |
+
result = cur_index.map(mapper)
|
| 138 |
+
# Order of categories in result can be different
|
| 139 |
+
tm.assert_index_equal(result, expected)
|
| 140 |
+
|
| 141 |
+
mapper = dict(zip(orig_values[:-1], new_values[:-1]))
|
| 142 |
+
result = cur_index.map(mapper)
|
| 143 |
+
# Order of categories in result can be different
|
| 144 |
+
tm.assert_index_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_reindex.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import (
|
| 5 |
+
Categorical,
|
| 6 |
+
CategoricalIndex,
|
| 7 |
+
Index,
|
| 8 |
+
Interval,
|
| 9 |
+
)
|
| 10 |
+
import pandas._testing as tm
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class TestReindex:
|
| 14 |
+
def test_reindex_list_non_unique(self):
|
| 15 |
+
# GH#11586
|
| 16 |
+
msg = "cannot reindex on an axis with duplicate labels"
|
| 17 |
+
ci = CategoricalIndex(["a", "b", "c", "a"])
|
| 18 |
+
with pytest.raises(ValueError, match=msg):
|
| 19 |
+
ci.reindex(["a", "c"])
|
| 20 |
+
|
| 21 |
+
def test_reindex_categorical_non_unique(self):
|
| 22 |
+
msg = "cannot reindex on an axis with duplicate labels"
|
| 23 |
+
ci = CategoricalIndex(["a", "b", "c", "a"])
|
| 24 |
+
with pytest.raises(ValueError, match=msg):
|
| 25 |
+
ci.reindex(Categorical(["a", "c"]))
|
| 26 |
+
|
| 27 |
+
def test_reindex_list_non_unique_unused_category(self):
|
| 28 |
+
msg = "cannot reindex on an axis with duplicate labels"
|
| 29 |
+
ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
|
| 30 |
+
with pytest.raises(ValueError, match=msg):
|
| 31 |
+
ci.reindex(["a", "c"])
|
| 32 |
+
|
| 33 |
+
def test_reindex_categorical_non_unique_unused_category(self):
|
| 34 |
+
msg = "cannot reindex on an axis with duplicate labels"
|
| 35 |
+
ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
|
| 36 |
+
with pytest.raises(ValueError, match=msg):
|
| 37 |
+
ci.reindex(Categorical(["a", "c"]))
|
| 38 |
+
|
| 39 |
+
def test_reindex_duplicate_target(self):
|
| 40 |
+
# See GH25459
|
| 41 |
+
cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
|
| 42 |
+
res, indexer = cat.reindex(["a", "c", "c"])
|
| 43 |
+
exp = Index(["a", "c", "c"])
|
| 44 |
+
tm.assert_index_equal(res, exp, exact=True)
|
| 45 |
+
tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))
|
| 46 |
+
|
| 47 |
+
res, indexer = cat.reindex(
|
| 48 |
+
CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
|
| 49 |
+
)
|
| 50 |
+
exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
|
| 51 |
+
tm.assert_index_equal(res, exp, exact=True)
|
| 52 |
+
tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))
|
| 53 |
+
|
| 54 |
+
def test_reindex_empty_index(self):
|
| 55 |
+
# See GH16770
|
| 56 |
+
c = CategoricalIndex([])
|
| 57 |
+
res, indexer = c.reindex(["a", "b"])
|
| 58 |
+
tm.assert_index_equal(res, Index(["a", "b"]), exact=True)
|
| 59 |
+
tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp))
|
| 60 |
+
|
| 61 |
+
def test_reindex_categorical_added_category(self):
|
| 62 |
+
# GH 42424
|
| 63 |
+
ci = CategoricalIndex(
|
| 64 |
+
[Interval(0, 1, closed="right"), Interval(1, 2, closed="right")],
|
| 65 |
+
ordered=True,
|
| 66 |
+
)
|
| 67 |
+
ci_add = CategoricalIndex(
|
| 68 |
+
[
|
| 69 |
+
Interval(0, 1, closed="right"),
|
| 70 |
+
Interval(1, 2, closed="right"),
|
| 71 |
+
Interval(2, 3, closed="right"),
|
| 72 |
+
Interval(3, 4, closed="right"),
|
| 73 |
+
],
|
| 74 |
+
ordered=True,
|
| 75 |
+
)
|
| 76 |
+
result, _ = ci.reindex(ci_add)
|
| 77 |
+
expected = ci_add
|
| 78 |
+
tm.assert_index_equal(expected, result)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_setops.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import (
|
| 5 |
+
CategoricalIndex,
|
| 6 |
+
Index,
|
| 7 |
+
)
|
| 8 |
+
import pandas._testing as tm
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@pytest.mark.parametrize("na_value", [None, np.nan])
|
| 12 |
+
def test_difference_with_na(na_value):
|
| 13 |
+
# GH 57318
|
| 14 |
+
ci = CategoricalIndex(["a", "b", "c", None])
|
| 15 |
+
other = Index(["c", na_value])
|
| 16 |
+
result = ci.difference(other)
|
| 17 |
+
expected = CategoricalIndex(["a", "b"], categories=["a", "b", "c"])
|
| 18 |
+
tm.assert_index_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/__init__.py
ADDED
|
File without changes
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_arithmetic.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Arithmetic tests specific to DatetimeIndex are generally about `freq`
|
| 2 |
+
# retention or inference. Other arithmetic tests belong in
|
| 3 |
+
# tests/arithmetic/test_datetime64.py
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
from pandas import (
|
| 7 |
+
Timedelta,
|
| 8 |
+
TimedeltaIndex,
|
| 9 |
+
Timestamp,
|
| 10 |
+
date_range,
|
| 11 |
+
timedelta_range,
|
| 12 |
+
)
|
| 13 |
+
import pandas._testing as tm
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class TestDatetimeIndexArithmetic:
|
| 17 |
+
def test_add_timedelta_preserves_freq(self):
|
| 18 |
+
# GH#37295 should hold for any DTI with freq=None or Tick freq
|
| 19 |
+
tz = "Canada/Eastern"
|
| 20 |
+
dti = date_range(
|
| 21 |
+
start=Timestamp("2019-03-26 00:00:00-0400", tz=tz),
|
| 22 |
+
end=Timestamp("2020-10-17 00:00:00-0400", tz=tz),
|
| 23 |
+
freq="D",
|
| 24 |
+
)
|
| 25 |
+
result = dti + Timedelta(days=1)
|
| 26 |
+
assert result.freq == dti.freq
|
| 27 |
+
|
| 28 |
+
def test_sub_datetime_preserves_freq(self, tz_naive_fixture):
|
| 29 |
+
# GH#48818
|
| 30 |
+
dti = date_range("2016-01-01", periods=12, tz=tz_naive_fixture)
|
| 31 |
+
|
| 32 |
+
res = dti - dti[0]
|
| 33 |
+
expected = timedelta_range("0 Days", "11 Days")
|
| 34 |
+
tm.assert_index_equal(res, expected)
|
| 35 |
+
assert res.freq == expected.freq
|
| 36 |
+
|
| 37 |
+
@pytest.mark.xfail(
|
| 38 |
+
reason="The inherited freq is incorrect bc dti.freq is incorrect "
|
| 39 |
+
"https://github.com/pandas-dev/pandas/pull/48818/files#r982793461"
|
| 40 |
+
)
|
| 41 |
+
def test_sub_datetime_preserves_freq_across_dst(self):
|
| 42 |
+
# GH#48818
|
| 43 |
+
ts = Timestamp("2016-03-11", tz="US/Pacific")
|
| 44 |
+
dti = date_range(ts, periods=4)
|
| 45 |
+
|
| 46 |
+
res = dti - dti[0]
|
| 47 |
+
expected = TimedeltaIndex(
|
| 48 |
+
[
|
| 49 |
+
Timedelta(days=0),
|
| 50 |
+
Timedelta(days=1),
|
| 51 |
+
Timedelta(days=2),
|
| 52 |
+
Timedelta(days=2, hours=23),
|
| 53 |
+
]
|
| 54 |
+
)
|
| 55 |
+
tm.assert_index_equal(res, expected)
|
| 56 |
+
assert res.freq == expected.freq
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_constructors.py
ADDED
|
@@ -0,0 +1,1204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from datetime import (
|
| 4 |
+
datetime,
|
| 5 |
+
timedelta,
|
| 6 |
+
timezone,
|
| 7 |
+
)
|
| 8 |
+
from functools import partial
|
| 9 |
+
from operator import attrgetter
|
| 10 |
+
|
| 11 |
+
import dateutil
|
| 12 |
+
import dateutil.tz
|
| 13 |
+
from dateutil.tz import gettz
|
| 14 |
+
import numpy as np
|
| 15 |
+
import pytest
|
| 16 |
+
import pytz
|
| 17 |
+
|
| 18 |
+
from pandas._libs.tslibs import (
|
| 19 |
+
OutOfBoundsDatetime,
|
| 20 |
+
astype_overflowsafe,
|
| 21 |
+
timezones,
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
import pandas as pd
|
| 25 |
+
from pandas import (
|
| 26 |
+
DatetimeIndex,
|
| 27 |
+
Index,
|
| 28 |
+
Timestamp,
|
| 29 |
+
date_range,
|
| 30 |
+
offsets,
|
| 31 |
+
to_datetime,
|
| 32 |
+
)
|
| 33 |
+
import pandas._testing as tm
|
| 34 |
+
from pandas.core.arrays import period_array
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class TestDatetimeIndex:
|
| 38 |
+
def test_closed_deprecated(self):
|
| 39 |
+
# GH#52628
|
| 40 |
+
msg = "The 'closed' keyword"
|
| 41 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 42 |
+
DatetimeIndex([], closed=True)
|
| 43 |
+
|
| 44 |
+
def test_normalize_deprecated(self):
|
| 45 |
+
# GH#52628
|
| 46 |
+
msg = "The 'normalize' keyword"
|
| 47 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 48 |
+
DatetimeIndex([], normalize=True)
|
| 49 |
+
|
| 50 |
+
def test_from_dt64_unsupported_unit(self):
|
| 51 |
+
# GH#49292
|
| 52 |
+
val = np.datetime64(1, "D")
|
| 53 |
+
result = DatetimeIndex([val], tz="US/Pacific")
|
| 54 |
+
|
| 55 |
+
expected = DatetimeIndex([val.astype("M8[s]")], tz="US/Pacific")
|
| 56 |
+
tm.assert_index_equal(result, expected)
|
| 57 |
+
|
| 58 |
+
def test_explicit_tz_none(self):
|
| 59 |
+
# GH#48659
|
| 60 |
+
dti = date_range("2016-01-01", periods=10, tz="UTC")
|
| 61 |
+
|
| 62 |
+
msg = "Passed data is timezone-aware, incompatible with 'tz=None'"
|
| 63 |
+
with pytest.raises(ValueError, match=msg):
|
| 64 |
+
DatetimeIndex(dti, tz=None)
|
| 65 |
+
|
| 66 |
+
with pytest.raises(ValueError, match=msg):
|
| 67 |
+
DatetimeIndex(np.array(dti), tz=None)
|
| 68 |
+
|
| 69 |
+
msg = "Cannot pass both a timezone-aware dtype and tz=None"
|
| 70 |
+
with pytest.raises(ValueError, match=msg):
|
| 71 |
+
DatetimeIndex([], dtype="M8[ns, UTC]", tz=None)
|
| 72 |
+
|
| 73 |
+
def test_freq_validation_with_nat(self):
|
| 74 |
+
# GH#11587 make sure we get a useful error message when generate_range
|
| 75 |
+
# raises
|
| 76 |
+
msg = (
|
| 77 |
+
"Inferred frequency None from passed values does not conform "
|
| 78 |
+
"to passed frequency D"
|
| 79 |
+
)
|
| 80 |
+
with pytest.raises(ValueError, match=msg):
|
| 81 |
+
DatetimeIndex([pd.NaT, Timestamp("2011-01-01")], freq="D")
|
| 82 |
+
with pytest.raises(ValueError, match=msg):
|
| 83 |
+
DatetimeIndex([pd.NaT, Timestamp("2011-01-01")._value], freq="D")
|
| 84 |
+
|
| 85 |
+
# TODO: better place for tests shared by DTI/TDI?
|
| 86 |
+
@pytest.mark.parametrize(
|
| 87 |
+
"index",
|
| 88 |
+
[
|
| 89 |
+
date_range("2016-01-01", periods=5, tz="US/Pacific"),
|
| 90 |
+
pd.timedelta_range("1 Day", periods=5),
|
| 91 |
+
],
|
| 92 |
+
)
|
| 93 |
+
def test_shallow_copy_inherits_array_freq(self, index):
|
| 94 |
+
# If we pass a DTA/TDA to shallow_copy and don't specify a freq,
|
| 95 |
+
# we should inherit the array's freq, not our own.
|
| 96 |
+
array = index._data
|
| 97 |
+
|
| 98 |
+
arr = array[[0, 3, 2, 4, 1]]
|
| 99 |
+
assert arr.freq is None
|
| 100 |
+
|
| 101 |
+
result = index._shallow_copy(arr)
|
| 102 |
+
assert result.freq is None
|
| 103 |
+
|
| 104 |
+
def test_categorical_preserves_tz(self):
|
| 105 |
+
# GH#18664 retain tz when going DTI-->Categorical-->DTI
|
| 106 |
+
dti = DatetimeIndex(
|
| 107 |
+
[pd.NaT, "2015-01-01", "1999-04-06 15:14:13", "2015-01-01"], tz="US/Eastern"
|
| 108 |
+
)
|
| 109 |
+
|
| 110 |
+
for dtobj in [dti, dti._data]:
|
| 111 |
+
# works for DatetimeIndex or DatetimeArray
|
| 112 |
+
|
| 113 |
+
ci = pd.CategoricalIndex(dtobj)
|
| 114 |
+
carr = pd.Categorical(dtobj)
|
| 115 |
+
cser = pd.Series(ci)
|
| 116 |
+
|
| 117 |
+
for obj in [ci, carr, cser]:
|
| 118 |
+
result = DatetimeIndex(obj)
|
| 119 |
+
tm.assert_index_equal(result, dti)
|
| 120 |
+
|
| 121 |
+
def test_dti_with_period_data_raises(self):
|
| 122 |
+
# GH#23675
|
| 123 |
+
data = pd.PeriodIndex(["2016Q1", "2016Q2"], freq="Q")
|
| 124 |
+
|
| 125 |
+
with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
|
| 126 |
+
DatetimeIndex(data)
|
| 127 |
+
|
| 128 |
+
with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
|
| 129 |
+
to_datetime(data)
|
| 130 |
+
|
| 131 |
+
with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
|
| 132 |
+
DatetimeIndex(period_array(data))
|
| 133 |
+
|
| 134 |
+
with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
|
| 135 |
+
to_datetime(period_array(data))
|
| 136 |
+
|
| 137 |
+
def test_dti_with_timedelta64_data_raises(self):
|
| 138 |
+
# GH#23675 deprecated, enforced in GH#29794
|
| 139 |
+
data = np.array([0], dtype="m8[ns]")
|
| 140 |
+
msg = r"timedelta64\[ns\] cannot be converted to datetime64"
|
| 141 |
+
with pytest.raises(TypeError, match=msg):
|
| 142 |
+
DatetimeIndex(data)
|
| 143 |
+
|
| 144 |
+
with pytest.raises(TypeError, match=msg):
|
| 145 |
+
to_datetime(data)
|
| 146 |
+
|
| 147 |
+
with pytest.raises(TypeError, match=msg):
|
| 148 |
+
DatetimeIndex(pd.TimedeltaIndex(data))
|
| 149 |
+
|
| 150 |
+
with pytest.raises(TypeError, match=msg):
|
| 151 |
+
to_datetime(pd.TimedeltaIndex(data))
|
| 152 |
+
|
| 153 |
+
def test_constructor_from_sparse_array(self):
|
| 154 |
+
# https://github.com/pandas-dev/pandas/issues/35843
|
| 155 |
+
values = [
|
| 156 |
+
Timestamp("2012-05-01T01:00:00.000000"),
|
| 157 |
+
Timestamp("2016-05-01T01:00:00.000000"),
|
| 158 |
+
]
|
| 159 |
+
arr = pd.arrays.SparseArray(values)
|
| 160 |
+
result = Index(arr)
|
| 161 |
+
assert type(result) is Index
|
| 162 |
+
assert result.dtype == arr.dtype
|
| 163 |
+
|
| 164 |
+
def test_construction_caching(self):
|
| 165 |
+
df = pd.DataFrame(
|
| 166 |
+
{
|
| 167 |
+
"dt": date_range("20130101", periods=3),
|
| 168 |
+
"dttz": date_range("20130101", periods=3, tz="US/Eastern"),
|
| 169 |
+
"dt_with_null": [
|
| 170 |
+
Timestamp("20130101"),
|
| 171 |
+
pd.NaT,
|
| 172 |
+
Timestamp("20130103"),
|
| 173 |
+
],
|
| 174 |
+
"dtns": date_range("20130101", periods=3, freq="ns"),
|
| 175 |
+
}
|
| 176 |
+
)
|
| 177 |
+
assert df.dttz.dtype.tz.zone == "US/Eastern"
|
| 178 |
+
|
| 179 |
+
@pytest.mark.parametrize(
|
| 180 |
+
"kwargs",
|
| 181 |
+
[{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
|
| 182 |
+
)
|
| 183 |
+
def test_construction_with_alt(self, kwargs, tz_aware_fixture):
|
| 184 |
+
tz = tz_aware_fixture
|
| 185 |
+
i = date_range("20130101", periods=5, freq="h", tz=tz)
|
| 186 |
+
kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}
|
| 187 |
+
result = DatetimeIndex(i, **kwargs)
|
| 188 |
+
tm.assert_index_equal(i, result)
|
| 189 |
+
|
| 190 |
+
@pytest.mark.parametrize(
|
| 191 |
+
"kwargs",
|
| 192 |
+
[{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
|
| 193 |
+
)
|
| 194 |
+
def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
|
| 195 |
+
tz = tz_aware_fixture
|
| 196 |
+
i = date_range("20130101", periods=5, freq="h", tz=tz)
|
| 197 |
+
i = i._with_freq(None)
|
| 198 |
+
kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}
|
| 199 |
+
|
| 200 |
+
if "tz" in kwargs:
|
| 201 |
+
result = DatetimeIndex(i.asi8, tz="UTC").tz_convert(kwargs["tz"])
|
| 202 |
+
|
| 203 |
+
expected = DatetimeIndex(i, **kwargs)
|
| 204 |
+
tm.assert_index_equal(result, expected)
|
| 205 |
+
|
| 206 |
+
# localize into the provided tz
|
| 207 |
+
i2 = DatetimeIndex(i.tz_localize(None).asi8, tz="UTC")
|
| 208 |
+
expected = i.tz_localize(None).tz_localize("UTC")
|
| 209 |
+
tm.assert_index_equal(i2, expected)
|
| 210 |
+
|
| 211 |
+
# incompat tz/dtype
|
| 212 |
+
msg = "cannot supply both a tz and a dtype with a tz"
|
| 213 |
+
with pytest.raises(ValueError, match=msg):
|
| 214 |
+
DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype, tz="US/Pacific")
|
| 215 |
+
|
| 216 |
+
def test_construction_index_with_mixed_timezones(self):
|
| 217 |
+
# gh-11488: no tz results in DatetimeIndex
|
| 218 |
+
result = Index([Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx")
|
| 219 |
+
exp = DatetimeIndex(
|
| 220 |
+
[Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
|
| 221 |
+
)
|
| 222 |
+
tm.assert_index_equal(result, exp, exact=True)
|
| 223 |
+
assert isinstance(result, DatetimeIndex)
|
| 224 |
+
assert result.tz is None
|
| 225 |
+
|
| 226 |
+
# same tz results in DatetimeIndex
|
| 227 |
+
result = Index(
|
| 228 |
+
[
|
| 229 |
+
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
|
| 230 |
+
Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
|
| 231 |
+
],
|
| 232 |
+
name="idx",
|
| 233 |
+
)
|
| 234 |
+
exp = DatetimeIndex(
|
| 235 |
+
[Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")],
|
| 236 |
+
tz="Asia/Tokyo",
|
| 237 |
+
name="idx",
|
| 238 |
+
)
|
| 239 |
+
tm.assert_index_equal(result, exp, exact=True)
|
| 240 |
+
assert isinstance(result, DatetimeIndex)
|
| 241 |
+
assert result.tz is not None
|
| 242 |
+
assert result.tz == exp.tz
|
| 243 |
+
|
| 244 |
+
# same tz results in DatetimeIndex (DST)
|
| 245 |
+
result = Index(
|
| 246 |
+
[
|
| 247 |
+
Timestamp("2011-01-01 10:00", tz="US/Eastern"),
|
| 248 |
+
Timestamp("2011-08-01 10:00", tz="US/Eastern"),
|
| 249 |
+
],
|
| 250 |
+
name="idx",
|
| 251 |
+
)
|
| 252 |
+
exp = DatetimeIndex(
|
| 253 |
+
[Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")],
|
| 254 |
+
tz="US/Eastern",
|
| 255 |
+
name="idx",
|
| 256 |
+
)
|
| 257 |
+
tm.assert_index_equal(result, exp, exact=True)
|
| 258 |
+
assert isinstance(result, DatetimeIndex)
|
| 259 |
+
assert result.tz is not None
|
| 260 |
+
assert result.tz == exp.tz
|
| 261 |
+
|
| 262 |
+
# Different tz results in Index(dtype=object)
|
| 263 |
+
result = Index(
|
| 264 |
+
[
|
| 265 |
+
Timestamp("2011-01-01 10:00"),
|
| 266 |
+
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
|
| 267 |
+
],
|
| 268 |
+
name="idx",
|
| 269 |
+
)
|
| 270 |
+
exp = Index(
|
| 271 |
+
[
|
| 272 |
+
Timestamp("2011-01-01 10:00"),
|
| 273 |
+
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
|
| 274 |
+
],
|
| 275 |
+
dtype="object",
|
| 276 |
+
name="idx",
|
| 277 |
+
)
|
| 278 |
+
tm.assert_index_equal(result, exp, exact=True)
|
| 279 |
+
assert not isinstance(result, DatetimeIndex)
|
| 280 |
+
|
| 281 |
+
result = Index(
|
| 282 |
+
[
|
| 283 |
+
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
|
| 284 |
+
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
|
| 285 |
+
],
|
| 286 |
+
name="idx",
|
| 287 |
+
)
|
| 288 |
+
exp = Index(
|
| 289 |
+
[
|
| 290 |
+
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
|
| 291 |
+
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
|
| 292 |
+
],
|
| 293 |
+
dtype="object",
|
| 294 |
+
name="idx",
|
| 295 |
+
)
|
| 296 |
+
tm.assert_index_equal(result, exp, exact=True)
|
| 297 |
+
assert not isinstance(result, DatetimeIndex)
|
| 298 |
+
|
| 299 |
+
msg = "DatetimeIndex has mixed timezones"
|
| 300 |
+
msg_depr = "parsing datetimes with mixed time zones will raise an error"
|
| 301 |
+
with pytest.raises(TypeError, match=msg):
|
| 302 |
+
with tm.assert_produces_warning(FutureWarning, match=msg_depr):
|
| 303 |
+
DatetimeIndex(["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"])
|
| 304 |
+
|
| 305 |
+
# length = 1
|
| 306 |
+
result = Index([Timestamp("2011-01-01")], name="idx")
|
| 307 |
+
exp = DatetimeIndex([Timestamp("2011-01-01")], name="idx")
|
| 308 |
+
tm.assert_index_equal(result, exp, exact=True)
|
| 309 |
+
assert isinstance(result, DatetimeIndex)
|
| 310 |
+
assert result.tz is None
|
| 311 |
+
|
| 312 |
+
# length = 1 with tz
|
| 313 |
+
result = Index([Timestamp("2011-01-01 10:00", tz="Asia/Tokyo")], name="idx")
|
| 314 |
+
exp = DatetimeIndex(
|
| 315 |
+
[Timestamp("2011-01-01 10:00")], tz="Asia/Tokyo", name="idx"
|
| 316 |
+
)
|
| 317 |
+
tm.assert_index_equal(result, exp, exact=True)
|
| 318 |
+
assert isinstance(result, DatetimeIndex)
|
| 319 |
+
assert result.tz is not None
|
| 320 |
+
assert result.tz == exp.tz
|
| 321 |
+
|
| 322 |
+
def test_construction_index_with_mixed_timezones_with_NaT(self):
|
| 323 |
+
# see gh-11488
|
| 324 |
+
result = Index(
|
| 325 |
+
[pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
|
| 326 |
+
name="idx",
|
| 327 |
+
)
|
| 328 |
+
exp = DatetimeIndex(
|
| 329 |
+
[pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
|
| 330 |
+
name="idx",
|
| 331 |
+
)
|
| 332 |
+
tm.assert_index_equal(result, exp, exact=True)
|
| 333 |
+
assert isinstance(result, DatetimeIndex)
|
| 334 |
+
assert result.tz is None
|
| 335 |
+
|
| 336 |
+
# Same tz results in DatetimeIndex
|
| 337 |
+
result = Index(
|
| 338 |
+
[
|
| 339 |
+
pd.NaT,
|
| 340 |
+
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
|
| 341 |
+
pd.NaT,
|
| 342 |
+
Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
|
| 343 |
+
],
|
| 344 |
+
name="idx",
|
| 345 |
+
)
|
| 346 |
+
exp = DatetimeIndex(
|
| 347 |
+
[
|
| 348 |
+
pd.NaT,
|
| 349 |
+
Timestamp("2011-01-01 10:00"),
|
| 350 |
+
pd.NaT,
|
| 351 |
+
Timestamp("2011-01-02 10:00"),
|
| 352 |
+
],
|
| 353 |
+
tz="Asia/Tokyo",
|
| 354 |
+
name="idx",
|
| 355 |
+
)
|
| 356 |
+
tm.assert_index_equal(result, exp, exact=True)
|
| 357 |
+
assert isinstance(result, DatetimeIndex)
|
| 358 |
+
assert result.tz is not None
|
| 359 |
+
assert result.tz == exp.tz
|
| 360 |
+
|
| 361 |
+
# same tz results in DatetimeIndex (DST)
|
| 362 |
+
result = Index(
|
| 363 |
+
[
|
| 364 |
+
Timestamp("2011-01-01 10:00", tz="US/Eastern"),
|
| 365 |
+
pd.NaT,
|
| 366 |
+
Timestamp("2011-08-01 10:00", tz="US/Eastern"),
|
| 367 |
+
],
|
| 368 |
+
name="idx",
|
| 369 |
+
)
|
| 370 |
+
exp = DatetimeIndex(
|
| 371 |
+
[Timestamp("2011-01-01 10:00"), pd.NaT, Timestamp("2011-08-01 10:00")],
|
| 372 |
+
tz="US/Eastern",
|
| 373 |
+
name="idx",
|
| 374 |
+
)
|
| 375 |
+
tm.assert_index_equal(result, exp, exact=True)
|
| 376 |
+
assert isinstance(result, DatetimeIndex)
|
| 377 |
+
assert result.tz is not None
|
| 378 |
+
assert result.tz == exp.tz
|
| 379 |
+
|
| 380 |
+
# different tz results in Index(dtype=object)
|
| 381 |
+
result = Index(
|
| 382 |
+
[
|
| 383 |
+
pd.NaT,
|
| 384 |
+
Timestamp("2011-01-01 10:00"),
|
| 385 |
+
pd.NaT,
|
| 386 |
+
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
|
| 387 |
+
],
|
| 388 |
+
name="idx",
|
| 389 |
+
)
|
| 390 |
+
exp = Index(
|
| 391 |
+
[
|
| 392 |
+
pd.NaT,
|
| 393 |
+
Timestamp("2011-01-01 10:00"),
|
| 394 |
+
pd.NaT,
|
| 395 |
+
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
|
| 396 |
+
],
|
| 397 |
+
dtype="object",
|
| 398 |
+
name="idx",
|
| 399 |
+
)
|
| 400 |
+
tm.assert_index_equal(result, exp, exact=True)
|
| 401 |
+
assert not isinstance(result, DatetimeIndex)
|
| 402 |
+
|
| 403 |
+
result = Index(
|
| 404 |
+
[
|
| 405 |
+
pd.NaT,
|
| 406 |
+
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
|
| 407 |
+
pd.NaT,
|
| 408 |
+
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
|
| 409 |
+
],
|
| 410 |
+
name="idx",
|
| 411 |
+
)
|
| 412 |
+
exp = Index(
|
| 413 |
+
[
|
| 414 |
+
pd.NaT,
|
| 415 |
+
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
|
| 416 |
+
pd.NaT,
|
| 417 |
+
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
|
| 418 |
+
],
|
| 419 |
+
dtype="object",
|
| 420 |
+
name="idx",
|
| 421 |
+
)
|
| 422 |
+
tm.assert_index_equal(result, exp, exact=True)
|
| 423 |
+
assert not isinstance(result, DatetimeIndex)
|
| 424 |
+
|
| 425 |
+
# all NaT
|
| 426 |
+
result = Index([pd.NaT, pd.NaT], name="idx")
|
| 427 |
+
exp = DatetimeIndex([pd.NaT, pd.NaT], name="idx")
|
| 428 |
+
tm.assert_index_equal(result, exp, exact=True)
|
| 429 |
+
assert isinstance(result, DatetimeIndex)
|
| 430 |
+
assert result.tz is None
|
| 431 |
+
|
| 432 |
+
def test_construction_dti_with_mixed_timezones(self):
|
| 433 |
+
# GH 11488 (not changed, added explicit tests)
|
| 434 |
+
|
| 435 |
+
# no tz results in DatetimeIndex
|
| 436 |
+
result = DatetimeIndex(
|
| 437 |
+
[Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
|
| 438 |
+
)
|
| 439 |
+
exp = DatetimeIndex(
|
| 440 |
+
[Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
|
| 441 |
+
)
|
| 442 |
+
tm.assert_index_equal(result, exp, exact=True)
|
| 443 |
+
assert isinstance(result, DatetimeIndex)
|
| 444 |
+
|
| 445 |
+
# same tz results in DatetimeIndex
|
| 446 |
+
result = DatetimeIndex(
|
| 447 |
+
[
|
| 448 |
+
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
|
| 449 |
+
Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
|
| 450 |
+
],
|
| 451 |
+
name="idx",
|
| 452 |
+
)
|
| 453 |
+
exp = DatetimeIndex(
|
| 454 |
+
[Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")],
|
| 455 |
+
tz="Asia/Tokyo",
|
| 456 |
+
name="idx",
|
| 457 |
+
)
|
| 458 |
+
tm.assert_index_equal(result, exp, exact=True)
|
| 459 |
+
assert isinstance(result, DatetimeIndex)
|
| 460 |
+
|
| 461 |
+
# same tz results in DatetimeIndex (DST)
|
| 462 |
+
result = DatetimeIndex(
|
| 463 |
+
[
|
| 464 |
+
Timestamp("2011-01-01 10:00", tz="US/Eastern"),
|
| 465 |
+
Timestamp("2011-08-01 10:00", tz="US/Eastern"),
|
| 466 |
+
],
|
| 467 |
+
name="idx",
|
| 468 |
+
)
|
| 469 |
+
exp = DatetimeIndex(
|
| 470 |
+
[Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")],
|
| 471 |
+
tz="US/Eastern",
|
| 472 |
+
name="idx",
|
| 473 |
+
)
|
| 474 |
+
tm.assert_index_equal(result, exp, exact=True)
|
| 475 |
+
assert isinstance(result, DatetimeIndex)
|
| 476 |
+
|
| 477 |
+
# tz mismatch affecting to tz-aware raises TypeError/ValueError
|
| 478 |
+
|
| 479 |
+
msg = "cannot be converted to datetime64"
|
| 480 |
+
with pytest.raises(ValueError, match=msg):
|
| 481 |
+
DatetimeIndex(
|
| 482 |
+
[
|
| 483 |
+
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
|
| 484 |
+
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
|
| 485 |
+
],
|
| 486 |
+
name="idx",
|
| 487 |
+
)
|
| 488 |
+
|
| 489 |
+
# pre-2.0 this raised bc of awareness mismatch. in 2.0 with a tz#
|
| 490 |
+
# specified we behave as if this was called pointwise, so
|
| 491 |
+
# the naive Timestamp is treated as a wall time.
|
| 492 |
+
dti = DatetimeIndex(
|
| 493 |
+
[
|
| 494 |
+
Timestamp("2011-01-01 10:00"),
|
| 495 |
+
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
|
| 496 |
+
],
|
| 497 |
+
tz="Asia/Tokyo",
|
| 498 |
+
name="idx",
|
| 499 |
+
)
|
| 500 |
+
expected = DatetimeIndex(
|
| 501 |
+
[
|
| 502 |
+
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
|
| 503 |
+
Timestamp("2011-01-02 10:00", tz="US/Eastern").tz_convert("Asia/Tokyo"),
|
| 504 |
+
],
|
| 505 |
+
tz="Asia/Tokyo",
|
| 506 |
+
name="idx",
|
| 507 |
+
)
|
| 508 |
+
tm.assert_index_equal(dti, expected)
|
| 509 |
+
|
| 510 |
+
# pre-2.0 mixed-tz scalars raised even if a tz/dtype was specified.
|
| 511 |
+
# as of 2.0 we successfully return the requested tz/dtype
|
| 512 |
+
dti = DatetimeIndex(
|
| 513 |
+
[
|
| 514 |
+
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
|
| 515 |
+
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
|
| 516 |
+
],
|
| 517 |
+
tz="US/Eastern",
|
| 518 |
+
name="idx",
|
| 519 |
+
)
|
| 520 |
+
expected = DatetimeIndex(
|
| 521 |
+
[
|
| 522 |
+
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo").tz_convert("US/Eastern"),
|
| 523 |
+
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
|
| 524 |
+
],
|
| 525 |
+
tz="US/Eastern",
|
| 526 |
+
name="idx",
|
| 527 |
+
)
|
| 528 |
+
tm.assert_index_equal(dti, expected)
|
| 529 |
+
|
| 530 |
+
# same thing but pass dtype instead of tz
|
| 531 |
+
dti = DatetimeIndex(
|
| 532 |
+
[
|
| 533 |
+
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
|
| 534 |
+
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
|
| 535 |
+
],
|
| 536 |
+
dtype="M8[ns, US/Eastern]",
|
| 537 |
+
name="idx",
|
| 538 |
+
)
|
| 539 |
+
tm.assert_index_equal(dti, expected)
|
| 540 |
+
|
| 541 |
+
def test_construction_base_constructor(self):
|
| 542 |
+
arr = [Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-03")]
|
| 543 |
+
tm.assert_index_equal(Index(arr), DatetimeIndex(arr))
|
| 544 |
+
tm.assert_index_equal(Index(np.array(arr)), DatetimeIndex(np.array(arr)))
|
| 545 |
+
|
| 546 |
+
arr = [np.nan, pd.NaT, Timestamp("2011-01-03")]
|
| 547 |
+
tm.assert_index_equal(Index(arr), DatetimeIndex(arr))
|
| 548 |
+
tm.assert_index_equal(Index(np.array(arr)), DatetimeIndex(np.array(arr)))
|
| 549 |
+
|
| 550 |
+
def test_construction_outofbounds(self):
|
| 551 |
+
# GH 13663
|
| 552 |
+
dates = [
|
| 553 |
+
datetime(3000, 1, 1),
|
| 554 |
+
datetime(4000, 1, 1),
|
| 555 |
+
datetime(5000, 1, 1),
|
| 556 |
+
datetime(6000, 1, 1),
|
| 557 |
+
]
|
| 558 |
+
exp = Index(dates, dtype=object)
|
| 559 |
+
# coerces to object
|
| 560 |
+
tm.assert_index_equal(Index(dates), exp)
|
| 561 |
+
|
| 562 |
+
msg = "^Out of bounds nanosecond timestamp: 3000-01-01 00:00:00, at position 0$"
|
| 563 |
+
with pytest.raises(OutOfBoundsDatetime, match=msg):
|
| 564 |
+
# can't create DatetimeIndex
|
| 565 |
+
DatetimeIndex(dates)
|
| 566 |
+
|
| 567 |
+
@pytest.mark.parametrize("data", [["1400-01-01"], [datetime(1400, 1, 1)]])
|
| 568 |
+
def test_dti_date_out_of_range(self, data):
|
| 569 |
+
# GH#1475
|
| 570 |
+
msg = (
|
| 571 |
+
"^Out of bounds nanosecond timestamp: "
|
| 572 |
+
"1400-01-01( 00:00:00)?, at position 0$"
|
| 573 |
+
)
|
| 574 |
+
with pytest.raises(OutOfBoundsDatetime, match=msg):
|
| 575 |
+
DatetimeIndex(data)
|
| 576 |
+
|
| 577 |
+
def test_construction_with_ndarray(self):
|
| 578 |
+
# GH 5152
|
| 579 |
+
dates = [datetime(2013, 10, 7), datetime(2013, 10, 8), datetime(2013, 10, 9)]
|
| 580 |
+
data = DatetimeIndex(dates, freq=offsets.BDay()).values
|
| 581 |
+
result = DatetimeIndex(data, freq=offsets.BDay())
|
| 582 |
+
expected = DatetimeIndex(["2013-10-07", "2013-10-08", "2013-10-09"], freq="B")
|
| 583 |
+
tm.assert_index_equal(result, expected)
|
| 584 |
+
|
| 585 |
+
def test_integer_values_and_tz_interpreted_as_utc(self):
|
| 586 |
+
# GH-24559
|
| 587 |
+
val = np.datetime64("2000-01-01 00:00:00", "ns")
|
| 588 |
+
values = np.array([val.view("i8")])
|
| 589 |
+
|
| 590 |
+
result = DatetimeIndex(values).tz_localize("US/Central")
|
| 591 |
+
|
| 592 |
+
expected = DatetimeIndex(["2000-01-01T00:00:00"], dtype="M8[ns, US/Central]")
|
| 593 |
+
tm.assert_index_equal(result, expected)
|
| 594 |
+
|
| 595 |
+
# but UTC is *not* deprecated.
|
| 596 |
+
with tm.assert_produces_warning(None):
|
| 597 |
+
result = DatetimeIndex(values, tz="UTC")
|
| 598 |
+
expected = DatetimeIndex(["2000-01-01T00:00:00"], dtype="M8[ns, UTC]")
|
| 599 |
+
tm.assert_index_equal(result, expected)
|
| 600 |
+
|
| 601 |
+
def test_constructor_coverage(self):
|
| 602 |
+
msg = r"DatetimeIndex\(\.\.\.\) must be called with a collection"
|
| 603 |
+
with pytest.raises(TypeError, match=msg):
|
| 604 |
+
DatetimeIndex("1/1/2000")
|
| 605 |
+
|
| 606 |
+
# generator expression
|
| 607 |
+
gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10))
|
| 608 |
+
result = DatetimeIndex(gen)
|
| 609 |
+
expected = DatetimeIndex(
|
| 610 |
+
[datetime(2000, 1, 1) + timedelta(i) for i in range(10)]
|
| 611 |
+
)
|
| 612 |
+
tm.assert_index_equal(result, expected)
|
| 613 |
+
|
| 614 |
+
# NumPy string array
|
| 615 |
+
strings = np.array(["2000-01-01", "2000-01-02", "2000-01-03"])
|
| 616 |
+
result = DatetimeIndex(strings)
|
| 617 |
+
expected = DatetimeIndex(strings.astype("O"))
|
| 618 |
+
tm.assert_index_equal(result, expected)
|
| 619 |
+
|
| 620 |
+
from_ints = DatetimeIndex(expected.asi8)
|
| 621 |
+
tm.assert_index_equal(from_ints, expected)
|
| 622 |
+
|
| 623 |
+
# string with NaT
|
| 624 |
+
strings = np.array(["2000-01-01", "2000-01-02", "NaT"])
|
| 625 |
+
result = DatetimeIndex(strings)
|
| 626 |
+
expected = DatetimeIndex(strings.astype("O"))
|
| 627 |
+
tm.assert_index_equal(result, expected)
|
| 628 |
+
|
| 629 |
+
from_ints = DatetimeIndex(expected.asi8)
|
| 630 |
+
tm.assert_index_equal(from_ints, expected)
|
| 631 |
+
|
| 632 |
+
# non-conforming
|
| 633 |
+
msg = (
|
| 634 |
+
"Inferred frequency None from passed values does not conform "
|
| 635 |
+
"to passed frequency D"
|
| 636 |
+
)
|
| 637 |
+
with pytest.raises(ValueError, match=msg):
|
| 638 |
+
DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"], freq="D")
|
| 639 |
+
|
| 640 |
+
@pytest.mark.parametrize("freq", ["YS", "W-SUN"])
|
| 641 |
+
def test_constructor_datetime64_tzformat(self, freq):
|
| 642 |
+
# see GH#6572: ISO 8601 format results in stdlib timezone object
|
| 643 |
+
idx = date_range(
|
| 644 |
+
"2013-01-01T00:00:00-05:00", "2016-01-01T23:59:59-05:00", freq=freq
|
| 645 |
+
)
|
| 646 |
+
expected = date_range(
|
| 647 |
+
"2013-01-01T00:00:00",
|
| 648 |
+
"2016-01-01T23:59:59",
|
| 649 |
+
freq=freq,
|
| 650 |
+
tz=timezone(timedelta(minutes=-300)),
|
| 651 |
+
)
|
| 652 |
+
tm.assert_index_equal(idx, expected)
|
| 653 |
+
# Unable to use `US/Eastern` because of DST
|
| 654 |
+
expected_i8 = date_range(
|
| 655 |
+
"2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima"
|
| 656 |
+
)
|
| 657 |
+
tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
|
| 658 |
+
|
| 659 |
+
idx = date_range(
|
| 660 |
+
"2013-01-01T00:00:00+09:00", "2016-01-01T23:59:59+09:00", freq=freq
|
| 661 |
+
)
|
| 662 |
+
expected = date_range(
|
| 663 |
+
"2013-01-01T00:00:00",
|
| 664 |
+
"2016-01-01T23:59:59",
|
| 665 |
+
freq=freq,
|
| 666 |
+
tz=timezone(timedelta(minutes=540)),
|
| 667 |
+
)
|
| 668 |
+
tm.assert_index_equal(idx, expected)
|
| 669 |
+
expected_i8 = date_range(
|
| 670 |
+
"2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo"
|
| 671 |
+
)
|
| 672 |
+
tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
|
| 673 |
+
|
| 674 |
+
# Non ISO 8601 format results in dateutil.tz.tzoffset
|
| 675 |
+
idx = date_range("2013/1/1 0:00:00-5:00", "2016/1/1 23:59:59-5:00", freq=freq)
|
| 676 |
+
expected = date_range(
|
| 677 |
+
"2013-01-01T00:00:00",
|
| 678 |
+
"2016-01-01T23:59:59",
|
| 679 |
+
freq=freq,
|
| 680 |
+
tz=timezone(timedelta(minutes=-300)),
|
| 681 |
+
)
|
| 682 |
+
tm.assert_index_equal(idx, expected)
|
| 683 |
+
# Unable to use `US/Eastern` because of DST
|
| 684 |
+
expected_i8 = date_range(
|
| 685 |
+
"2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima"
|
| 686 |
+
)
|
| 687 |
+
tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
|
| 688 |
+
|
| 689 |
+
idx = date_range("2013/1/1 0:00:00+9:00", "2016/1/1 23:59:59+09:00", freq=freq)
|
| 690 |
+
expected = date_range(
|
| 691 |
+
"2013-01-01T00:00:00",
|
| 692 |
+
"2016-01-01T23:59:59",
|
| 693 |
+
freq=freq,
|
| 694 |
+
tz=timezone(timedelta(minutes=540)),
|
| 695 |
+
)
|
| 696 |
+
tm.assert_index_equal(idx, expected)
|
| 697 |
+
expected_i8 = date_range(
|
| 698 |
+
"2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo"
|
| 699 |
+
)
|
| 700 |
+
tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
|
| 701 |
+
|
| 702 |
+
def test_constructor_dtype(self):
|
| 703 |
+
# passing a dtype with a tz should localize
|
| 704 |
+
idx = DatetimeIndex(
|
| 705 |
+
["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]"
|
| 706 |
+
)
|
| 707 |
+
expected = (
|
| 708 |
+
DatetimeIndex(["2013-01-01", "2013-01-02"])
|
| 709 |
+
.as_unit("ns")
|
| 710 |
+
.tz_localize("US/Eastern")
|
| 711 |
+
)
|
| 712 |
+
tm.assert_index_equal(idx, expected)
|
| 713 |
+
|
| 714 |
+
idx = DatetimeIndex(["2013-01-01", "2013-01-02"], tz="US/Eastern").as_unit("ns")
|
| 715 |
+
tm.assert_index_equal(idx, expected)
|
| 716 |
+
|
| 717 |
+
def test_constructor_dtype_tz_mismatch_raises(self):
|
| 718 |
+
# if we already have a tz and its not the same, then raise
|
| 719 |
+
idx = DatetimeIndex(
|
| 720 |
+
["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]"
|
| 721 |
+
)
|
| 722 |
+
|
| 723 |
+
msg = (
|
| 724 |
+
"cannot supply both a tz and a timezone-naive dtype "
|
| 725 |
+
r"\(i\.e\. datetime64\[ns\]\)"
|
| 726 |
+
)
|
| 727 |
+
with pytest.raises(ValueError, match=msg):
|
| 728 |
+
DatetimeIndex(idx, dtype="datetime64[ns]")
|
| 729 |
+
|
| 730 |
+
# this is effectively trying to convert tz's
|
| 731 |
+
msg = "data is already tz-aware US/Eastern, unable to set specified tz: CET"
|
| 732 |
+
with pytest.raises(TypeError, match=msg):
|
| 733 |
+
DatetimeIndex(idx, dtype="datetime64[ns, CET]")
|
| 734 |
+
msg = "cannot supply both a tz and a dtype with a tz"
|
| 735 |
+
with pytest.raises(ValueError, match=msg):
|
| 736 |
+
DatetimeIndex(idx, tz="CET", dtype="datetime64[ns, US/Eastern]")
|
| 737 |
+
|
| 738 |
+
result = DatetimeIndex(idx, dtype="datetime64[ns, US/Eastern]")
|
| 739 |
+
tm.assert_index_equal(idx, result)
|
| 740 |
+
|
| 741 |
+
@pytest.mark.parametrize("dtype", [object, np.int32, np.int64])
|
| 742 |
+
def test_constructor_invalid_dtype_raises(self, dtype):
|
| 743 |
+
# GH 23986
|
| 744 |
+
msg = "Unexpected value for 'dtype'"
|
| 745 |
+
with pytest.raises(ValueError, match=msg):
|
| 746 |
+
DatetimeIndex([1, 2], dtype=dtype)
|
| 747 |
+
|
| 748 |
+
def test_000constructor_resolution(self):
|
| 749 |
+
# 2252
|
| 750 |
+
t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1)
|
| 751 |
+
idx = DatetimeIndex([t1])
|
| 752 |
+
|
| 753 |
+
assert idx.nanosecond[0] == t1.nanosecond
|
| 754 |
+
|
| 755 |
+
def test_disallow_setting_tz(self):
|
| 756 |
+
# GH 3746
|
| 757 |
+
dti = DatetimeIndex(["2010"], tz="UTC")
|
| 758 |
+
msg = "Cannot directly set timezone"
|
| 759 |
+
with pytest.raises(AttributeError, match=msg):
|
| 760 |
+
dti.tz = pytz.timezone("US/Pacific")
|
| 761 |
+
|
| 762 |
+
@pytest.mark.parametrize(
|
| 763 |
+
"tz",
|
| 764 |
+
[
|
| 765 |
+
None,
|
| 766 |
+
"America/Los_Angeles",
|
| 767 |
+
pytz.timezone("America/Los_Angeles"),
|
| 768 |
+
Timestamp("2000", tz="America/Los_Angeles").tz,
|
| 769 |
+
],
|
| 770 |
+
)
|
| 771 |
+
def test_constructor_start_end_with_tz(self, tz):
|
| 772 |
+
# GH 18595
|
| 773 |
+
start = Timestamp("2013-01-01 06:00:00", tz="America/Los_Angeles")
|
| 774 |
+
end = Timestamp("2013-01-02 06:00:00", tz="America/Los_Angeles")
|
| 775 |
+
result = date_range(freq="D", start=start, end=end, tz=tz)
|
| 776 |
+
expected = DatetimeIndex(
|
| 777 |
+
["2013-01-01 06:00:00", "2013-01-02 06:00:00"],
|
| 778 |
+
dtype="M8[ns, America/Los_Angeles]",
|
| 779 |
+
freq="D",
|
| 780 |
+
)
|
| 781 |
+
tm.assert_index_equal(result, expected)
|
| 782 |
+
# Especially assert that the timezone is consistent for pytz
|
| 783 |
+
assert pytz.timezone("America/Los_Angeles") is result.tz
|
| 784 |
+
|
| 785 |
+
@pytest.mark.parametrize("tz", ["US/Pacific", "US/Eastern", "Asia/Tokyo"])
|
| 786 |
+
def test_constructor_with_non_normalized_pytz(self, tz):
|
| 787 |
+
# GH 18595
|
| 788 |
+
non_norm_tz = Timestamp("2010", tz=tz).tz
|
| 789 |
+
result = DatetimeIndex(["2010"], tz=non_norm_tz)
|
| 790 |
+
assert pytz.timezone(tz) is result.tz
|
| 791 |
+
|
| 792 |
+
def test_constructor_timestamp_near_dst(self):
|
| 793 |
+
# GH 20854
|
| 794 |
+
ts = [
|
| 795 |
+
Timestamp("2016-10-30 03:00:00+0300", tz="Europe/Helsinki"),
|
| 796 |
+
Timestamp("2016-10-30 03:00:00+0200", tz="Europe/Helsinki"),
|
| 797 |
+
]
|
| 798 |
+
result = DatetimeIndex(ts)
|
| 799 |
+
expected = DatetimeIndex([ts[0].to_pydatetime(), ts[1].to_pydatetime()])
|
| 800 |
+
tm.assert_index_equal(result, expected)
|
| 801 |
+
|
| 802 |
+
@pytest.mark.parametrize("klass", [Index, DatetimeIndex])
|
| 803 |
+
@pytest.mark.parametrize("box", [np.array, partial(np.array, dtype=object), list])
|
| 804 |
+
@pytest.mark.parametrize(
|
| 805 |
+
"tz, dtype",
|
| 806 |
+
[("US/Pacific", "datetime64[ns, US/Pacific]"), (None, "datetime64[ns]")],
|
| 807 |
+
)
|
| 808 |
+
def test_constructor_with_int_tz(self, klass, box, tz, dtype):
|
| 809 |
+
# GH 20997, 20964
|
| 810 |
+
ts = Timestamp("2018-01-01", tz=tz).as_unit("ns")
|
| 811 |
+
result = klass(box([ts._value]), dtype=dtype)
|
| 812 |
+
expected = klass([ts])
|
| 813 |
+
assert result == expected
|
| 814 |
+
|
| 815 |
+
def test_construction_int_rountrip(self, tz_naive_fixture):
|
| 816 |
+
# GH 12619, GH#24559
|
| 817 |
+
tz = tz_naive_fixture
|
| 818 |
+
|
| 819 |
+
result = 1293858000000000000
|
| 820 |
+
expected = DatetimeIndex([result], tz=tz).asi8[0]
|
| 821 |
+
assert result == expected
|
| 822 |
+
|
| 823 |
+
def test_construction_from_replaced_timestamps_with_dst(self):
|
| 824 |
+
# GH 18785
|
| 825 |
+
index = date_range(
|
| 826 |
+
Timestamp(2000, 12, 31),
|
| 827 |
+
Timestamp(2005, 12, 31),
|
| 828 |
+
freq="YE-DEC",
|
| 829 |
+
tz="Australia/Melbourne",
|
| 830 |
+
)
|
| 831 |
+
result = DatetimeIndex([x.replace(month=6, day=1) for x in index])
|
| 832 |
+
expected = DatetimeIndex(
|
| 833 |
+
[
|
| 834 |
+
"2000-06-01 00:00:00",
|
| 835 |
+
"2001-06-01 00:00:00",
|
| 836 |
+
"2002-06-01 00:00:00",
|
| 837 |
+
"2003-06-01 00:00:00",
|
| 838 |
+
"2004-06-01 00:00:00",
|
| 839 |
+
"2005-06-01 00:00:00",
|
| 840 |
+
],
|
| 841 |
+
tz="Australia/Melbourne",
|
| 842 |
+
)
|
| 843 |
+
tm.assert_index_equal(result, expected)
|
| 844 |
+
|
| 845 |
+
def test_construction_with_tz_and_tz_aware_dti(self):
|
| 846 |
+
# GH 23579
|
| 847 |
+
dti = date_range("2016-01-01", periods=3, tz="US/Central")
|
| 848 |
+
msg = "data is already tz-aware US/Central, unable to set specified tz"
|
| 849 |
+
with pytest.raises(TypeError, match=msg):
|
| 850 |
+
DatetimeIndex(dti, tz="Asia/Tokyo")
|
| 851 |
+
|
| 852 |
+
def test_construction_with_nat_and_tzlocal(self):
|
| 853 |
+
tz = dateutil.tz.tzlocal()
|
| 854 |
+
result = DatetimeIndex(["2018", "NaT"], tz=tz)
|
| 855 |
+
expected = DatetimeIndex([Timestamp("2018", tz=tz), pd.NaT])
|
| 856 |
+
tm.assert_index_equal(result, expected)
|
| 857 |
+
|
| 858 |
+
def test_constructor_with_ambiguous_keyword_arg(self):
|
| 859 |
+
# GH 35297
|
| 860 |
+
|
| 861 |
+
expected = DatetimeIndex(
|
| 862 |
+
["2020-11-01 01:00:00", "2020-11-02 01:00:00"],
|
| 863 |
+
dtype="datetime64[ns, America/New_York]",
|
| 864 |
+
freq="D",
|
| 865 |
+
ambiguous=False,
|
| 866 |
+
)
|
| 867 |
+
|
| 868 |
+
# ambiguous keyword in start
|
| 869 |
+
timezone = "America/New_York"
|
| 870 |
+
start = Timestamp(year=2020, month=11, day=1, hour=1).tz_localize(
|
| 871 |
+
timezone, ambiguous=False
|
| 872 |
+
)
|
| 873 |
+
result = date_range(start=start, periods=2, ambiguous=False)
|
| 874 |
+
tm.assert_index_equal(result, expected)
|
| 875 |
+
|
| 876 |
+
# ambiguous keyword in end
|
| 877 |
+
timezone = "America/New_York"
|
| 878 |
+
end = Timestamp(year=2020, month=11, day=2, hour=1).tz_localize(
|
| 879 |
+
timezone, ambiguous=False
|
| 880 |
+
)
|
| 881 |
+
result = date_range(end=end, periods=2, ambiguous=False)
|
| 882 |
+
tm.assert_index_equal(result, expected)
|
| 883 |
+
|
| 884 |
+
def test_constructor_with_nonexistent_keyword_arg(self, warsaw):
|
| 885 |
+
# GH 35297
|
| 886 |
+
timezone = warsaw
|
| 887 |
+
|
| 888 |
+
# nonexistent keyword in start
|
| 889 |
+
start = Timestamp("2015-03-29 02:30:00").tz_localize(
|
| 890 |
+
timezone, nonexistent="shift_forward"
|
| 891 |
+
)
|
| 892 |
+
result = date_range(start=start, periods=2, freq="h")
|
| 893 |
+
expected = DatetimeIndex(
|
| 894 |
+
[
|
| 895 |
+
Timestamp("2015-03-29 03:00:00+02:00", tz=timezone),
|
| 896 |
+
Timestamp("2015-03-29 04:00:00+02:00", tz=timezone),
|
| 897 |
+
]
|
| 898 |
+
)
|
| 899 |
+
|
| 900 |
+
tm.assert_index_equal(result, expected)
|
| 901 |
+
|
| 902 |
+
# nonexistent keyword in end
|
| 903 |
+
end = start
|
| 904 |
+
result = date_range(end=end, periods=2, freq="h")
|
| 905 |
+
expected = DatetimeIndex(
|
| 906 |
+
[
|
| 907 |
+
Timestamp("2015-03-29 01:00:00+01:00", tz=timezone),
|
| 908 |
+
Timestamp("2015-03-29 03:00:00+02:00", tz=timezone),
|
| 909 |
+
]
|
| 910 |
+
)
|
| 911 |
+
|
| 912 |
+
tm.assert_index_equal(result, expected)
|
| 913 |
+
|
| 914 |
+
def test_constructor_no_precision_raises(self):
|
| 915 |
+
# GH-24753, GH-24739
|
| 916 |
+
|
| 917 |
+
msg = "with no precision is not allowed"
|
| 918 |
+
with pytest.raises(ValueError, match=msg):
|
| 919 |
+
DatetimeIndex(["2000"], dtype="datetime64")
|
| 920 |
+
|
| 921 |
+
msg = "The 'datetime64' dtype has no unit. Please pass in"
|
| 922 |
+
with pytest.raises(ValueError, match=msg):
|
| 923 |
+
Index(["2000"], dtype="datetime64")
|
| 924 |
+
|
| 925 |
+
def test_constructor_wrong_precision_raises(self):
|
| 926 |
+
dti = DatetimeIndex(["2000"], dtype="datetime64[us]")
|
| 927 |
+
assert dti.dtype == "M8[us]"
|
| 928 |
+
assert dti[0] == Timestamp(2000, 1, 1)
|
| 929 |
+
|
| 930 |
+
def test_index_constructor_with_numpy_object_array_and_timestamp_tz_with_nan(self):
|
| 931 |
+
# GH 27011
|
| 932 |
+
result = Index(np.array([Timestamp("2019", tz="UTC"), np.nan], dtype=object))
|
| 933 |
+
expected = DatetimeIndex([Timestamp("2019", tz="UTC"), pd.NaT])
|
| 934 |
+
tm.assert_index_equal(result, expected)
|
| 935 |
+
|
| 936 |
+
@pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")])
|
| 937 |
+
def test_dti_from_tzaware_datetime(self, tz):
|
| 938 |
+
d = [datetime(2012, 8, 19, tzinfo=tz)]
|
| 939 |
+
|
| 940 |
+
index = DatetimeIndex(d)
|
| 941 |
+
assert timezones.tz_compare(index.tz, tz)
|
| 942 |
+
|
| 943 |
+
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
|
| 944 |
+
def test_dti_tz_constructors(self, tzstr):
|
| 945 |
+
"""Test different DatetimeIndex constructions with timezone
|
| 946 |
+
Follow-up of GH#4229
|
| 947 |
+
"""
|
| 948 |
+
arr = ["11/10/2005 08:00:00", "11/10/2005 09:00:00"]
|
| 949 |
+
|
| 950 |
+
idx1 = to_datetime(arr).tz_localize(tzstr)
|
| 951 |
+
idx2 = date_range(start="2005-11-10 08:00:00", freq="h", periods=2, tz=tzstr)
|
| 952 |
+
idx2 = idx2._with_freq(None) # the others all have freq=None
|
| 953 |
+
idx3 = DatetimeIndex(arr, tz=tzstr)
|
| 954 |
+
idx4 = DatetimeIndex(np.array(arr), tz=tzstr)
|
| 955 |
+
|
| 956 |
+
for other in [idx2, idx3, idx4]:
|
| 957 |
+
tm.assert_index_equal(idx1, other)
|
| 958 |
+
|
| 959 |
+
def test_dti_construction_idempotent(self, unit):
|
| 960 |
+
rng = date_range(
|
| 961 |
+
"03/12/2012 00:00", periods=10, freq="W-FRI", tz="US/Eastern", unit=unit
|
| 962 |
+
)
|
| 963 |
+
rng2 = DatetimeIndex(data=rng, tz="US/Eastern")
|
| 964 |
+
tm.assert_index_equal(rng, rng2)
|
| 965 |
+
|
| 966 |
+
@pytest.mark.parametrize("prefix", ["", "dateutil/"])
|
| 967 |
+
def test_dti_constructor_static_tzinfo(self, prefix):
|
| 968 |
+
# it works!
|
| 969 |
+
index = DatetimeIndex([datetime(2012, 1, 1)], tz=prefix + "EST")
|
| 970 |
+
index.hour
|
| 971 |
+
index[0]
|
| 972 |
+
|
| 973 |
+
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
|
| 974 |
+
def test_dti_convert_datetime_list(self, tzstr):
|
| 975 |
+
dr = date_range("2012-06-02", periods=10, tz=tzstr, name="foo")
|
| 976 |
+
dr2 = DatetimeIndex(list(dr), name="foo", freq="D")
|
| 977 |
+
tm.assert_index_equal(dr, dr2)
|
| 978 |
+
|
| 979 |
+
@pytest.mark.parametrize(
|
| 980 |
+
"tz",
|
| 981 |
+
[
|
| 982 |
+
pytz.timezone("US/Eastern"),
|
| 983 |
+
gettz("US/Eastern"),
|
| 984 |
+
],
|
| 985 |
+
)
|
| 986 |
+
@pytest.mark.parametrize("use_str", [True, False])
|
| 987 |
+
@pytest.mark.parametrize("box_cls", [Timestamp, DatetimeIndex])
|
| 988 |
+
def test_dti_ambiguous_matches_timestamp(self, tz, use_str, box_cls, request):
|
| 989 |
+
# GH#47471 check that we get the same raising behavior in the DTI
|
| 990 |
+
# constructor and Timestamp constructor
|
| 991 |
+
dtstr = "2013-11-03 01:59:59.999999"
|
| 992 |
+
item = dtstr
|
| 993 |
+
if not use_str:
|
| 994 |
+
item = Timestamp(dtstr).to_pydatetime()
|
| 995 |
+
if box_cls is not Timestamp:
|
| 996 |
+
item = [item]
|
| 997 |
+
|
| 998 |
+
if not use_str and isinstance(tz, dateutil.tz.tzfile):
|
| 999 |
+
# FIXME: The Timestamp constructor here behaves differently than all
|
| 1000 |
+
# the other cases bc with dateutil/zoneinfo tzinfos we implicitly
|
| 1001 |
+
# get fold=0. Having this raise is not important, but having the
|
| 1002 |
+
# behavior be consistent across cases is.
|
| 1003 |
+
mark = pytest.mark.xfail(reason="We implicitly get fold=0.")
|
| 1004 |
+
request.applymarker(mark)
|
| 1005 |
+
|
| 1006 |
+
with pytest.raises(pytz.AmbiguousTimeError, match=dtstr):
|
| 1007 |
+
box_cls(item, tz=tz)
|
| 1008 |
+
|
| 1009 |
+
@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
def test_dti_constructor_with_non_nano_dtype(self, tz):
    """Mixed Timestamp/str/int input with a microsecond dtype matches pointwise conversion."""
    # GH#55756, GH#54620
    dtype = "M8[us]" if tz is None else f"M8[us, {tz}]"
    vals = [Timestamp("2999-01-01"), "2999-01-02 03:04:05.678910", 2500]
    result = DatetimeIndex(vals, dtype=dtype)

    # The 2500 is interpreted as microseconds, consistent with what
    # we would get if we created DatetimeIndexes from vals[:2] and vals[2:]
    # and concated the results.
    stamp, text, integer = vals
    pointwise = [
        stamp.tz_localize(tz),
        Timestamp(text, tz=tz),
        to_datetime(integer, unit="us", utc=True).tz_convert(tz),
    ]
    exp_arr = np.array(
        [elem.as_unit("us").asm8 for elem in pointwise], dtype="M8[us]"
    )
    expected = DatetimeIndex(exp_arr, dtype="M8[us]")
    if tz is not None:
        expected = expected.tz_localize("UTC").tz_convert(tz)
    tm.assert_index_equal(result, expected)

    # Same input, but wrapped in an object-dtype ndarray.
    as_object = np.array(vals, dtype=object)
    tm.assert_index_equal(DatetimeIndex(as_object, dtype=dtype), expected)
|
| 1035 |
+
|
| 1036 |
+
def test_dti_constructor_with_non_nano_now_today(self):
|
| 1037 |
+
# GH#55756
|
| 1038 |
+
now = Timestamp.now()
|
| 1039 |
+
today = Timestamp.today()
|
| 1040 |
+
result = DatetimeIndex(["now", "today"], dtype="M8[s]")
|
| 1041 |
+
assert result.dtype == "M8[s]"
|
| 1042 |
+
|
| 1043 |
+
# result may not exactly match [now, today] so we'll test it up to a tolerance.
|
| 1044 |
+
# (it *may* match exactly due to rounding)
|
| 1045 |
+
tolerance = pd.Timedelta(microseconds=1)
|
| 1046 |
+
|
| 1047 |
+
diff0 = result[0] - now.as_unit("s")
|
| 1048 |
+
assert diff0 >= pd.Timedelta(0)
|
| 1049 |
+
assert diff0 < tolerance
|
| 1050 |
+
|
| 1051 |
+
diff1 = result[1] - today.as_unit("s")
|
| 1052 |
+
assert diff1 >= pd.Timedelta(0)
|
| 1053 |
+
assert diff1 < tolerance
|
| 1054 |
+
|
| 1055 |
+
def test_dti_constructor_object_float_matches_float_dtype(self):
|
| 1056 |
+
# GH#55780
|
| 1057 |
+
arr = np.array([0, np.nan], dtype=np.float64)
|
| 1058 |
+
arr2 = arr.astype(object)
|
| 1059 |
+
|
| 1060 |
+
dti1 = DatetimeIndex(arr, tz="CET")
|
| 1061 |
+
dti2 = DatetimeIndex(arr2, tz="CET")
|
| 1062 |
+
tm.assert_index_equal(dti1, dti2)
|
| 1063 |
+
|
| 1064 |
+
@pytest.mark.parametrize("dtype", ["M8[us]", "M8[us, US/Pacific]"])
def test_dti_constructor_with_dtype_object_int_matches_int_dtype(self, dtype):
    """Int64, float64 and both object-dtype variants of the data build equal indexes."""
    # Going through the object path should match the non-object path

    ints = np.arange(5, dtype="i8") * 1000
    ints[0] = pd.NaT.value

    floats = ints.astype(np.float64)
    floats[0] = np.nan

    obj_ints = ints.astype(object)
    # change lib.infer_dtype(vals3) from "integer" so we go through
    # array_to_datetime in _sequence_to_dt64
    obj_ints[0] = pd.NaT

    obj_floats = floats.astype(object)

    results = [
        DatetimeIndex(values, dtype=dtype)
        for values in (ints, floats, obj_ints, obj_floats)
    ]

    expected = DatetimeIndex(ints.view("M8[us]"))
    target_tz = results[0].tz
    if target_tz is not None:
        expected = expected.tz_localize("UTC").tz_convert(target_tz)

    for built in results:
        tm.assert_index_equal(built, expected)
|
| 1093 |
+
|
| 1094 |
+
|
| 1095 |
+
class TestTimeSeries:
    """DatetimeIndex constructor checks: freq handling, integer input and string parsing."""

    def test_dti_constructor_preserve_dti_freq(self):
        """Wrapping a date_range in DatetimeIndex keeps an equal freq."""
        source = date_range("1/1/2000", "1/2/2000", freq="5min")

        wrapped = DatetimeIndex(source)
        assert source.freq == wrapped.freq

    def test_explicit_none_freq(self):
        # Explicitly passing freq=None is respected
        source = date_range("1/1/2000", "1/2/2000", freq="5min")

        from_index = DatetimeIndex(source, freq=None)
        assert from_index.freq is None

        from_data = DatetimeIndex(source._data, freq=None)
        assert from_data.freq is None

    def test_dti_constructor_small_int(self, any_int_numpy_dtype):
        # see gh-13721
        expected = DatetimeIndex(
            [
                "1970-01-01 00:00:00.00000000",
                "1970-01-01 00:00:00.00000001",
                "1970-01-01 00:00:00.00000002",
            ]
        )

        small_ints = np.array([0, 10, 20], dtype=any_int_numpy_dtype)
        tm.assert_index_equal(DatetimeIndex(small_ints), expected)

    def test_ctor_str_intraday(self):
        """The seconds component of a parsed string is retained."""
        parsed = DatetimeIndex(["1-1-2000 00:00:01"])
        first = parsed[0]
        assert first.second == 1

    def test_index_cast_datetime64_other_units(self):
        """Index values of day-unit data equal the overflow-safe cast to M8[ns]."""
        day_values = np.arange(0, 100, 10, dtype=np.int64).view("M8[D]")
        idx = Index(day_values)

        nano_values = astype_overflowsafe(day_values, dtype=np.dtype("M8[ns]"))
        assert (idx.values == nano_values).all()

    def test_constructor_int64_nocopy(self):
        # GH#1624
        # Default construction: later writes to the array show up in the index.
        shared = np.arange(1000, dtype=np.int64)
        viewing = DatetimeIndex(shared)

        shared[50:100] = -1
        assert (viewing.asi8[50:100] == -1).all()

        # copy=True: later writes to the array do not show up in the index.
        private = np.arange(1000, dtype=np.int64)
        copied = DatetimeIndex(private, copy=True)

        private[50:100] = -1
        assert (copied.asi8[50:100] != -1).all()

    @pytest.mark.parametrize(
        "freq",
        ["ME", "QE", "YE", "D", "B", "bh", "min", "s", "ms", "us", "h", "ns", "C"],
    )
    def test_from_freq_recreate_from_data(self, freq):
        """Re-wrapping a one-period range with the same freq (and tz) is a no-op."""
        naive = date_range(start="2001/02/01 09:00", freq=freq, periods=1)
        tm.assert_index_equal(DatetimeIndex(naive, freq=freq), naive)

        aware = date_range(
            start="2001/02/01 09:00", freq=freq, tz="US/Pacific", periods=1
        )
        tm.assert_index_equal(
            DatetimeIndex(aware, freq=freq, tz="US/Pacific"), aware
        )

    def test_datetimeindex_constructor_misc(self):
        """Unparseable strings raise; equivalent mixed inputs give equal values."""
        bad_strings = ["1/1/2005", "1/2/2005", "Jn 3, 2005", "2005-01-04"]
        msg = r"(\(')?Unknown datetime string format(:', 'Jn 3, 2005'\))?"
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(bad_strings)

        tail = ["1/2/2005", "1/3/2005", "2005-01-04"]

        from_strings = DatetimeIndex(["1/1/2005"] + tail)
        from_datetime = DatetimeIndex([datetime(2005, 1, 1)] + tail)
        from_timestamp = DatetimeIndex([Timestamp(datetime(2005, 1, 1))] + tail)
        from_object_array = DatetimeIndex(
            np.array(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"], dtype="O")
        )

        day_first = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True)
        year_first = DatetimeIndex(
            ["2007/05/12", "2008/01/25"], dayfirst=False, yearfirst=True
        )
        tm.assert_index_equal(day_first, year_first)

        for other in (from_datetime, from_timestamp, from_object_array):
            assert (from_strings.values == other.values).all()

    def test_dti_constructor_object_dtype_dayfirst_yearfirst_with_tz(self):
        # GH#55813
        val = "5/10/16"
        zone = "US/Pacific"

        dfirst = Timestamp(2016, 10, 5, tz="US/Pacific")
        yfirst = Timestamp(2005, 10, 16, tz="US/Pacific")

        parsed_dayfirst = DatetimeIndex([val], tz=zone, dayfirst=True)
        tm.assert_index_equal(parsed_dayfirst, DatetimeIndex([dfirst]))

        parsed_yearfirst = DatetimeIndex([val], tz=zone, yearfirst=True)
        tm.assert_index_equal(parsed_yearfirst, DatetimeIndex([yfirst]))
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_date_range.py
ADDED
|
@@ -0,0 +1,1721 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
test date_range, bdate_range construction from the convenience range functions
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from datetime import (
|
| 6 |
+
datetime,
|
| 7 |
+
time,
|
| 8 |
+
timedelta,
|
| 9 |
+
)
|
| 10 |
+
import re
|
| 11 |
+
|
| 12 |
+
import numpy as np
|
| 13 |
+
import pytest
|
| 14 |
+
import pytz
|
| 15 |
+
from pytz import timezone
|
| 16 |
+
|
| 17 |
+
from pandas._libs.tslibs import timezones
|
| 18 |
+
from pandas._libs.tslibs.offsets import (
|
| 19 |
+
BDay,
|
| 20 |
+
CDay,
|
| 21 |
+
DateOffset,
|
| 22 |
+
MonthEnd,
|
| 23 |
+
prefix_mapping,
|
| 24 |
+
)
|
| 25 |
+
from pandas.errors import OutOfBoundsDatetime
|
| 26 |
+
import pandas.util._test_decorators as td
|
| 27 |
+
|
| 28 |
+
import pandas as pd
|
| 29 |
+
from pandas import (
|
| 30 |
+
DataFrame,
|
| 31 |
+
DatetimeIndex,
|
| 32 |
+
Series,
|
| 33 |
+
Timedelta,
|
| 34 |
+
Timestamp,
|
| 35 |
+
bdate_range,
|
| 36 |
+
date_range,
|
| 37 |
+
offsets,
|
| 38 |
+
)
|
| 39 |
+
import pandas._testing as tm
|
| 40 |
+
from pandas.core.arrays.datetimes import _generate_range as generate_range
|
| 41 |
+
from pandas.tests.indexes.datetimes.test_timezones import (
|
| 42 |
+
FixedOffset,
|
| 43 |
+
fixed_off_no_name,
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
from pandas.tseries.holiday import USFederalHolidayCalendar
|
| 47 |
+
|
| 48 |
+
# Module-level reference endpoints: the first day of 2009 and of 2010.
START = datetime(2009, 1, 1)
END = datetime(2010, 1, 1)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def _get_expected_range(
|
| 52 |
+
begin_to_match,
|
| 53 |
+
end_to_match,
|
| 54 |
+
both_range,
|
| 55 |
+
inclusive_endpoints,
|
| 56 |
+
):
|
| 57 |
+
"""Helper to get expected range from a both inclusive range"""
|
| 58 |
+
left_match = begin_to_match == both_range[0]
|
| 59 |
+
right_match = end_to_match == both_range[-1]
|
| 60 |
+
|
| 61 |
+
if inclusive_endpoints == "left" and right_match:
|
| 62 |
+
expected_range = both_range[:-1]
|
| 63 |
+
elif inclusive_endpoints == "right" and left_match:
|
| 64 |
+
expected_range = both_range[1:]
|
| 65 |
+
elif inclusive_endpoints == "neither" and left_match and right_match:
|
| 66 |
+
expected_range = both_range[1:-1]
|
| 67 |
+
elif inclusive_endpoints == "neither" and right_match:
|
| 68 |
+
expected_range = both_range[:-1]
|
| 69 |
+
elif inclusive_endpoints == "neither" and left_match:
|
| 70 |
+
expected_range = both_range[1:]
|
| 71 |
+
elif inclusive_endpoints == "both":
|
| 72 |
+
expected_range = both_range[:]
|
| 73 |
+
else:
|
| 74 |
+
expected_range = both_range[:]
|
| 75 |
+
|
| 76 |
+
return expected_range
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
class TestTimestampEquivDateRange:
|
| 80 |
+
# Older tests in TestTimeSeries constructed their `stamp` objects
|
| 81 |
+
# using `date_range` instead of the `Timestamp` constructor.
|
| 82 |
+
# TestTimestampEquivDateRange checks that these are equivalent in the
|
| 83 |
+
# pertinent cases.
|
| 84 |
+
|
| 85 |
+
def test_date_range_timestamp_equiv(self):
|
| 86 |
+
rng = date_range("20090415", "20090519", tz="US/Eastern")
|
| 87 |
+
stamp = rng[0]
|
| 88 |
+
|
| 89 |
+
ts = Timestamp("20090415", tz="US/Eastern")
|
| 90 |
+
assert ts == stamp
|
| 91 |
+
|
| 92 |
+
def test_date_range_timestamp_equiv_dateutil(self):
|
| 93 |
+
rng = date_range("20090415", "20090519", tz="dateutil/US/Eastern")
|
| 94 |
+
stamp = rng[0]
|
| 95 |
+
|
| 96 |
+
ts = Timestamp("20090415", tz="dateutil/US/Eastern")
|
| 97 |
+
assert ts == stamp
|
| 98 |
+
|
| 99 |
+
def test_date_range_timestamp_equiv_explicit_pytz(self):
|
| 100 |
+
rng = date_range("20090415", "20090519", tz=pytz.timezone("US/Eastern"))
|
| 101 |
+
stamp = rng[0]
|
| 102 |
+
|
| 103 |
+
ts = Timestamp("20090415", tz=pytz.timezone("US/Eastern"))
|
| 104 |
+
assert ts == stamp
|
| 105 |
+
|
| 106 |
+
@td.skip_if_windows
|
| 107 |
+
def test_date_range_timestamp_equiv_explicit_dateutil(self):
|
| 108 |
+
from pandas._libs.tslibs.timezones import dateutil_gettz as gettz
|
| 109 |
+
|
| 110 |
+
rng = date_range("20090415", "20090519", tz=gettz("US/Eastern"))
|
| 111 |
+
stamp = rng[0]
|
| 112 |
+
|
| 113 |
+
ts = Timestamp("20090415", tz=gettz("US/Eastern"))
|
| 114 |
+
assert ts == stamp
|
| 115 |
+
|
| 116 |
+
def test_date_range_timestamp_equiv_from_datetime_instance(self):
|
| 117 |
+
datetime_instance = datetime(2014, 3, 4)
|
| 118 |
+
# build a timestamp with a frequency, since then it supports
|
| 119 |
+
# addition/subtraction of integers
|
| 120 |
+
timestamp_instance = date_range(datetime_instance, periods=1, freq="D")[0]
|
| 121 |
+
|
| 122 |
+
ts = Timestamp(datetime_instance)
|
| 123 |
+
assert ts == timestamp_instance
|
| 124 |
+
|
| 125 |
+
def test_date_range_timestamp_equiv_preserve_frequency(self):
|
| 126 |
+
timestamp_instance = date_range("2014-03-05", periods=1, freq="D")[0]
|
| 127 |
+
ts = Timestamp("2014-03-05")
|
| 128 |
+
|
| 129 |
+
assert timestamp_instance == ts
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
class TestDateRanges:
|
| 133 |
+
def test_date_range_name(self):
|
| 134 |
+
idx = date_range(start="2000-01-01", periods=1, freq="YE", name="TEST")
|
| 135 |
+
assert idx.name == "TEST"
|
| 136 |
+
|
| 137 |
+
def test_date_range_invalid_periods(self):
    """A non-numeric ``periods`` raises TypeError."""
    with pytest.raises(TypeError, match="periods must be a number, got foo"):
        date_range(start="1/1/2000", periods="foo", freq="D")
|
| 141 |
+
|
| 142 |
+
def test_date_range_fractional_period(self):
|
| 143 |
+
msg = "Non-integer 'periods' in pd.date_range, pd.timedelta_range"
|
| 144 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 145 |
+
rng = date_range("1/1/2000", periods=10.5)
|
| 146 |
+
exp = date_range("1/1/2000", periods=10)
|
| 147 |
+
tm.assert_index_equal(rng, exp)
|
| 148 |
+
|
| 149 |
+
@pytest.mark.parametrize(
    "freq,freq_depr",
    [
        ("2ME", "2M"),
        ("2SME", "2SM"),
        ("2BQE", "2BQ"),
        ("2BYE", "2BY"),
    ],
)
def test_date_range_frequency_M_SM_BQ_BY_deprecated(self, freq, freq_depr):
    """
    Deprecated aliases warn, name their replacement, and still work.

    Parameters
    ----------
    freq : str
        Current alias with a leading multiplier digit, e.g. "2ME".
    freq_depr : str
        Deprecated spelling of the same alias, e.g. "2M".
    """
    # GH#52064
    # The parentheses are required: without them the second f-string is a
    # stand-alone expression statement and never becomes part of the pattern,
    # so the "please use ..." half of the message would go unchecked.
    # [1:] strips the leading multiplier digit from each alias.
    depr_msg = (
        f"'{freq_depr[1:]}' is deprecated and will be removed "
        f"in a future version, please use '{freq[1:]}' instead."
    )

    expected = date_range("1/1/2000", periods=4, freq=freq)
    with tm.assert_produces_warning(FutureWarning, match=depr_msg):
        result = date_range("1/1/2000", periods=4, freq=freq_depr)
    tm.assert_index_equal(result, expected)
|
| 167 |
+
|
| 168 |
+
def test_date_range_tuple_freq_raises(self):
    """A tuple ``freq`` raises TypeError."""
    # GH#34703
    end_date = datetime(2000, 1, 1)
    tuple_freq = ("D", 5)
    with pytest.raises(TypeError, match="pass as a string instead"):
        date_range(end=end_date, freq=tuple_freq, periods=20)
|
| 173 |
+
|
| 174 |
+
@pytest.mark.parametrize("freq", ["ns", "us", "ms", "min", "s", "h", "D"])
def test_date_range_edges(self, freq):
    """Start/end handling: ordinary span, start after end, and start equal to end."""
    # GH#13672
    step = Timedelta(f"1{freq}")
    epoch = Timestamp("1970-01-01")
    one_step = epoch + step
    four_steps = epoch + 4 * step

    result = date_range(start=one_step, end=four_steps, freq=freq)
    expected = DatetimeIndex(
        [epoch + n * step for n in range(1, 5)],
        dtype="M8[ns]",
        freq=freq,
    )
    tm.assert_index_equal(result, expected)

    # start after end
    result = date_range(start=four_steps, end=one_step, freq=freq)
    expected = DatetimeIndex([], dtype="M8[ns]", freq=freq)
    tm.assert_index_equal(result, expected)

    # start matches end
    result = date_range(start=one_step, end=one_step, freq=freq)
    expected = DatetimeIndex([one_step], dtype="M8[ns]", freq=freq)
    tm.assert_index_equal(result, expected)
|
| 209 |
+
|
| 210 |
+
def test_date_range_near_implementation_bound(self):
    """Two periods ending at ``Timestamp.min`` with a 1ns step raise OutOfBoundsDatetime."""
    # GH#???
    one_nanosecond = Timedelta(1)
    expected_error = "Cannot generate range with"

    with pytest.raises(OutOfBoundsDatetime, match=expected_error):
        date_range(end=Timestamp.min, periods=2, freq=one_nanosecond)
|
| 216 |
+
|
| 217 |
+
def test_date_range_nat(self):
    """NaT as either endpoint raises ValueError."""
    # GH#11587
    msg = "Neither `start` nor `end` can be NaT"
    endpoint_pairs = [
        {"start": "2016-01-01", "end": pd.NaT},
        {"start": pd.NaT, "end": "2016-01-01"},
    ]
    for endpoints in endpoint_pairs:
        with pytest.raises(ValueError, match=msg):
            date_range(freq="D", **endpoints)
|
| 224 |
+
|
| 225 |
+
def test_date_range_multiplication_overflow(self):
    """A long daily range builds without warnings; an unrepresentable one raises."""
    # GH#24255
    # check that overflows in calculating `addend = periods * stride`
    # are caught
    n_periods = 213503
    with tm.assert_produces_warning(None):
        # we should _not_ be seeing a overflow RuntimeWarning
        dti = date_range(start="1677-09-22", periods=n_periods, freq="D")

    assert dti[0] == Timestamp("1677-09-22")
    assert len(dti) == n_periods

    with pytest.raises(OutOfBoundsDatetime, match="Cannot generate range with"):
        date_range("1969-05-04", periods=200000000, freq="30000D")
|
| 239 |
+
|
| 240 |
+
def test_date_range_unsigned_overflow_handling(self):
|
| 241 |
+
# GH#24255
|
| 242 |
+
# case where `addend = periods * stride` overflows int64 bounds
|
| 243 |
+
# but not uint64 bounds
|
| 244 |
+
dti = date_range(start="1677-09-22", end="2262-04-11", freq="D")
|
| 245 |
+
|
| 246 |
+
dti2 = date_range(start=dti[0], periods=len(dti), freq="D")
|
| 247 |
+
assert dti2.equals(dti)
|
| 248 |
+
|
| 249 |
+
dti3 = date_range(end=dti[-1], periods=len(dti), freq="D")
|
| 250 |
+
assert dti3.equals(dti)
|
| 251 |
+
|
| 252 |
+
def test_date_range_int64_overflow_non_recoverable(self):
    """Hourly ranges running past either bound raise OutOfBoundsDatetime."""
    # GH#24255
    msg = "Cannot generate range with"
    n_hours = 106752 * 24

    # case with start later than 1970-01-01, overflow int64 but not uint64
    with pytest.raises(OutOfBoundsDatetime, match=msg):
        date_range(start="1970-02-01", periods=n_hours, freq="h")

    # case with end before 1970-01-01, overflow int64 but not uint64
    with pytest.raises(OutOfBoundsDatetime, match=msg):
        date_range(end="1969-11-14", periods=n_hours, freq="h")
|
| 262 |
+
|
| 263 |
+
@pytest.mark.slow
@pytest.mark.parametrize(
    "s_ts, e_ts", [("2262-02-23", "1969-11-14"), ("1970-02-01", "1677-10-22")]
)
def test_date_range_int64_overflow_stride_endpoint_different_signs(
    self, s_ts, e_ts
):
    """A descending hourly range built from end+periods matches the start+end one."""
    # cases where stride * periods overflow int64 and stride/endpoint
    # have different signs
    first = Timestamp(s_ts)
    last = Timestamp(e_ts)

    expected = date_range(start=first, end=last, freq="-1h")
    assert expected[0] == first
    assert expected[-1] == last

    rebuilt = date_range(end=last, periods=len(expected), freq="-1h")
    tm.assert_index_equal(rebuilt, expected)
|
| 281 |
+
|
| 282 |
+
def test_date_range_out_of_bounds(self):
    """100000 daily periods from 2016 or back from 1763 raise OutOfBoundsDatetime."""
    # GH#14187
    msg = "Cannot generate range"
    n_days = 100000

    with pytest.raises(OutOfBoundsDatetime, match=msg):
        date_range("2016-01-01", periods=n_days, freq="D")

    with pytest.raises(OutOfBoundsDatetime, match=msg):
        date_range(end="1763-10-12", periods=n_days, freq="D")
|
| 289 |
+
|
| 290 |
+
def test_date_range_gen_error(self):
|
| 291 |
+
rng = date_range("1/1/2000 00:00", "1/1/2000 00:18", freq="5min")
|
| 292 |
+
assert len(rng) == 4
|
| 293 |
+
|
| 294 |
+
def test_date_range_normalize(self):
|
| 295 |
+
snap = datetime.today()
|
| 296 |
+
n = 50
|
| 297 |
+
|
| 298 |
+
rng = date_range(snap, periods=n, normalize=False, freq="2D")
|
| 299 |
+
|
| 300 |
+
offset = timedelta(2)
|
| 301 |
+
expected = DatetimeIndex(
|
| 302 |
+
[snap + i * offset for i in range(n)], dtype="M8[ns]", freq=offset
|
| 303 |
+
)
|
| 304 |
+
|
| 305 |
+
tm.assert_index_equal(rng, expected)
|
| 306 |
+
|
| 307 |
+
rng = date_range("1/1/2000 08:15", periods=n, normalize=False, freq="B")
|
| 308 |
+
the_time = time(8, 15)
|
| 309 |
+
for val in rng:
|
| 310 |
+
assert val.time() == the_time
|
| 311 |
+
|
| 312 |
+
def test_date_range_ambiguous_arguments(self):
    """Passing start, end, periods and freq together raises ValueError."""
    # #2538
    begin = datetime(2011, 1, 1, 5, 3, 40)
    finish = datetime(2011, 1, 1, 8, 9, 40)

    msg = (
        "Of the four parameters: start, end, periods, and "
        "freq, exactly three must be specified"
    )
    with pytest.raises(ValueError, match=msg):
        date_range(begin, finish, periods=10, freq="s")
|
| 323 |
+
|
| 324 |
+
def test_date_range_convenience_periods(self, unit):
    """start/end/periods without freq gives evenly spaced stamps (GH 20808)."""
    linear = date_range("2018-04-24", "2018-04-27", periods=3, unit=unit)
    linear_expected = DatetimeIndex(
        ["2018-04-24 00:00:00", "2018-04-25 12:00:00", "2018-04-27 00:00:00"],
        dtype=f"M8[{unit}]",
        freq=None,
    )
    tm.assert_index_equal(linear, linear_expected)

    # Test if spacing remains linear if tz changes to dst in range
    sydney = "Australia/Sydney"
    across_dst = date_range(
        "2018-04-01 01:00:00",
        "2018-04-01 04:00:00",
        tz=sydney,
        periods=3,
        unit=unit,
    )
    offset_stamps = (
        "2018-04-01 01:00:00+1100",
        "2018-04-01 02:00:00+1000",
        "2018-04-01 04:00:00+1000",
    )
    across_dst_expected = DatetimeIndex(
        [Timestamp(text, tz=sydney) for text in offset_stamps]
    ).as_unit(unit)
    tm.assert_index_equal(across_dst, across_dst_expected)
|
| 351 |
+
|
| 352 |
+
def test_date_range_index_comparison(self):
    """Compare a tz-aware range against a frame, a series and object arrays."""
    rng = date_range("2011-01-01", periods=3, tz="US/Eastern")
    frame = Series(rng).to_frame()
    column_of_stamps = np.array([rng.to_list()]).T
    column_from_index = np.array([rng]).T
    all_true = [True, True, True]

    # Index-vs-frame comparison is rejected in both operand orders.
    with pytest.raises(ValueError, match="Unable to coerce to Series"):
        rng == frame

    with pytest.raises(ValueError, match="Unable to coerce to Series"):
        frame == rng

    frame_cmp = frame == column_from_index
    tm.assert_frame_equal(frame_cmp, DataFrame(all_true))

    series_cmp = frame[0] == column_from_index[:, 0]
    tm.assert_series_equal(series_cmp, Series(all_true, name=0))

    # Comparing against the column array yields a 3x3 mask, True on the diagonal.
    diagonal = np.eye(3, dtype=bool)
    array_cmp = rng == column_of_stamps
    tm.assert_numpy_array_equal(array_cmp, diagonal)
|
| 379 |
+
|
| 380 |
+
@pytest.mark.parametrize(
    "start,end,result_tz",
    [
        ("20180101", "20180103", "US/Eastern"),
        (datetime(2018, 1, 1), datetime(2018, 1, 3), "US/Eastern"),
        (Timestamp("20180101"), Timestamp("20180103"), "US/Eastern"),
        (
            Timestamp("20180101", tz="US/Eastern"),
            Timestamp("20180103", tz="US/Eastern"),
            "US/Eastern",
        ),
        (
            Timestamp("20180101", tz="US/Eastern"),
            Timestamp("20180103", tz="US/Eastern"),
            None,
        ),
    ],
)
def test_date_range_linspacing_tz(self, start, end, result_tz):
    """Each start/end/tz combination must equal the daily US/Eastern range (GH 20983)."""
    daily = date_range("20180101", periods=3, freq="D", tz="US/Eastern")
    linspaced = date_range(start, end, periods=3, tz=result_tz)
    tm.assert_index_equal(linspaced, daily)
|
| 403 |
+
|
| 404 |
+
def test_date_range_timedelta(self):
|
| 405 |
+
start = "2020-01-01"
|
| 406 |
+
end = "2020-01-11"
|
| 407 |
+
rng1 = date_range(start, end, freq="3D")
|
| 408 |
+
rng2 = date_range(start, end, freq=timedelta(days=3))
|
| 409 |
+
tm.assert_index_equal(rng1, rng2)
|
| 410 |
+
|
| 411 |
+
def test_range_misspecified(self):
    """Under-specified date_range calls raise ValueError (GH #1095)."""
    msg = (
        "Of the four parameters: start, end, periods, and "
        "freq, exactly three must be specified"
    )
    # Each spec is checked in the listed order.
    incomplete_specs = (
        {"start": "1/1/2000"},
        {"end": "1/1/2000"},
        {"periods": 10},
        {"start": "1/1/2000", "freq": "h"},
        {"end": "1/1/2000", "freq": "h"},
        {"periods": 10, "freq": "h"},
        {},
    )
    for spec in incomplete_specs:
        with pytest.raises(ValueError, match=msg):
            date_range(**spec)
|
| 438 |
+
|
| 439 |
+
def test_compat_replace(self):
|
| 440 |
+
# https://github.com/statsmodels/statsmodels/issues/3349
|
| 441 |
+
# replace should take ints/longs for compat
|
| 442 |
+
result = date_range(Timestamp("1960-04-01 00:00:00"), periods=76, freq="QS-JAN")
|
| 443 |
+
assert len(result) == 76
|
| 444 |
+
|
| 445 |
+
def test_catch_infinite_loop(self):
    """An offset that never advances the date must raise rather than loop."""
    stuck_offset = offsets.DateOffset(minute=5)
    first_day = datetime(2011, 11, 11)
    next_day = datetime(2011, 11, 12)
    # blow up, don't loop forever
    with pytest.raises(
        ValueError, match="Offset <DateOffset: minute=5> did not increment date"
    ):
        date_range(first_day, next_day, freq=stuck_offset)
|
| 451 |
+
|
| 452 |
+
def test_construct_over_dst(self, unit):
    """Hourly US/Pacific range across 2010-11-07 01:00, which occurs twice (GH 20854)."""
    zone = "US/Pacific"
    repeated_hour = Timestamp("2010-11-07 01:00:00")
    # Localize the same wall time with ambiguous=True, then ambiguous=False.
    pre_dst, pst_dst = (
        repeated_hour.tz_localize(zone, ambiguous=flag) for flag in (True, False)
    )
    midnight = Timestamp("2010-11-07 00:00:00", tz=zone)

    expected = DatetimeIndex([midnight, pre_dst, pst_dst], freq="h").as_unit(unit)
    result = date_range(start="2010-11-7", periods=3, freq="h", tz=zone, unit=unit)
    tm.assert_index_equal(result, expected)
|
| 470 |
+
|
| 471 |
+
def test_construct_with_different_start_end_string_format(self, unit):
    """Start uses '-' separators and end uses '/'; both carry +09:00 (GH 12064)."""
    hourly = date_range(
        "2013-01-01 00:00:00+09:00",
        "2013/01/01 02:00:00+09:00",
        freq="h",
        unit=unit,
    )
    stamp_texts = (
        "2013-01-01 00:00:00+09:00",
        "2013-01-01 01:00:00+09:00",
        "2013-01-01 02:00:00+09:00",
    )
    wanted = DatetimeIndex(
        [Timestamp(text) for text in stamp_texts], freq="h"
    ).as_unit(unit)
    tm.assert_index_equal(hourly, wanted)
|
| 488 |
+
|
| 489 |
+
def test_error_with_zero_monthends(self):
    """A MonthEnd(0) frequency must be rejected with a ValueError."""
    zero_step = MonthEnd(0)
    with pytest.raises(
        ValueError, match=r"Offset <0 \* MonthEnds> did not increment date"
    ):
        date_range("1/1/2000", "1/1/2001", freq=zero_step)
|
| 493 |
+
|
| 494 |
+
def test_range_bug(self, unit):
    """A three-month DateOffset frequency yields five stamps here (GH #770)."""
    quarter_step = DateOffset(months=3)
    produced = date_range("2011-1-1", "2012-1-31", freq=quarter_step, unit=unit)

    origin = datetime(2011, 1, 1)
    stepped = [origin + k * quarter_step for k in range(5)]
    wanted = DatetimeIndex(stepped, dtype=f"M8[{unit}]", freq=quarter_step)
    tm.assert_index_equal(produced, wanted)
|
| 504 |
+
|
| 505 |
+
def test_range_tz_pytz(self):
    """Localized endpoints keep their zone in the resulting range (see gh-2906)."""
    eastern = timezone("US/Eastern")
    first = eastern.localize(datetime(2011, 1, 1))
    last = eastern.localize(datetime(2011, 1, 3))

    # start+periods, end+periods and start+end must all give the same answers.
    specs = (
        {"start": first, "periods": 3},
        {"end": last, "periods": 3},
        {"start": first, "end": last},
    )
    for spec in specs:
        dr = date_range(**spec)
        assert dr.tz.zone == eastern.zone
        assert dr[0] == first
        assert dr[2] == last
|
| 525 |
+
|
| 526 |
+
@pytest.mark.parametrize(
    "start, end",
    [
        (
            Timestamp(datetime(2014, 3, 6), tz="US/Eastern"),
            Timestamp(datetime(2014, 3, 12), tz="US/Eastern"),
        ),
        (
            Timestamp(datetime(2013, 11, 1), tz="US/Eastern"),
            Timestamp(datetime(2013, 11, 6), tz="US/Eastern"),
        ),
    ],
)
def test_range_tz_dst_straddle_pytz(self, start, end):
    """Daily US/Eastern ranges hit both endpoints and stay on hour 0."""
    naive_start = start.replace(tzinfo=None)
    naive_end = end.replace(tzinfo=None)

    # aware endpoints; aware endpoints plus tz; naive endpoints plus tz
    variants = (
        ((start, end), {}),
        ((start, end), {"tz": "US/Eastern"}),
        ((naive_start, naive_end), {"tz": "US/Eastern"}),
    )
    for endpoints, extra in variants:
        dr = date_range(*endpoints, freq="D", **extra)
        assert dr[0] == start
        assert dr[-1] == end
        assert np.all(dr.hour == 0)
|
| 559 |
+
|
| 560 |
+
def test_range_tz_dateutil(self):
    """Endpoints carrying a dateutil zone keep it in the resulting range (see gh-2906)."""
    # Use maybe_get_tz to fix filename in tz under dateutil.
    from pandas._libs.tslibs.timezones import maybe_get_tz

    def dateutil_tz(name):
        return maybe_get_tz("dateutil/" + name)

    first = datetime(2011, 1, 1, tzinfo=dateutil_tz("US/Eastern"))
    last = datetime(2011, 1, 3, tzinfo=dateutil_tz("US/Eastern"))

    specs = (
        {"start": first, "periods": 3},
        {"end": last, "periods": 3},
        {"start": first, "end": last},
    )
    for spec in specs:
        dr = date_range(**spec)
        assert dr.tz == dateutil_tz("US/Eastern")
        assert dr[0] == first
        assert dr[2] == last
|
| 585 |
+
|
| 586 |
+
@pytest.mark.parametrize("freq", ["1D", "3D", "2ME", "7W", "3h", "YE"])
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
def test_range_closed(self, freq, tz, inclusive_endpoints_fixture):
    """Check ``inclusive`` against the inclusive="both" range (GH#12409, GH#12684)."""
    begin = Timestamp("2011/1/1", tz=tz)
    end = Timestamp("2014/1/1", tz=tz)

    def span(inclusive):
        return date_range(begin, end, inclusive=inclusive, freq=freq)

    result_range = span(inclusive_endpoints_fixture)
    both_range = span("both")
    expected_range = _get_expected_range(
        begin, end, both_range, inclusive_endpoints_fixture
    )

    tm.assert_index_equal(expected_range, result_range)
|
| 603 |
+
|
| 604 |
+
@pytest.mark.parametrize("freq", ["1D", "3D", "2ME", "7W", "3h", "YE"])
def test_range_with_tz_closed_with_tz_aware_start_end(
    self, freq, inclusive_endpoints_fixture
):
    """Naive endpoints plus tz="US/Eastern", checked against tz-aware endpoints."""
    zone = "US/Eastern"
    begin, end = Timestamp("2011/1/1"), Timestamp("2014/1/1")
    begintz = Timestamp("2011/1/1", tz=zone)
    endtz = Timestamp("2014/1/1", tz=zone)

    def eastern_span(inclusive):
        return date_range(begin, end, inclusive=inclusive, freq=freq, tz=zone)

    result_range = eastern_span(inclusive_endpoints_fixture)
    both_range = eastern_span("both")
    expected_range = _get_expected_range(
        begintz, endtz, both_range, inclusive_endpoints_fixture
    )

    tm.assert_index_equal(expected_range, result_range)
|
| 631 |
+
|
| 632 |
+
def test_range_closed_boundary(self, inclusive_endpoints_fixture):
    """QS-MAR ranges whose endpoints do or do not fall on the frequency (GH#11804)."""
    side = inclusive_endpoints_fixture

    def quarter_starts(first, last):
        return date_range(first, last, freq="QS-MAR", inclusive=side)

    right_boundary = quarter_starts("2015-09-12", "2015-12-01")
    left_boundary = quarter_starts("2015-09-01", "2015-09-12")
    both_boundary = quarter_starts("2015-09-01", "2015-12-01")
    neither_boundary = quarter_starts("2015-09-11", "2015-09-12")

    # (expected_right, expected_left) per fixture value; any other value
    # (i.e. "neither") leaves both equal to ``both_boundary``.
    without_first = both_boundary[1:]
    without_last = both_boundary[:-1]
    expected_by_side = {
        "right": (both_boundary, without_first),
        "left": (without_last, both_boundary),
        "both": (without_first, without_last),
    }
    expected_right, expected_left = expected_by_side.get(
        side, (both_boundary, both_boundary)
    )
    expected_both = both_boundary
    expected_neither = both_boundary[1:-1]

    tm.assert_index_equal(right_boundary, expected_right)
    tm.assert_index_equal(left_boundary, expected_left)
    tm.assert_index_equal(both_boundary, expected_both)
    tm.assert_index_equal(neither_boundary, expected_neither)
|
| 677 |
+
|
| 678 |
+
def test_date_range_years_only(self, tz_naive_fixture):
    """Year-only endpoint strings compared with fully spelled-out dates (GH#6961)."""
    tz = tz_naive_fixture
    # (year-only start, year-only end, freq, explicit start, explicit end)
    cases = (
        ("2014", "2015", "ME", "2014-01-31", "2014-12-31"),
        ("2014", "2015", "MS", "2014-01-01", "2015-01-01"),
        ("2014", "2020", "YE", "2014-12-31", "2019-12-31"),
        ("2014", "2020", "YS", "2014-01-01", "2020-01-01"),
    )
    for year_start, year_end, freq, full_start, full_end in cases:
        from_years = date_range(year_start, year_end, freq=freq, tz=tz)
        from_dates = date_range(full_start, full_end, freq=freq, tz=tz)
        tm.assert_index_equal(from_years, from_dates)
|
| 696 |
+
|
| 697 |
+
def test_freq_divides_end_in_nanos(self):
|
| 698 |
+
# GH 10885
|
| 699 |
+
result_1 = date_range("2005-01-12 10:00", "2005-01-12 16:00", freq="345min")
|
| 700 |
+
result_2 = date_range("2005-01-13 10:00", "2005-01-13 16:00", freq="345min")
|
| 701 |
+
expected_1 = DatetimeIndex(
|
| 702 |
+
["2005-01-12 10:00:00", "2005-01-12 15:45:00"],
|
| 703 |
+
dtype="datetime64[ns]",
|
| 704 |
+
freq="345min",
|
| 705 |
+
tz=None,
|
| 706 |
+
)
|
| 707 |
+
expected_2 = DatetimeIndex(
|
| 708 |
+
["2005-01-13 10:00:00", "2005-01-13 15:45:00"],
|
| 709 |
+
dtype="datetime64[ns]",
|
| 710 |
+
freq="345min",
|
| 711 |
+
tz=None,
|
| 712 |
+
)
|
| 713 |
+
tm.assert_index_equal(result_1, expected_1)
|
| 714 |
+
tm.assert_index_equal(result_2, expected_2)
|
| 715 |
+
|
| 716 |
+
def test_cached_range_bug(self):
|
| 717 |
+
rng = date_range("2010-09-01 05:00:00", periods=50, freq=DateOffset(hours=6))
|
| 718 |
+
assert len(rng) == 50
|
| 719 |
+
assert rng[0] == datetime(2010, 9, 1, 5)
|
| 720 |
+
|
| 721 |
+
def test_timezone_comparison_bug(self):
|
| 722 |
+
# smoke test
|
| 723 |
+
start = Timestamp("20130220 10:00", tz="US/Eastern")
|
| 724 |
+
result = date_range(start, periods=2, tz="US/Eastern")
|
| 725 |
+
assert len(result) == 2
|
| 726 |
+
|
| 727 |
+
def test_timezone_comparison_assert(self):
    """A tz keyword that differs from the start's zone raises AssertionError."""
    eastern_start = Timestamp("20130220 10:00", tz="US/Eastern")
    with pytest.raises(
        AssertionError, match="Inferred time zone not equal to passed time zone"
    ):
        date_range(eastern_start, periods=2, tz="Europe/Berlin")
|
| 732 |
+
|
| 733 |
+
def test_negative_non_tick_frequency_descending_dates(self, tz_aware_fixture):
    """freq="-1MS" with start > end equals the reversed ascending range (GH 23270)."""
    tz = tz_aware_fixture
    descending = date_range(start="2011-06-01", end="2011-01-01", freq="-1MS", tz=tz)
    ascending = date_range(end="2011-06-01", start="2011-01-01", freq="1MS", tz=tz)
    tm.assert_index_equal(descending, ascending[::-1])
|
| 741 |
+
|
| 742 |
+
def test_range_where_start_equal_end(self, inclusive_endpoints_fixture):
|
| 743 |
+
# GH 43394
|
| 744 |
+
start = "2021-09-02"
|
| 745 |
+
end = "2021-09-02"
|
| 746 |
+
result = date_range(
|
| 747 |
+
start=start, end=end, freq="D", inclusive=inclusive_endpoints_fixture
|
| 748 |
+
)
|
| 749 |
+
|
| 750 |
+
both_range = date_range(start=start, end=end, freq="D", inclusive="both")
|
| 751 |
+
if inclusive_endpoints_fixture == "neither":
|
| 752 |
+
expected = both_range[1:-1]
|
| 753 |
+
elif inclusive_endpoints_fixture in ("left", "right", "both"):
|
| 754 |
+
expected = both_range[:]
|
| 755 |
+
|
| 756 |
+
tm.assert_index_equal(result, expected)
|
| 757 |
+
|
| 758 |
+
def test_freq_dateoffset_with_relateivedelta_nanos(self):
    """DateOffset mixing days, hours and nanoseconds, anchored on ``end`` (GH 46877)."""
    wanted = DatetimeIndex(
        [
            "1968-08-02T05:59:59.999999973",
            "1968-09-28T15:59:59.999999976",
            "1968-11-25T01:59:59.999999979",
            "1969-01-21T11:59:59.999999982",
            "1969-03-19T21:59:59.999999985",
            "1969-05-16T07:59:59.999999988",
            "1969-07-12T17:59:59.999999991",
            "1969-09-08T03:59:59.999999994",
            "1969-11-04T13:59:59.999999997",
            "1970-01-01T00:00:00.000000000",
        ],
        name="a",
    )
    step = DateOffset(hours=10, days=57, nanoseconds=3)
    produced = date_range(end="1970-01-01 00:00:00", periods=10, freq=step, name="a")
    tm.assert_index_equal(produced, wanted)
|
| 778 |
+
|
| 779 |
+
@pytest.mark.parametrize(
    "freq,freq_depr",
    [
        ("h", "H"),
        ("2min", "2T"),
        ("1s", "1S"),
        ("2ms", "2L"),
        ("1us", "1U"),
        ("2ns", "2N"),
    ],
)
def test_frequencies_H_T_S_L_U_N_deprecated(self, freq, freq_depr):
    """Deprecated upper-case aliases warn and behave like their replacements.

    ``freq`` is the current alias and ``freq_depr`` the deprecated spelling
    (GH#52536). The deprecated spelling must emit a FutureWarning naming
    both aliases and still produce the same index.
    """
    # Drop the optional leading multiple ("2min" -> "min") to get the bare
    # alias that appears in the warning text.
    freq_msg = re.split("[0-9]*", freq, maxsplit=1)[1]
    freq_depr_msg = re.split("[0-9]*", freq_depr, maxsplit=1)[1]
    # Both fragments must live inside the parentheses: previously the second
    # f-string was a stand-alone expression statement, so only the first half
    # of the warning text was actually matched.
    msg = (
        f"'{freq_depr_msg}' is deprecated and will be removed in a future version, "
        f"please use '{freq_msg}' instead"
    )

    expected = date_range("1/1/2000", periods=2, freq=freq)
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = date_range("1/1/2000", periods=2, freq=freq_depr)
    tm.assert_index_equal(result, expected)
|
| 803 |
+
|
| 804 |
+
@pytest.mark.parametrize(
    "freq,freq_depr",
    [
        ("200YE", "200A"),
        ("YE", "Y"),
        ("2YE-MAY", "2A-MAY"),
        ("YE-MAY", "Y-MAY"),
    ],
)
def test_frequencies_A_deprecated_Y_renamed(self, freq, freq_depr):
    """Deprecated 'A'/'Y' year-end aliases warn and map onto 'YE'.

    ``freq`` is the current alias and ``freq_depr`` the deprecated spelling
    (GH#9586, GH#54275). The deprecated spelling must emit a FutureWarning
    naming both aliases and still produce the same index.
    """
    # Drop the optional leading multiple ("200YE" -> "YE") to get the bare
    # alias that appears in the warning text.
    freq_msg = re.split("[0-9]*", freq, maxsplit=1)[1]
    freq_depr_msg = re.split("[0-9]*", freq_depr, maxsplit=1)[1]
    # The continuation used to be a separate no-op statement, so the
    # "please use ... instead." half was never checked; parenthesise so the
    # two fragments are concatenated into one pattern.
    msg = (
        f"'{freq_depr_msg}' is deprecated and will be removed "
        f"in a future version, please use '{freq_msg}' instead."
    )

    expected = date_range("1/1/2000", periods=2, freq=freq)
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = date_range("1/1/2000", periods=2, freq=freq_depr)
    tm.assert_index_equal(result, expected)
|
| 824 |
+
|
| 825 |
+
def test_to_offset_with_lowercase_deprecated_freq(self) -> None:
    """Lower-case 'm' warns and gives the same index as freq="ME"."""
    # https://github.com/pandas-dev/pandas/issues/56847
    month_ends = DatetimeIndex(["2010-01-31", "2010-02-28"], freq="ME")
    msg = (
        "'m' is deprecated and will be removed in a future version, please use "
        "'ME' instead."
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        from_lowercase = date_range("2010-01-01", periods=2, freq="m")
    tm.assert_index_equal(from_lowercase, month_ends)
|
| 835 |
+
|
| 836 |
+
def test_date_range_bday(self):
|
| 837 |
+
sdate = datetime(1999, 12, 25)
|
| 838 |
+
idx = date_range(start=sdate, freq="1B", periods=20)
|
| 839 |
+
assert len(idx) == 20
|
| 840 |
+
assert idx[0] == sdate + 0 * offsets.BDay()
|
| 841 |
+
assert idx.freq == "B"
|
| 842 |
+
|
| 843 |
+
|
| 844 |
+
class TestDateRangeTZ:
    """Tests for date_range with timezones"""

    def test_hongkong_tz_convert(self):
        """Smoke test: reading ``.hour`` on a Hongkong range works (GH#1673)."""
        hk_days = date_range("2012-01-01", "2012-01-10", freq="D", tz="Hongkong")

        # it works!
        hk_days.hour

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_date_range_span_dst_transition(self, tzstr):
        """Weekly and daily ranges report hour 0 on every stamp (GH#1778)."""
        # Standard -> Daylight Savings Time
        fridays = date_range(
            "03/06/2012 00:00", periods=200, freq="W-FRI", tz="US/Eastern"
        )
        assert (fridays.hour == 0).all()

        days = date_range("2012-11-02", periods=10, tz=tzstr)
        all_midnight = pd.Index([0] * 10, dtype="int32")
        tm.assert_index_equal(days.hour, all_midnight)

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_date_range_timezone_str_argument(self, tzstr):
        """A tz string and the object from maybe_get_tz give equal ranges."""
        tz_object = timezones.maybe_get_tz(tzstr)
        from_string = date_range("1/1/2000", periods=10, tz=tzstr)
        from_object = date_range("1/1/2000", periods=10, tz=tz_object)

        tm.assert_index_equal(from_string, from_object)

    def test_date_range_with_fixed_tz(self):
        """A FixedOffset tzinfo on the endpoints becomes the range's tz."""
        off = FixedOffset(420, "+07:00")
        first = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off)
        last = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off)

        from_endpoints = date_range(start=first, end=last)
        assert off == from_endpoints.tz

        # Same range built from start + periods + explicit tz.
        from_periods = date_range(first, periods=len(from_endpoints), tz=off)
        tm.assert_index_equal(from_endpoints, from_periods)

        # Same range built from strings carrying the +07:00 offset.
        from_strings = date_range(
            "3/11/2012 05:00:00+07:00", "6/11/2012 05:00:00+07:00"
        )
        assert (from_endpoints.values == from_strings.values).all()

    def test_date_range_with_fixedoffset_noname(self):
        """The module-level ``fixed_off_no_name`` tz survives range and Index."""
        off = fixed_off_no_name
        first = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off)
        last = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off)

        rng = date_range(start=first, end=last)
        assert off == rng.tz

        idx = pd.Index([first, last])
        assert off == idx.tz

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_date_range_with_tz(self, tzstr):
        """The second hourly stamp after 04:00 on 3/11/2012 equals the 05:00 stamp."""
        five_am = Timestamp("3/11/2012 05:00", tz=tzstr)
        assert five_am.hour == 5

        hourly = date_range("3/11/2012 04:00", periods=10, freq="h", tz=tzstr)

        assert five_am == hourly[1]

    @pytest.mark.parametrize("tz", ["Europe/London", "dateutil/Europe/London"])
    def test_date_range_ambiguous_endpoint(self, tz):
        """Construction with an ambiguous end-point (GH#11626)."""
        first, last = "2013-10-26 23:00", "2013-10-27 01:00"

        # Without ``ambiguous`` the call is rejected.
        with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"):
            date_range(first, last, tz="Europe/London", freq="h")

        times = date_range(first, last, freq="h", tz=tz, ambiguous="infer")
        assert times[0] == Timestamp("2013-10-26 23:00", tz=tz)
        assert times[-1] == Timestamp("2013-10-27 01:00:00+0000", tz=tz)

    @pytest.mark.parametrize(
        "tz, option, expected",
        [
            ("US/Pacific", "shift_forward", "2019-03-10 03:00"),
            ("dateutil/US/Pacific", "shift_forward", "2019-03-10 03:00"),
            ("US/Pacific", "shift_backward", "2019-03-10 01:00"),
            ("dateutil/US/Pacific", "shift_backward", "2019-03-10 01:00"),
            ("US/Pacific", timedelta(hours=1), "2019-03-10 03:00"),
        ],
    )
    def test_date_range_nonexistent_endpoint(self, tz, option, expected):
        """Construction with a nonexistent end-point."""
        first, last = "2019-03-10 00:00", "2019-03-10 02:00"

        # Without ``nonexistent`` the call is rejected.
        with pytest.raises(pytz.NonExistentTimeError, match="2019-03-10 02:00:00"):
            date_range(first, last, tz="US/Pacific", freq="h")

        times = date_range(first, last, freq="h", tz=tz, nonexistent=option)
        assert times[-1] == Timestamp(expected, tz=tz)
|
| 946 |
+
|
| 947 |
+
|
| 948 |
+
class TestGenRangeGeneration:
    """Tests for ``generate_range`` plus a few related ``date_range`` checks."""

    @pytest.mark.parametrize(
        "freqstr,offset",
        [
            ("B", BDay()),
            ("C", CDay()),
        ],
    )
    def test_generate(self, freqstr, offset):
        """An offset object and its string alias generate the same stamps."""
        from_offset, from_alias = (
            list(generate_range(START, END, periods=None, offset=spec, unit="ns"))
            for spec in (offset, freqstr)
        )
        assert from_offset == from_alias

    def test_1(self):
        """start + periods=2 with BDay gives two consecutive days."""
        stamps = [
            *generate_range(
                start=datetime(2009, 3, 25),
                end=None,
                periods=2,
                offset=BDay(),
                unit="ns",
            )
        ]
        assert stamps == [datetime(2009, 3, 25), datetime(2009, 3, 26)]

    def test_2(self):
        """start + end with BDay gives all three days from Jan 1 to Jan 3, 2008."""
        stamps = [
            *generate_range(
                start=datetime(2008, 1, 1),
                end=datetime(2008, 1, 3),
                periods=None,
                offset=BDay(),
                unit="ns",
            )
        ]
        wanted = [datetime(2008, 1, day) for day in (1, 2, 3)]
        assert stamps == wanted

    def test_3(self):
        """BDay between 2008-01-05 and 2008-01-06 generates nothing."""
        stamps = [
            *generate_range(
                start=datetime(2008, 1, 5),
                end=datetime(2008, 1, 6),
                periods=None,
                offset=BDay(),
                unit="ns",
            )
        ]
        assert stamps == []

    def test_precision_finer_than_offset(self):
        """The start's 00:00:03 is carried through QE and W ranges (GH#9907)."""
        quarterly = date_range(
            start="2015-04-15 00:00:03", end="2016-04-22 00:00:00", freq="QE"
        )
        weekly = date_range(
            start="2015-04-15 00:00:03", end="2015-06-22 00:00:04", freq="W"
        )

        quarter_ends = [
            "2015-06-30 00:00:03",
            "2015-09-30 00:00:03",
            "2015-12-31 00:00:03",
            "2016-03-31 00:00:03",
        ]
        sundays = [
            "2015-04-19 00:00:03",
            "2015-04-26 00:00:03",
            "2015-05-03 00:00:03",
            "2015-05-10 00:00:03",
            "2015-05-17 00:00:03",
            "2015-05-24 00:00:03",
            "2015-05-31 00:00:03",
            "2015-06-07 00:00:03",
            "2015-06-14 00:00:03",
            "2015-06-21 00:00:03",
        ]
        quarterly_expected = DatetimeIndex(
            quarter_ends, dtype="datetime64[ns]", freq="QE-DEC", tz=None
        )
        weekly_expected = DatetimeIndex(
            sundays, dtype="datetime64[ns]", freq="W-SUN", tz=None
        )
        tm.assert_index_equal(quarterly, quarterly_expected)
        tm.assert_index_equal(weekly, weekly_expected)

    # Shared inputs for the parametrization below.
    dt1 = dt2 = "2017-01-01"
    tz1 = "US/Eastern"
    tz2 = "Europe/London"

    @pytest.mark.parametrize(
        "start,end",
        [
            (Timestamp(dt1, tz=tz1), Timestamp(dt2)),
            (Timestamp(dt1), Timestamp(dt2, tz=tz2)),
            (Timestamp(dt1, tz=tz1), Timestamp(dt2, tz=tz2)),
            (Timestamp(dt1, tz=tz2), Timestamp(dt2, tz=tz1)),
        ],
    )
    def test_mismatching_tz_raises_err(self, start, end):
        """Endpoints with mismatching timezones raise TypeError (issue 18488)."""
        msg = "Start and end cannot both be tz-aware with different timezones"
        # Checked once with the default frequency and once with BDay.
        for extra in ({}, {"freq": BDay()}):
            with pytest.raises(TypeError, match=msg):
                date_range(start, end, **extra)
|
| 1052 |
+
with pytest.raises(TypeError, match=msg):
|
| 1053 |
+
date_range(start, end, freq=BDay())
|
| 1054 |
+
|
| 1055 |
+
|
| 1056 |
+
class TestBusinessDateRange:
    """Tests for ``bdate_range`` and business-day ``date_range`` calls."""

    def test_constructor(self):
        """Valid BDay constructions succeed; bad ``periods``/``freq`` raise."""
        # Smoke checks: start+end, start+periods, end+periods.
        valid_calls = (
            ((START, END), {}),
            ((START,), {"periods": 20}),
            ((), {"end": START, "periods": 20}),
        )
        for args, kwargs in valid_calls:
            bdate_range(*args, freq=BDay(), **kwargs)

        # A third positional argument is taken as ``periods``.
        msg = "periods must be a number, got B"
        for factory in (date_range, bdate_range):
            with pytest.raises(TypeError, match=msg):
                factory("2011-1-1", "2012-1-1", "B")

        msg = "freq must be specified for bdate_range; use date_range instead"
        with pytest.raises(TypeError, match=msg):
            bdate_range(START, END, periods=10, freq=None)

    def test_misc(self):
        """end + periods=20 spans from end - 19 business days up to end."""
        last = datetime(2009, 5, 13)
        bdays = bdate_range(end=last, periods=20)
        first = last - 19 * BDay()

        assert len(bdays) == 20
        assert bdays[0] == first
        assert bdays[-1] == last

    def test_date_parse_failure(self):
        """An unparsable date string raises the same error from every entry point."""
        badly_formed_date = "2007/100/1"
        msg = "Unknown datetime string format, unable to parse: 2007/100/1"

        attempts = (
            lambda: Timestamp(badly_formed_date),
            lambda: bdate_range(start=badly_formed_date, periods=10),
            lambda: bdate_range(end=badly_formed_date, periods=10),
            lambda: bdate_range(badly_formed_date, badly_formed_date),
        )
        for attempt in attempts:
            with pytest.raises(ValueError, match=msg):
                attempt()

    def test_daterange_bug_456(self):
        """Union of two business-day ranges is a DatetimeIndex (GH #456)."""
        single_day = bdate_range("12/5/2011", "12/5/2011")
        two_days = bdate_range("12/2/2011", "12/5/2011")
        assert two_days._data.freq == BDay()

        merged = single_day.union(two_days)
        assert isinstance(merged, DatetimeIndex)

    @pytest.mark.parametrize("inclusive", ["left", "right", "neither", "both"])
    def test_bdays_and_open_boundaries(self, inclusive):
        """Weekend endpoints give Monday-Friday for every ``inclusive`` (GH 6673)."""
        weekend_start = "2018-07-21"  # Saturday
        weekend_end = "2018-07-29"  # Sunday
        result = date_range(weekend_start, weekend_end, freq="B", inclusive=inclusive)

        monday = "2018-07-23"
        friday = "2018-07-27"
        expected = date_range(monday, friday, freq="D")
        tm.assert_index_equal(result, expected)
        # Note: we do _not_ expect the freqs to match here

    def test_bday_near_overflow(self):
        """A single period at the floored Timestamp.max works (GH#24252)."""
        # GH#24252 avoid doing unnecessary addition that _would_ overflow
        latest_day = Timestamp.max.floor("D").to_pydatetime()
        produced = date_range(latest_day, end=None, periods=1, freq="B")
        wanted = DatetimeIndex([latest_day], freq="B").as_unit("ns")
        tm.assert_index_equal(produced, wanted)

    def test_bday_overflow_error(self):
        """Two periods from the floored Timestamp.max raise OutOfBoundsDatetime."""
        # GH#24252 check that we get OutOfBoundsDatetime and not OverflowError
        latest_day = Timestamp.max.floor("D").to_pydatetime()
        with pytest.raises(
            OutOfBoundsDatetime, match="Out of bounds nanosecond timestamp"
        ):
            date_range(latest_day, periods=2, freq="B")
|
| 1133 |
+
|
| 1134 |
+
|
| 1135 |
+
class TestCustomDateRange:
|
| 1136 |
+
def test_constructor(self):
|
| 1137 |
+
bdate_range(START, END, freq=CDay())
|
| 1138 |
+
bdate_range(START, periods=20, freq=CDay())
|
| 1139 |
+
bdate_range(end=START, periods=20, freq=CDay())
|
| 1140 |
+
|
| 1141 |
+
msg = "periods must be a number, got C"
|
| 1142 |
+
with pytest.raises(TypeError, match=msg):
|
| 1143 |
+
date_range("2011-1-1", "2012-1-1", "C")
|
| 1144 |
+
|
| 1145 |
+
with pytest.raises(TypeError, match=msg):
|
| 1146 |
+
bdate_range("2011-1-1", "2012-1-1", "C")
|
| 1147 |
+
|
| 1148 |
+
def test_misc(self):
|
| 1149 |
+
end = datetime(2009, 5, 13)
|
| 1150 |
+
dr = bdate_range(end=end, periods=20, freq="C")
|
| 1151 |
+
firstDate = end - 19 * CDay()
|
| 1152 |
+
|
| 1153 |
+
assert len(dr) == 20
|
| 1154 |
+
assert dr[0] == firstDate
|
| 1155 |
+
assert dr[-1] == end
|
| 1156 |
+
|
| 1157 |
+
def test_daterange_bug_456(self):
|
| 1158 |
+
# GH #456
|
| 1159 |
+
rng1 = bdate_range("12/5/2011", "12/5/2011", freq="C")
|
| 1160 |
+
rng2 = bdate_range("12/2/2011", "12/5/2011", freq="C")
|
| 1161 |
+
assert rng2._data.freq == CDay()
|
| 1162 |
+
|
| 1163 |
+
result = rng1.union(rng2)
|
| 1164 |
+
assert isinstance(result, DatetimeIndex)
|
| 1165 |
+
|
| 1166 |
+
def test_cdaterange(self, unit):
|
| 1167 |
+
result = bdate_range("2013-05-01", periods=3, freq="C", unit=unit)
|
| 1168 |
+
expected = DatetimeIndex(
|
| 1169 |
+
["2013-05-01", "2013-05-02", "2013-05-03"], dtype=f"M8[{unit}]", freq="C"
|
| 1170 |
+
)
|
| 1171 |
+
tm.assert_index_equal(result, expected)
|
| 1172 |
+
assert result.freq == expected.freq
|
| 1173 |
+
|
| 1174 |
+
def test_cdaterange_weekmask(self, unit):
|
| 1175 |
+
result = bdate_range(
|
| 1176 |
+
"2013-05-01", periods=3, freq="C", weekmask="Sun Mon Tue Wed Thu", unit=unit
|
| 1177 |
+
)
|
| 1178 |
+
expected = DatetimeIndex(
|
| 1179 |
+
["2013-05-01", "2013-05-02", "2013-05-05"],
|
| 1180 |
+
dtype=f"M8[{unit}]",
|
| 1181 |
+
freq=result.freq,
|
| 1182 |
+
)
|
| 1183 |
+
tm.assert_index_equal(result, expected)
|
| 1184 |
+
assert result.freq == expected.freq
|
| 1185 |
+
|
| 1186 |
+
# raise with non-custom freq
|
| 1187 |
+
msg = (
|
| 1188 |
+
"a custom frequency string is required when holidays or "
|
| 1189 |
+
"weekmask are passed, got frequency B"
|
| 1190 |
+
)
|
| 1191 |
+
with pytest.raises(ValueError, match=msg):
|
| 1192 |
+
bdate_range("2013-05-01", periods=3, weekmask="Sun Mon Tue Wed Thu")
|
| 1193 |
+
|
| 1194 |
+
def test_cdaterange_holidays(self, unit):
|
| 1195 |
+
result = bdate_range(
|
| 1196 |
+
"2013-05-01", periods=3, freq="C", holidays=["2013-05-01"], unit=unit
|
| 1197 |
+
)
|
| 1198 |
+
expected = DatetimeIndex(
|
| 1199 |
+
["2013-05-02", "2013-05-03", "2013-05-06"],
|
| 1200 |
+
dtype=f"M8[{unit}]",
|
| 1201 |
+
freq=result.freq,
|
| 1202 |
+
)
|
| 1203 |
+
tm.assert_index_equal(result, expected)
|
| 1204 |
+
assert result.freq == expected.freq
|
| 1205 |
+
|
| 1206 |
+
# raise with non-custom freq
|
| 1207 |
+
msg = (
|
| 1208 |
+
"a custom frequency string is required when holidays or "
|
| 1209 |
+
"weekmask are passed, got frequency B"
|
| 1210 |
+
)
|
| 1211 |
+
with pytest.raises(ValueError, match=msg):
|
| 1212 |
+
bdate_range("2013-05-01", periods=3, holidays=["2013-05-01"])
|
| 1213 |
+
|
| 1214 |
+
def test_cdaterange_weekmask_and_holidays(self, unit):
|
| 1215 |
+
result = bdate_range(
|
| 1216 |
+
"2013-05-01",
|
| 1217 |
+
periods=3,
|
| 1218 |
+
freq="C",
|
| 1219 |
+
weekmask="Sun Mon Tue Wed Thu",
|
| 1220 |
+
holidays=["2013-05-01"],
|
| 1221 |
+
unit=unit,
|
| 1222 |
+
)
|
| 1223 |
+
expected = DatetimeIndex(
|
| 1224 |
+
["2013-05-02", "2013-05-05", "2013-05-06"],
|
| 1225 |
+
dtype=f"M8[{unit}]",
|
| 1226 |
+
freq=result.freq,
|
| 1227 |
+
)
|
| 1228 |
+
tm.assert_index_equal(result, expected)
|
| 1229 |
+
assert result.freq == expected.freq
|
| 1230 |
+
|
| 1231 |
+
def test_cdaterange_holidays_weekmask_requires_freqstr(self):
|
| 1232 |
+
# raise with non-custom freq
|
| 1233 |
+
msg = (
|
| 1234 |
+
"a custom frequency string is required when holidays or "
|
| 1235 |
+
"weekmask are passed, got frequency B"
|
| 1236 |
+
)
|
| 1237 |
+
with pytest.raises(ValueError, match=msg):
|
| 1238 |
+
bdate_range(
|
| 1239 |
+
"2013-05-01",
|
| 1240 |
+
periods=3,
|
| 1241 |
+
weekmask="Sun Mon Tue Wed Thu",
|
| 1242 |
+
holidays=["2013-05-01"],
|
| 1243 |
+
)
|
| 1244 |
+
|
| 1245 |
+
@pytest.mark.parametrize(
|
| 1246 |
+
"freq", [freq for freq in prefix_mapping if freq.startswith("C")]
|
| 1247 |
+
)
|
| 1248 |
+
def test_all_custom_freq(self, freq):
|
| 1249 |
+
# should not raise
|
| 1250 |
+
bdate_range(
|
| 1251 |
+
START, END, freq=freq, weekmask="Mon Wed Fri", holidays=["2009-03-14"]
|
| 1252 |
+
)
|
| 1253 |
+
|
| 1254 |
+
bad_freq = freq + "FOO"
|
| 1255 |
+
msg = f"invalid custom frequency string: {bad_freq}"
|
| 1256 |
+
with pytest.raises(ValueError, match=msg):
|
| 1257 |
+
bdate_range(START, END, freq=bad_freq)
|
| 1258 |
+
|
| 1259 |
+
@pytest.mark.parametrize(
|
| 1260 |
+
"start_end",
|
| 1261 |
+
[
|
| 1262 |
+
("2018-01-01T00:00:01.000Z", "2018-01-03T00:00:01.000Z"),
|
| 1263 |
+
("2018-01-01T00:00:00.010Z", "2018-01-03T00:00:00.010Z"),
|
| 1264 |
+
("2001-01-01T00:00:00.010Z", "2001-01-03T00:00:00.010Z"),
|
| 1265 |
+
],
|
| 1266 |
+
)
|
| 1267 |
+
def test_range_with_millisecond_resolution(self, start_end):
|
| 1268 |
+
# https://github.com/pandas-dev/pandas/issues/24110
|
| 1269 |
+
start, end = start_end
|
| 1270 |
+
result = date_range(start=start, end=end, periods=2, inclusive="left")
|
| 1271 |
+
expected = DatetimeIndex([start], dtype="M8[ns, UTC]")
|
| 1272 |
+
tm.assert_index_equal(result, expected)
|
| 1273 |
+
|
| 1274 |
+
@pytest.mark.parametrize(
|
| 1275 |
+
"start,period,expected",
|
| 1276 |
+
[
|
| 1277 |
+
("2022-07-23 00:00:00+02:00", 1, ["2022-07-25 00:00:00+02:00"]),
|
| 1278 |
+
("2022-07-22 00:00:00+02:00", 1, ["2022-07-22 00:00:00+02:00"]),
|
| 1279 |
+
(
|
| 1280 |
+
"2022-07-22 00:00:00+02:00",
|
| 1281 |
+
2,
|
| 1282 |
+
["2022-07-22 00:00:00+02:00", "2022-07-25 00:00:00+02:00"],
|
| 1283 |
+
),
|
| 1284 |
+
],
|
| 1285 |
+
)
|
| 1286 |
+
def test_range_with_timezone_and_custombusinessday(self, start, period, expected):
|
| 1287 |
+
# GH49441
|
| 1288 |
+
result = date_range(start=start, periods=period, freq="C")
|
| 1289 |
+
expected = DatetimeIndex(expected).as_unit("ns")
|
| 1290 |
+
tm.assert_index_equal(result, expected)
|
| 1291 |
+
|
| 1292 |
+
|
| 1293 |
+
class TestDateRangeNonNano:
|
| 1294 |
+
def test_date_range_reso_validation(self):
|
| 1295 |
+
msg = "'unit' must be one of 's', 'ms', 'us', 'ns'"
|
| 1296 |
+
with pytest.raises(ValueError, match=msg):
|
| 1297 |
+
date_range("2016-01-01", "2016-03-04", periods=3, unit="h")
|
| 1298 |
+
|
| 1299 |
+
def test_date_range_freq_higher_than_reso(self):
|
| 1300 |
+
# freq being higher-resolution than reso is a problem
|
| 1301 |
+
msg = "Use a lower freq or a higher unit instead"
|
| 1302 |
+
with pytest.raises(ValueError, match=msg):
|
| 1303 |
+
# # TODO give a more useful or informative message?
|
| 1304 |
+
date_range("2016-01-01", "2016-01-02", freq="ns", unit="ms")
|
| 1305 |
+
|
| 1306 |
+
def test_date_range_freq_matches_reso(self):
|
| 1307 |
+
# GH#49106 matching reso is OK
|
| 1308 |
+
dti = date_range("2016-01-01", "2016-01-01 00:00:01", freq="ms", unit="ms")
|
| 1309 |
+
rng = np.arange(1_451_606_400_000, 1_451_606_401_001, dtype=np.int64)
|
| 1310 |
+
expected = DatetimeIndex(rng.view("M8[ms]"), freq="ms")
|
| 1311 |
+
tm.assert_index_equal(dti, expected)
|
| 1312 |
+
|
| 1313 |
+
dti = date_range("2016-01-01", "2016-01-01 00:00:01", freq="us", unit="us")
|
| 1314 |
+
rng = np.arange(1_451_606_400_000_000, 1_451_606_401_000_001, dtype=np.int64)
|
| 1315 |
+
expected = DatetimeIndex(rng.view("M8[us]"), freq="us")
|
| 1316 |
+
tm.assert_index_equal(dti, expected)
|
| 1317 |
+
|
| 1318 |
+
dti = date_range("2016-01-01", "2016-01-01 00:00:00.001", freq="ns", unit="ns")
|
| 1319 |
+
rng = np.arange(
|
| 1320 |
+
1_451_606_400_000_000_000, 1_451_606_400_001_000_001, dtype=np.int64
|
| 1321 |
+
)
|
| 1322 |
+
expected = DatetimeIndex(rng.view("M8[ns]"), freq="ns")
|
| 1323 |
+
tm.assert_index_equal(dti, expected)
|
| 1324 |
+
|
| 1325 |
+
def test_date_range_freq_lower_than_endpoints(self):
|
| 1326 |
+
start = Timestamp("2022-10-19 11:50:44.719781")
|
| 1327 |
+
end = Timestamp("2022-10-19 11:50:47.066458")
|
| 1328 |
+
|
| 1329 |
+
# start and end cannot be cast to "s" unit without lossy rounding,
|
| 1330 |
+
# so we do not allow this in date_range
|
| 1331 |
+
with pytest.raises(ValueError, match="Cannot losslessly convert units"):
|
| 1332 |
+
date_range(start, end, periods=3, unit="s")
|
| 1333 |
+
|
| 1334 |
+
# but we can losslessly cast to "us"
|
| 1335 |
+
dti = date_range(start, end, periods=2, unit="us")
|
| 1336 |
+
rng = np.array(
|
| 1337 |
+
[start.as_unit("us")._value, end.as_unit("us")._value], dtype=np.int64
|
| 1338 |
+
)
|
| 1339 |
+
expected = DatetimeIndex(rng.view("M8[us]"))
|
| 1340 |
+
tm.assert_index_equal(dti, expected)
|
| 1341 |
+
|
| 1342 |
+
def test_date_range_non_nano(self):
|
| 1343 |
+
start = np.datetime64("1066-10-14") # Battle of Hastings
|
| 1344 |
+
end = np.datetime64("2305-07-13") # Jean-Luc Picard's birthday
|
| 1345 |
+
|
| 1346 |
+
dti = date_range(start, end, freq="D", unit="s")
|
| 1347 |
+
assert dti.freq == "D"
|
| 1348 |
+
assert dti.dtype == "M8[s]"
|
| 1349 |
+
|
| 1350 |
+
exp = np.arange(
|
| 1351 |
+
start.astype("M8[s]").view("i8"),
|
| 1352 |
+
(end + 1).astype("M8[s]").view("i8"),
|
| 1353 |
+
24 * 3600,
|
| 1354 |
+
).view("M8[s]")
|
| 1355 |
+
|
| 1356 |
+
tm.assert_numpy_array_equal(dti.to_numpy(), exp)
|
| 1357 |
+
|
| 1358 |
+
|
| 1359 |
+
class TestDateRangeNonTickFreq:
|
| 1360 |
+
# Tests revolving around less-common (non-Tick) `freq` keywords.
|
| 1361 |
+
|
| 1362 |
+
def test_date_range_custom_business_month_begin(self, unit):
|
| 1363 |
+
hcal = USFederalHolidayCalendar()
|
| 1364 |
+
freq = offsets.CBMonthBegin(calendar=hcal)
|
| 1365 |
+
dti = date_range(start="20120101", end="20130101", freq=freq, unit=unit)
|
| 1366 |
+
assert all(freq.is_on_offset(x) for x in dti)
|
| 1367 |
+
|
| 1368 |
+
expected = DatetimeIndex(
|
| 1369 |
+
[
|
| 1370 |
+
"2012-01-03",
|
| 1371 |
+
"2012-02-01",
|
| 1372 |
+
"2012-03-01",
|
| 1373 |
+
"2012-04-02",
|
| 1374 |
+
"2012-05-01",
|
| 1375 |
+
"2012-06-01",
|
| 1376 |
+
"2012-07-02",
|
| 1377 |
+
"2012-08-01",
|
| 1378 |
+
"2012-09-04",
|
| 1379 |
+
"2012-10-01",
|
| 1380 |
+
"2012-11-01",
|
| 1381 |
+
"2012-12-03",
|
| 1382 |
+
],
|
| 1383 |
+
dtype=f"M8[{unit}]",
|
| 1384 |
+
freq=freq,
|
| 1385 |
+
)
|
| 1386 |
+
tm.assert_index_equal(dti, expected)
|
| 1387 |
+
|
| 1388 |
+
def test_date_range_custom_business_month_end(self, unit):
|
| 1389 |
+
hcal = USFederalHolidayCalendar()
|
| 1390 |
+
freq = offsets.CBMonthEnd(calendar=hcal)
|
| 1391 |
+
dti = date_range(start="20120101", end="20130101", freq=freq, unit=unit)
|
| 1392 |
+
assert all(freq.is_on_offset(x) for x in dti)
|
| 1393 |
+
|
| 1394 |
+
expected = DatetimeIndex(
|
| 1395 |
+
[
|
| 1396 |
+
"2012-01-31",
|
| 1397 |
+
"2012-02-29",
|
| 1398 |
+
"2012-03-30",
|
| 1399 |
+
"2012-04-30",
|
| 1400 |
+
"2012-05-31",
|
| 1401 |
+
"2012-06-29",
|
| 1402 |
+
"2012-07-31",
|
| 1403 |
+
"2012-08-31",
|
| 1404 |
+
"2012-09-28",
|
| 1405 |
+
"2012-10-31",
|
| 1406 |
+
"2012-11-30",
|
| 1407 |
+
"2012-12-31",
|
| 1408 |
+
],
|
| 1409 |
+
dtype=f"M8[{unit}]",
|
| 1410 |
+
freq=freq,
|
| 1411 |
+
)
|
| 1412 |
+
tm.assert_index_equal(dti, expected)
|
| 1413 |
+
|
| 1414 |
+
def test_date_range_with_custom_holidays(self, unit):
|
| 1415 |
+
# GH#30593
|
| 1416 |
+
freq = offsets.CustomBusinessHour(start="15:00", holidays=["2020-11-26"])
|
| 1417 |
+
result = date_range(start="2020-11-25 15:00", periods=4, freq=freq, unit=unit)
|
| 1418 |
+
expected = DatetimeIndex(
|
| 1419 |
+
[
|
| 1420 |
+
"2020-11-25 15:00:00",
|
| 1421 |
+
"2020-11-25 16:00:00",
|
| 1422 |
+
"2020-11-27 15:00:00",
|
| 1423 |
+
"2020-11-27 16:00:00",
|
| 1424 |
+
],
|
| 1425 |
+
dtype=f"M8[{unit}]",
|
| 1426 |
+
freq=freq,
|
| 1427 |
+
)
|
| 1428 |
+
tm.assert_index_equal(result, expected)
|
| 1429 |
+
|
| 1430 |
+
def test_date_range_businesshour(self, unit):
|
| 1431 |
+
idx = DatetimeIndex(
|
| 1432 |
+
[
|
| 1433 |
+
"2014-07-04 09:00",
|
| 1434 |
+
"2014-07-04 10:00",
|
| 1435 |
+
"2014-07-04 11:00",
|
| 1436 |
+
"2014-07-04 12:00",
|
| 1437 |
+
"2014-07-04 13:00",
|
| 1438 |
+
"2014-07-04 14:00",
|
| 1439 |
+
"2014-07-04 15:00",
|
| 1440 |
+
"2014-07-04 16:00",
|
| 1441 |
+
],
|
| 1442 |
+
dtype=f"M8[{unit}]",
|
| 1443 |
+
freq="bh",
|
| 1444 |
+
)
|
| 1445 |
+
rng = date_range("2014-07-04 09:00", "2014-07-04 16:00", freq="bh", unit=unit)
|
| 1446 |
+
tm.assert_index_equal(idx, rng)
|
| 1447 |
+
|
| 1448 |
+
idx = DatetimeIndex(
|
| 1449 |
+
["2014-07-04 16:00", "2014-07-07 09:00"], dtype=f"M8[{unit}]", freq="bh"
|
| 1450 |
+
)
|
| 1451 |
+
rng = date_range("2014-07-04 16:00", "2014-07-07 09:00", freq="bh", unit=unit)
|
| 1452 |
+
tm.assert_index_equal(idx, rng)
|
| 1453 |
+
|
| 1454 |
+
idx = DatetimeIndex(
|
| 1455 |
+
[
|
| 1456 |
+
"2014-07-04 09:00",
|
| 1457 |
+
"2014-07-04 10:00",
|
| 1458 |
+
"2014-07-04 11:00",
|
| 1459 |
+
"2014-07-04 12:00",
|
| 1460 |
+
"2014-07-04 13:00",
|
| 1461 |
+
"2014-07-04 14:00",
|
| 1462 |
+
"2014-07-04 15:00",
|
| 1463 |
+
"2014-07-04 16:00",
|
| 1464 |
+
"2014-07-07 09:00",
|
| 1465 |
+
"2014-07-07 10:00",
|
| 1466 |
+
"2014-07-07 11:00",
|
| 1467 |
+
"2014-07-07 12:00",
|
| 1468 |
+
"2014-07-07 13:00",
|
| 1469 |
+
"2014-07-07 14:00",
|
| 1470 |
+
"2014-07-07 15:00",
|
| 1471 |
+
"2014-07-07 16:00",
|
| 1472 |
+
"2014-07-08 09:00",
|
| 1473 |
+
"2014-07-08 10:00",
|
| 1474 |
+
"2014-07-08 11:00",
|
| 1475 |
+
"2014-07-08 12:00",
|
| 1476 |
+
"2014-07-08 13:00",
|
| 1477 |
+
"2014-07-08 14:00",
|
| 1478 |
+
"2014-07-08 15:00",
|
| 1479 |
+
"2014-07-08 16:00",
|
| 1480 |
+
],
|
| 1481 |
+
dtype=f"M8[{unit}]",
|
| 1482 |
+
freq="bh",
|
| 1483 |
+
)
|
| 1484 |
+
rng = date_range("2014-07-04 09:00", "2014-07-08 16:00", freq="bh", unit=unit)
|
| 1485 |
+
tm.assert_index_equal(idx, rng)
|
| 1486 |
+
|
| 1487 |
+
def test_date_range_business_hour2(self, unit):
|
| 1488 |
+
idx1 = date_range(
|
| 1489 |
+
start="2014-07-04 15:00", end="2014-07-08 10:00", freq="bh", unit=unit
|
| 1490 |
+
)
|
| 1491 |
+
idx2 = date_range(start="2014-07-04 15:00", periods=12, freq="bh", unit=unit)
|
| 1492 |
+
idx3 = date_range(end="2014-07-08 10:00", periods=12, freq="bh", unit=unit)
|
| 1493 |
+
expected = DatetimeIndex(
|
| 1494 |
+
[
|
| 1495 |
+
"2014-07-04 15:00",
|
| 1496 |
+
"2014-07-04 16:00",
|
| 1497 |
+
"2014-07-07 09:00",
|
| 1498 |
+
"2014-07-07 10:00",
|
| 1499 |
+
"2014-07-07 11:00",
|
| 1500 |
+
"2014-07-07 12:00",
|
| 1501 |
+
"2014-07-07 13:00",
|
| 1502 |
+
"2014-07-07 14:00",
|
| 1503 |
+
"2014-07-07 15:00",
|
| 1504 |
+
"2014-07-07 16:00",
|
| 1505 |
+
"2014-07-08 09:00",
|
| 1506 |
+
"2014-07-08 10:00",
|
| 1507 |
+
],
|
| 1508 |
+
dtype=f"M8[{unit}]",
|
| 1509 |
+
freq="bh",
|
| 1510 |
+
)
|
| 1511 |
+
tm.assert_index_equal(idx1, expected)
|
| 1512 |
+
tm.assert_index_equal(idx2, expected)
|
| 1513 |
+
tm.assert_index_equal(idx3, expected)
|
| 1514 |
+
|
| 1515 |
+
idx4 = date_range(
|
| 1516 |
+
start="2014-07-04 15:45", end="2014-07-08 10:45", freq="bh", unit=unit
|
| 1517 |
+
)
|
| 1518 |
+
idx5 = date_range(start="2014-07-04 15:45", periods=12, freq="bh", unit=unit)
|
| 1519 |
+
idx6 = date_range(end="2014-07-08 10:45", periods=12, freq="bh", unit=unit)
|
| 1520 |
+
|
| 1521 |
+
expected2 = expected + Timedelta(minutes=45).as_unit(unit)
|
| 1522 |
+
expected2.freq = "bh"
|
| 1523 |
+
tm.assert_index_equal(idx4, expected2)
|
| 1524 |
+
tm.assert_index_equal(idx5, expected2)
|
| 1525 |
+
tm.assert_index_equal(idx6, expected2)
|
| 1526 |
+
|
| 1527 |
+
def test_date_range_business_hour_short(self, unit):
|
| 1528 |
+
# GH#49835
|
| 1529 |
+
idx4 = date_range(start="2014-07-01 10:00", freq="bh", periods=1, unit=unit)
|
| 1530 |
+
expected4 = DatetimeIndex(["2014-07-01 10:00"], dtype=f"M8[{unit}]", freq="bh")
|
| 1531 |
+
tm.assert_index_equal(idx4, expected4)
|
| 1532 |
+
|
| 1533 |
+
def test_date_range_year_start(self, unit):
|
| 1534 |
+
# see GH#9313
|
| 1535 |
+
rng = date_range("1/1/2013", "7/1/2017", freq="YS", unit=unit)
|
| 1536 |
+
exp = DatetimeIndex(
|
| 1537 |
+
["2013-01-01", "2014-01-01", "2015-01-01", "2016-01-01", "2017-01-01"],
|
| 1538 |
+
dtype=f"M8[{unit}]",
|
| 1539 |
+
freq="YS",
|
| 1540 |
+
)
|
| 1541 |
+
tm.assert_index_equal(rng, exp)
|
| 1542 |
+
|
| 1543 |
+
def test_date_range_year_end(self, unit):
|
| 1544 |
+
# see GH#9313
|
| 1545 |
+
rng = date_range("1/1/2013", "7/1/2017", freq="YE", unit=unit)
|
| 1546 |
+
exp = DatetimeIndex(
|
| 1547 |
+
["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-31"],
|
| 1548 |
+
dtype=f"M8[{unit}]",
|
| 1549 |
+
freq="YE",
|
| 1550 |
+
)
|
| 1551 |
+
tm.assert_index_equal(rng, exp)
|
| 1552 |
+
|
| 1553 |
+
def test_date_range_negative_freq_year_end(self, unit):
|
| 1554 |
+
# GH#11018
|
| 1555 |
+
rng = date_range("2011-12-31", freq="-2YE", periods=3, unit=unit)
|
| 1556 |
+
exp = DatetimeIndex(
|
| 1557 |
+
["2011-12-31", "2009-12-31", "2007-12-31"], dtype=f"M8[{unit}]", freq="-2YE"
|
| 1558 |
+
)
|
| 1559 |
+
tm.assert_index_equal(rng, exp)
|
| 1560 |
+
assert rng.freq == "-2YE"
|
| 1561 |
+
|
| 1562 |
+
def test_date_range_business_year_end_year(self, unit):
|
| 1563 |
+
# see GH#9313
|
| 1564 |
+
rng = date_range("1/1/2013", "7/1/2017", freq="BYE", unit=unit)
|
| 1565 |
+
exp = DatetimeIndex(
|
| 1566 |
+
["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-30"],
|
| 1567 |
+
dtype=f"M8[{unit}]",
|
| 1568 |
+
freq="BYE",
|
| 1569 |
+
)
|
| 1570 |
+
tm.assert_index_equal(rng, exp)
|
| 1571 |
+
|
| 1572 |
+
def test_date_range_bms(self, unit):
|
| 1573 |
+
# GH#1645
|
| 1574 |
+
result = date_range("1/1/2000", periods=10, freq="BMS", unit=unit)
|
| 1575 |
+
|
| 1576 |
+
expected = DatetimeIndex(
|
| 1577 |
+
[
|
| 1578 |
+
"2000-01-03",
|
| 1579 |
+
"2000-02-01",
|
| 1580 |
+
"2000-03-01",
|
| 1581 |
+
"2000-04-03",
|
| 1582 |
+
"2000-05-01",
|
| 1583 |
+
"2000-06-01",
|
| 1584 |
+
"2000-07-03",
|
| 1585 |
+
"2000-08-01",
|
| 1586 |
+
"2000-09-01",
|
| 1587 |
+
"2000-10-02",
|
| 1588 |
+
],
|
| 1589 |
+
dtype=f"M8[{unit}]",
|
| 1590 |
+
freq="BMS",
|
| 1591 |
+
)
|
| 1592 |
+
tm.assert_index_equal(result, expected)
|
| 1593 |
+
|
| 1594 |
+
def test_date_range_semi_month_begin(self, unit):
|
| 1595 |
+
dates = [
|
| 1596 |
+
datetime(2007, 12, 15),
|
| 1597 |
+
datetime(2008, 1, 1),
|
| 1598 |
+
datetime(2008, 1, 15),
|
| 1599 |
+
datetime(2008, 2, 1),
|
| 1600 |
+
datetime(2008, 2, 15),
|
| 1601 |
+
datetime(2008, 3, 1),
|
| 1602 |
+
datetime(2008, 3, 15),
|
| 1603 |
+
datetime(2008, 4, 1),
|
| 1604 |
+
datetime(2008, 4, 15),
|
| 1605 |
+
datetime(2008, 5, 1),
|
| 1606 |
+
datetime(2008, 5, 15),
|
| 1607 |
+
datetime(2008, 6, 1),
|
| 1608 |
+
datetime(2008, 6, 15),
|
| 1609 |
+
datetime(2008, 7, 1),
|
| 1610 |
+
datetime(2008, 7, 15),
|
| 1611 |
+
datetime(2008, 8, 1),
|
| 1612 |
+
datetime(2008, 8, 15),
|
| 1613 |
+
datetime(2008, 9, 1),
|
| 1614 |
+
datetime(2008, 9, 15),
|
| 1615 |
+
datetime(2008, 10, 1),
|
| 1616 |
+
datetime(2008, 10, 15),
|
| 1617 |
+
datetime(2008, 11, 1),
|
| 1618 |
+
datetime(2008, 11, 15),
|
| 1619 |
+
datetime(2008, 12, 1),
|
| 1620 |
+
datetime(2008, 12, 15),
|
| 1621 |
+
]
|
| 1622 |
+
# ensure generating a range with DatetimeIndex gives same result
|
| 1623 |
+
result = date_range(start=dates[0], end=dates[-1], freq="SMS", unit=unit)
|
| 1624 |
+
exp = DatetimeIndex(dates, dtype=f"M8[{unit}]", freq="SMS")
|
| 1625 |
+
tm.assert_index_equal(result, exp)
|
| 1626 |
+
|
| 1627 |
+
def test_date_range_semi_month_end(self, unit):
|
| 1628 |
+
dates = [
|
| 1629 |
+
datetime(2007, 12, 31),
|
| 1630 |
+
datetime(2008, 1, 15),
|
| 1631 |
+
datetime(2008, 1, 31),
|
| 1632 |
+
datetime(2008, 2, 15),
|
| 1633 |
+
datetime(2008, 2, 29),
|
| 1634 |
+
datetime(2008, 3, 15),
|
| 1635 |
+
datetime(2008, 3, 31),
|
| 1636 |
+
datetime(2008, 4, 15),
|
| 1637 |
+
datetime(2008, 4, 30),
|
| 1638 |
+
datetime(2008, 5, 15),
|
| 1639 |
+
datetime(2008, 5, 31),
|
| 1640 |
+
datetime(2008, 6, 15),
|
| 1641 |
+
datetime(2008, 6, 30),
|
| 1642 |
+
datetime(2008, 7, 15),
|
| 1643 |
+
datetime(2008, 7, 31),
|
| 1644 |
+
datetime(2008, 8, 15),
|
| 1645 |
+
datetime(2008, 8, 31),
|
| 1646 |
+
datetime(2008, 9, 15),
|
| 1647 |
+
datetime(2008, 9, 30),
|
| 1648 |
+
datetime(2008, 10, 15),
|
| 1649 |
+
datetime(2008, 10, 31),
|
| 1650 |
+
datetime(2008, 11, 15),
|
| 1651 |
+
datetime(2008, 11, 30),
|
| 1652 |
+
datetime(2008, 12, 15),
|
| 1653 |
+
datetime(2008, 12, 31),
|
| 1654 |
+
]
|
| 1655 |
+
# ensure generating a range with DatetimeIndex gives same result
|
| 1656 |
+
result = date_range(start=dates[0], end=dates[-1], freq="SME", unit=unit)
|
| 1657 |
+
exp = DatetimeIndex(dates, dtype=f"M8[{unit}]", freq="SME")
|
| 1658 |
+
tm.assert_index_equal(result, exp)
|
| 1659 |
+
|
| 1660 |
+
def test_date_range_week_of_month(self, unit):
|
| 1661 |
+
# GH#20517
|
| 1662 |
+
# Note the start here is not on_offset for this freq
|
| 1663 |
+
result = date_range(start="20110101", periods=1, freq="WOM-1MON", unit=unit)
|
| 1664 |
+
expected = DatetimeIndex(["2011-01-03"], dtype=f"M8[{unit}]", freq="WOM-1MON")
|
| 1665 |
+
tm.assert_index_equal(result, expected)
|
| 1666 |
+
|
| 1667 |
+
result2 = date_range(start="20110101", periods=2, freq="WOM-1MON", unit=unit)
|
| 1668 |
+
expected2 = DatetimeIndex(
|
| 1669 |
+
["2011-01-03", "2011-02-07"], dtype=f"M8[{unit}]", freq="WOM-1MON"
|
| 1670 |
+
)
|
| 1671 |
+
tm.assert_index_equal(result2, expected2)
|
| 1672 |
+
|
| 1673 |
+
def test_date_range_week_of_month2(self, unit):
|
| 1674 |
+
# GH#5115, GH#5348
|
| 1675 |
+
result = date_range("2013-1-1", periods=4, freq="WOM-1SAT", unit=unit)
|
| 1676 |
+
expected = DatetimeIndex(
|
| 1677 |
+
["2013-01-05", "2013-02-02", "2013-03-02", "2013-04-06"],
|
| 1678 |
+
dtype=f"M8[{unit}]",
|
| 1679 |
+
freq="WOM-1SAT",
|
| 1680 |
+
)
|
| 1681 |
+
tm.assert_index_equal(result, expected)
|
| 1682 |
+
|
| 1683 |
+
def test_date_range_negative_freq_month_end(self, unit):
|
| 1684 |
+
# GH#11018
|
| 1685 |
+
rng = date_range("2011-01-31", freq="-2ME", periods=3, unit=unit)
|
| 1686 |
+
exp = DatetimeIndex(
|
| 1687 |
+
["2011-01-31", "2010-11-30", "2010-09-30"], dtype=f"M8[{unit}]", freq="-2ME"
|
| 1688 |
+
)
|
| 1689 |
+
tm.assert_index_equal(rng, exp)
|
| 1690 |
+
assert rng.freq == "-2ME"
|
| 1691 |
+
|
| 1692 |
+
def test_date_range_fy5253(self, unit):
|
| 1693 |
+
freq = offsets.FY5253(startingMonth=1, weekday=3, variation="nearest")
|
| 1694 |
+
dti = date_range(
|
| 1695 |
+
start="2013-01-01",
|
| 1696 |
+
periods=2,
|
| 1697 |
+
freq=freq,
|
| 1698 |
+
unit=unit,
|
| 1699 |
+
)
|
| 1700 |
+
expected = DatetimeIndex(
|
| 1701 |
+
["2013-01-31", "2014-01-30"], dtype=f"M8[{unit}]", freq=freq
|
| 1702 |
+
)
|
| 1703 |
+
|
| 1704 |
+
tm.assert_index_equal(dti, expected)
|
| 1705 |
+
|
| 1706 |
+
@pytest.mark.parametrize(
|
| 1707 |
+
"freqstr,offset",
|
| 1708 |
+
[
|
| 1709 |
+
("QS", offsets.QuarterBegin(startingMonth=1)),
|
| 1710 |
+
("BQE", offsets.BQuarterEnd(startingMonth=12)),
|
| 1711 |
+
("W-SUN", offsets.Week(weekday=6)),
|
| 1712 |
+
],
|
| 1713 |
+
)
|
| 1714 |
+
def test_date_range_freqstr_matches_offset(self, freqstr, offset):
|
| 1715 |
+
sdate = datetime(1999, 12, 25)
|
| 1716 |
+
edate = datetime(2000, 1, 1)
|
| 1717 |
+
|
| 1718 |
+
idx1 = date_range(start=sdate, end=edate, freq=freqstr)
|
| 1719 |
+
idx2 = date_range(start=sdate, end=edate, freq=offset)
|
| 1720 |
+
assert len(idx1) == len(idx2)
|
| 1721 |
+
assert idx1.freq == idx2.freq
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_datetime.py
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import datetime as dt
|
| 2 |
+
from datetime import date
|
| 3 |
+
import re
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
import pytest
|
| 7 |
+
|
| 8 |
+
from pandas.compat.numpy import np_long
|
| 9 |
+
|
| 10 |
+
import pandas as pd
|
| 11 |
+
from pandas import (
|
| 12 |
+
DataFrame,
|
| 13 |
+
DatetimeIndex,
|
| 14 |
+
Index,
|
| 15 |
+
Timestamp,
|
| 16 |
+
date_range,
|
| 17 |
+
offsets,
|
| 18 |
+
)
|
| 19 |
+
import pandas._testing as tm
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class TestDatetimeIndex:
|
| 23 |
+
def test_is_(self):
|
| 24 |
+
dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME")
|
| 25 |
+
assert dti.is_(dti)
|
| 26 |
+
assert dti.is_(dti.view())
|
| 27 |
+
assert not dti.is_(dti.copy())
|
| 28 |
+
|
| 29 |
+
def test_time_overflow_for_32bit_machines(self):
|
| 30 |
+
# GH8943. On some machines NumPy defaults to np.int32 (for example,
|
| 31 |
+
# 32-bit Linux machines). In the function _generate_regular_range
|
| 32 |
+
# found in tseries/index.py, `periods` gets multiplied by `strides`
|
| 33 |
+
# (which has value 1e9) and since the max value for np.int32 is ~2e9,
|
| 34 |
+
# and since those machines won't promote np.int32 to np.int64, we get
|
| 35 |
+
# overflow.
|
| 36 |
+
periods = np_long(1000)
|
| 37 |
+
|
| 38 |
+
idx1 = date_range(start="2000", periods=periods, freq="s")
|
| 39 |
+
assert len(idx1) == periods
|
| 40 |
+
|
| 41 |
+
idx2 = date_range(end="2000", periods=periods, freq="s")
|
| 42 |
+
assert len(idx2) == periods
|
| 43 |
+
|
| 44 |
+
def test_nat(self):
|
| 45 |
+
assert DatetimeIndex([np.nan])[0] is pd.NaT
|
| 46 |
+
|
| 47 |
+
def test_week_of_month_frequency(self):
|
| 48 |
+
# GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise
|
| 49 |
+
d1 = date(2002, 9, 1)
|
| 50 |
+
d2 = date(2013, 10, 27)
|
| 51 |
+
d3 = date(2012, 9, 30)
|
| 52 |
+
idx1 = DatetimeIndex([d1, d2])
|
| 53 |
+
idx2 = DatetimeIndex([d3])
|
| 54 |
+
result_append = idx1.append(idx2)
|
| 55 |
+
expected = DatetimeIndex([d1, d2, d3])
|
| 56 |
+
tm.assert_index_equal(result_append, expected)
|
| 57 |
+
result_union = idx1.union(idx2)
|
| 58 |
+
expected = DatetimeIndex([d1, d3, d2])
|
| 59 |
+
tm.assert_index_equal(result_union, expected)
|
| 60 |
+
|
| 61 |
+
def test_append_nondatetimeindex(self):
|
| 62 |
+
rng = date_range("1/1/2000", periods=10)
|
| 63 |
+
idx = Index(["a", "b", "c", "d"])
|
| 64 |
+
|
| 65 |
+
result = rng.append(idx)
|
| 66 |
+
assert isinstance(result[0], Timestamp)
|
| 67 |
+
|
| 68 |
+
def test_misc_coverage(self):
|
| 69 |
+
rng = date_range("1/1/2000", periods=5)
|
| 70 |
+
result = rng.groupby(rng.day)
|
| 71 |
+
assert isinstance(next(iter(result.values()))[0], Timestamp)
|
| 72 |
+
|
| 73 |
+
# TODO: belongs in frame groupby tests?
|
| 74 |
+
def test_groupby_function_tuple_1677(self):
|
| 75 |
+
df = DataFrame(
|
| 76 |
+
np.random.default_rng(2).random(100),
|
| 77 |
+
index=date_range("1/1/2000", periods=100),
|
| 78 |
+
)
|
| 79 |
+
monthly_group = df.groupby(lambda x: (x.year, x.month))
|
| 80 |
+
|
| 81 |
+
result = monthly_group.mean()
|
| 82 |
+
assert isinstance(result.index[0], tuple)
|
| 83 |
+
|
| 84 |
+
def assert_index_parameters(self, index):
|
| 85 |
+
assert index.freq == "40960ns"
|
| 86 |
+
assert index.inferred_freq == "40960ns"
|
| 87 |
+
|
| 88 |
+
def test_ns_index(self):
|
| 89 |
+
nsamples = 400
|
| 90 |
+
ns = int(1e9 / 24414)
|
| 91 |
+
dtstart = np.datetime64("2012-09-20T00:00:00")
|
| 92 |
+
|
| 93 |
+
dt = dtstart + np.arange(nsamples) * np.timedelta64(ns, "ns")
|
| 94 |
+
freq = ns * offsets.Nano()
|
| 95 |
+
index = DatetimeIndex(dt, freq=freq, name="time")
|
| 96 |
+
self.assert_index_parameters(index)
|
| 97 |
+
|
| 98 |
+
new_index = date_range(start=index[0], end=index[-1], freq=index.freq)
|
| 99 |
+
self.assert_index_parameters(new_index)
|
| 100 |
+
|
| 101 |
+
def test_asarray_tz_naive(self):
|
| 102 |
+
# This shouldn't produce a warning.
|
| 103 |
+
idx = date_range("2000", periods=2)
|
| 104 |
+
# M8[ns] by default
|
| 105 |
+
result = np.asarray(idx)
|
| 106 |
+
|
| 107 |
+
expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")
|
| 108 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 109 |
+
|
| 110 |
+
# optionally, object
|
| 111 |
+
result = np.asarray(idx, dtype=object)
|
| 112 |
+
|
| 113 |
+
expected = np.array([Timestamp("2000-01-01"), Timestamp("2000-01-02")])
|
| 114 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 115 |
+
|
| 116 |
+
def test_asarray_tz_aware(self):
|
| 117 |
+
tz = "US/Central"
|
| 118 |
+
idx = date_range("2000", periods=2, tz=tz)
|
| 119 |
+
expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]")
|
| 120 |
+
result = np.asarray(idx, dtype="datetime64[ns]")
|
| 121 |
+
|
| 122 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 123 |
+
|
| 124 |
+
# Old behavior with no warning
|
| 125 |
+
result = np.asarray(idx, dtype="M8[ns]")
|
| 126 |
+
|
| 127 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 128 |
+
|
| 129 |
+
# Future behavior with no warning
|
| 130 |
+
expected = np.array(
|
| 131 |
+
[Timestamp("2000-01-01", tz=tz), Timestamp("2000-01-02", tz=tz)]
|
| 132 |
+
)
|
| 133 |
+
result = np.asarray(idx, dtype=object)
|
| 134 |
+
|
| 135 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 136 |
+
|
| 137 |
+
def test_CBH_deprecated(self):
|
| 138 |
+
msg = "'CBH' is deprecated and will be removed in a future version."
|
| 139 |
+
|
| 140 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 141 |
+
expected = date_range(
|
| 142 |
+
dt.datetime(2022, 12, 11), dt.datetime(2022, 12, 13), freq="CBH"
|
| 143 |
+
)
|
| 144 |
+
result = DatetimeIndex(
|
| 145 |
+
[
|
| 146 |
+
"2022-12-12 09:00:00",
|
| 147 |
+
"2022-12-12 10:00:00",
|
| 148 |
+
"2022-12-12 11:00:00",
|
| 149 |
+
"2022-12-12 12:00:00",
|
| 150 |
+
"2022-12-12 13:00:00",
|
| 151 |
+
"2022-12-12 14:00:00",
|
| 152 |
+
"2022-12-12 15:00:00",
|
| 153 |
+
"2022-12-12 16:00:00",
|
| 154 |
+
],
|
| 155 |
+
dtype="datetime64[ns]",
|
| 156 |
+
freq="cbh",
|
| 157 |
+
)
|
| 158 |
+
|
| 159 |
+
tm.assert_index_equal(result, expected)
|
| 160 |
+
|
| 161 |
+
@pytest.mark.parametrize(
    "freq_depr, expected_values, expected_freq",
    [
        ("AS-AUG", ["2021-08-01", "2022-08-01", "2023-08-01"], "YS-AUG"),
        ("1BAS-MAY", ["2021-05-03", "2022-05-02", "2023-05-01"], "1BYS-MAY"),
    ],
)
def test_AS_BAS_deprecated(self, freq_depr, expected_values, expected_freq):
    """Check that a deprecated year-start alias warns and matches its successor.

    ``freq_depr`` is passed to ``date_range`` under the warning check; the
    result must equal a DatetimeIndex built from ``expected_values`` with
    ``expected_freq``.
    """
    # GH#55479
    # Strip any leading digits so the expected message quotes the bare alias.
    alias = freq_depr.lstrip("0123456789")
    warning_text = f"'{alias}' is deprecated and will be removed in a future version."

    span_start, span_end = dt.datetime(2020, 12, 1), dt.datetime(2023, 12, 1)
    with tm.assert_produces_warning(FutureWarning, match=warning_text):
        from_deprecated = date_range(span_start, span_end, freq=freq_depr)

    from_current = DatetimeIndex(
        expected_values,
        dtype="datetime64[ns]",
        freq=expected_freq,
    )
    tm.assert_index_equal(from_current, from_deprecated)
|
| 192 |
+
|
| 193 |
+
@pytest.mark.parametrize(
    "freq, expected_values, freq_depr",
    [
        ("2BYE-MAR", ["2016-03-31"], "2BA-MAR"),
        ("2BYE-JUN", ["2016-06-30"], "2BY-JUN"),
        ("2BME", ["2016-02-29", "2016-04-29", "2016-06-30"], "2BM"),
        ("2BQE", ["2016-03-31"], "2BQ"),
        ("1BQE-MAR", ["2016-03-31", "2016-06-30"], "1BQ-MAR"),
    ],
)
def test_BM_BQ_BY_deprecated(self, freq, expected_values, freq_depr):
    """Check that deprecated business month/quarter/year aliases still work.

    ``date_range`` is called with ``freq_depr`` under the warning check and the
    result must equal a DatetimeIndex built from ``expected_values`` with the
    replacement alias ``freq``.
    """
    # GH#52064
    # Both halves must sit inside one parenthesised expression: written as two
    # bare statements, the second f-string is evaluated and discarded, and the
    # "please use ... instead" part of the message is never checked.
    # The leading multiplier character is dropped from both aliases.
    msg = (
        f"'{freq_depr[1:]}' is deprecated and will be removed "
        f"in a future version, please use '{freq[1:]}' instead."
    )

    with tm.assert_produces_warning(FutureWarning, match=msg):
        expected = date_range(start="2016-02-21", end="2016-08-21", freq=freq_depr)
    result = DatetimeIndex(
        data=expected_values,
        dtype="datetime64[ns]",
        freq=freq,
    )

    tm.assert_index_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_formats.py
ADDED
|
@@ -0,0 +1,356 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
|
| 3 |
+
import dateutil.tz
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pytest
|
| 6 |
+
import pytz
|
| 7 |
+
|
| 8 |
+
import pandas as pd
|
| 9 |
+
from pandas import (
|
| 10 |
+
DatetimeIndex,
|
| 11 |
+
NaT,
|
| 12 |
+
Series,
|
| 13 |
+
)
|
| 14 |
+
import pandas._testing as tm
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
@pytest.fixture(params=["s", "ms", "us", "ns"])
def unit(request):
    """Parametrized fixture returning each unit string ("s", "ms", "us", "ns")."""
    chosen_unit = request.param
    return chosen_unit
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def test_get_values_for_csv():
|
| 23 |
+
index = pd.date_range(freq="1D", periods=3, start="2017-01-01")
|
| 24 |
+
|
| 25 |
+
# First, with no arguments.
|
| 26 |
+
expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object)
|
| 27 |
+
|
| 28 |
+
result = index._get_values_for_csv()
|
| 29 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 30 |
+
|
| 31 |
+
# No NaN values, so na_rep has no effect
|
| 32 |
+
result = index._get_values_for_csv(na_rep="pandas")
|
| 33 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 34 |
+
|
| 35 |
+
# Make sure date formatting works
|
| 36 |
+
expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype=object)
|
| 37 |
+
|
| 38 |
+
result = index._get_values_for_csv(date_format="%m-%Y-%d")
|
| 39 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 40 |
+
|
| 41 |
+
# NULL object handling should work
|
| 42 |
+
index = DatetimeIndex(["2017-01-01", NaT, "2017-01-03"])
|
| 43 |
+
expected = np.array(["2017-01-01", "NaT", "2017-01-03"], dtype=object)
|
| 44 |
+
|
| 45 |
+
result = index._get_values_for_csv(na_rep="NaT")
|
| 46 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 47 |
+
|
| 48 |
+
expected = np.array(["2017-01-01", "pandas", "2017-01-03"], dtype=object)
|
| 49 |
+
|
| 50 |
+
result = index._get_values_for_csv(na_rep="pandas")
|
| 51 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 52 |
+
|
| 53 |
+
result = index._get_values_for_csv(na_rep="NaT", date_format="%Y-%m-%d %H:%M:%S.%f")
|
| 54 |
+
expected = np.array(
|
| 55 |
+
["2017-01-01 00:00:00.000000", "NaT", "2017-01-03 00:00:00.000000"],
|
| 56 |
+
dtype=object,
|
| 57 |
+
)
|
| 58 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 59 |
+
|
| 60 |
+
# invalid format
|
| 61 |
+
result = index._get_values_for_csv(na_rep="NaT", date_format="foo")
|
| 62 |
+
expected = np.array(["foo", "NaT", "foo"], dtype=object)
|
| 63 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
class TestDatetimeIndexRendering:
    """Rendering checks (repr, str, Series repr, summary) for DatetimeIndex."""

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_dti_with_timezone_repr(self, tzstr):
        """The repr of a localized range contains its first local timestamp."""
        rng = pd.date_range("4/13/2010", "5/6/2010")

        rng_eastern = rng.tz_localize(tzstr)

        rng_repr = repr(rng_eastern)
        assert "2010-04-13 00:00:00" in rng_repr

    def test_dti_repr_dates(self):
        """Two midnight datetimes are rendered as bare dates."""
        text = str(pd.to_datetime([datetime(2013, 1, 1), datetime(2014, 1, 1)]))
        assert "['2013-01-01'," in text
        assert ", '2014-01-01']" in text

    def test_dti_repr_mixed(self):
        """With one non-midnight value, midnight values also show a time part."""
        text = str(
            pd.to_datetime(
                [datetime(2013, 1, 1), datetime(2014, 1, 1, 12), datetime(2014, 1, 1)]
            )
        )
        assert "'2013-01-01 00:00:00'," in text
        assert "'2014-01-01 00:00:00']" in text

    def test_dti_repr_short(self):
        """Smoke test: repr works for ranges of one, two and three elements."""
        for periods in (1, 2, 3):
            dr = pd.date_range(start="1/1/2012", periods=periods)
            repr(dr)

    @pytest.mark.parametrize(
        "dates, freq, expected_repr",
        [
            (
                ["2012-01-01 00:00:00"],
                "60min",
                (
                    "DatetimeIndex(['2012-01-01 00:00:00'], "
                    "dtype='datetime64[ns]', freq='60min')"
                ),
            ),
            (
                ["2012-01-01 00:00:00", "2012-01-01 01:00:00"],
                "60min",
                "DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 01:00:00'], "
                "dtype='datetime64[ns]', freq='60min')",
            ),
            (
                ["2012-01-01"],
                "24h",
                "DatetimeIndex(['2012-01-01'], dtype='datetime64[ns]', freq='24h')",
            ),
        ],
    )
    def test_dti_repr_time_midnight(self, dates, freq, expected_repr, unit):
        """The repr matches the expected text for every unit (GH53634)."""
        # GH53634
        dti = DatetimeIndex(dates, freq=freq).as_unit(unit)
        actual_repr = repr(dti)
        assert actual_repr == expected_repr.replace("[ns]", f"[{unit}]")

    def test_dti_representation(self, unit):
        """repr() and str() agree with the expected text for several indexes."""
        idxs = [
            DatetimeIndex([], freq="D"),
            DatetimeIndex(["2011-01-01"], freq="D"),
            DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D"),
            DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D"),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="h",
                tz="Asia/Tokyo",
            ),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
            ),
            DatetimeIndex(["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="UTC"),
        ]

        exp = [
            "DatetimeIndex([], dtype='datetime64[ns]', freq='D')",
            "DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', freq='D')",
            (
                "DatetimeIndex(['2011-01-01', '2011-01-02'], "
                "dtype='datetime64[ns]', freq='D')"
            ),
            (
                "DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], "
                "dtype='datetime64[ns]', freq='D')"
            ),
            (
                "DatetimeIndex(['2011-01-01 09:00:00+09:00', "
                "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']"
                ", dtype='datetime64[ns, Asia/Tokyo]', freq='h')"
            ),
            (
                "DatetimeIndex(['2011-01-01 09:00:00-05:00', "
                "'2011-01-01 10:00:00-05:00', 'NaT'], "
                "dtype='datetime64[ns, US/Eastern]', freq=None)"
            ),
            (
                "DatetimeIndex(['2011-01-01 09:00:00+00:00', "
                "'2011-01-01 10:00:00+00:00', 'NaT'], "
                "dtype='datetime64[ns, UTC]', freq=None)"
            ),
        ]

        # A wide display keeps every repr on a single line.
        with pd.option_context("display.width", 300):
            for index, expected in zip(idxs, exp):
                index = index.as_unit(unit)
                expected = expected.replace("[ns", f"[{unit}")
                result = repr(index)
                assert result == expected
                result = str(index)
                assert result == expected

    # TODO: this is a Series.__repr__ test
    def test_dti_representation_to_series(self, unit):
        """repr() of a Series built from each index matches the expected text.

        The expected strings keep the Series repr layout: the row label, a
        three-space separator, then the value right-aligned to the width of
        the widest value in the column.
        """
        idx1 = DatetimeIndex([], freq="D")
        idx2 = DatetimeIndex(["2011-01-01"], freq="D")
        idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D")
        idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
        idx5 = DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
            freq="h",
            tz="Asia/Tokyo",
        )
        idx6 = DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
        )
        idx7 = DatetimeIndex(["2011-01-01 09:00", "2011-01-02 10:15"])

        exp1 = """Series([], dtype: datetime64[ns])"""

        exp2 = "0   2011-01-01\ndtype: datetime64[ns]"

        exp3 = "0   2011-01-01\n1   2011-01-02\ndtype: datetime64[ns]"

        exp4 = (
            "0   2011-01-01\n"
            "1   2011-01-02\n"
            "2   2011-01-03\n"
            "dtype: datetime64[ns]"
        )

        exp5 = (
            "0   2011-01-01 09:00:00+09:00\n"
            "1   2011-01-01 10:00:00+09:00\n"
            "2   2011-01-01 11:00:00+09:00\n"
            "dtype: datetime64[ns, Asia/Tokyo]"
        )

        # NaT is right-aligned to the 25-character width of the timestamps.
        exp6 = (
            "0   2011-01-01 09:00:00-05:00\n"
            "1   2011-01-01 10:00:00-05:00\n"
            f"2   {'NaT':>25}\n"
            "dtype: datetime64[ns, US/Eastern]"
        )

        exp7 = (
            "0   2011-01-01 09:00:00\n"
            "1   2011-01-02 10:15:00\n"
            "dtype: datetime64[ns]"
        )

        with pd.option_context("display.width", 300):
            for idx, expected in zip(
                [idx1, idx2, idx3, idx4, idx5, idx6, idx7],
                [exp1, exp2, exp3, exp4, exp5, exp6, exp7],
            ):
                ser = Series(idx.as_unit(unit))
                result = repr(ser)
                assert result == expected.replace("[ns", f"[{unit}")

    def test_dti_summary(self):
        """``_summary()`` reports entry count, first/last value and freq."""
        # GH#9116
        idx1 = DatetimeIndex([], freq="D")
        idx2 = DatetimeIndex(["2011-01-01"], freq="D")
        idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D")
        idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
        idx5 = DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
            freq="h",
            tz="Asia/Tokyo",
        )
        idx6 = DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
        )

        exp1 = "DatetimeIndex: 0 entries\nFreq: D"

        exp2 = "DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01\nFreq: D"

        exp3 = "DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02\nFreq: D"

        exp4 = "DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03\nFreq: D"

        exp5 = (
            "DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 "
            "to 2011-01-01 11:00:00+09:00\n"
            "Freq: h"
        )

        exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT"""

        for idx, expected in zip(
            [idx1, idx2, idx3, idx4, idx5, idx6], [exp1, exp2, exp3, exp4, exp5, exp6]
        ):
            result = idx._summary()
            assert result == expected

    @pytest.mark.parametrize("tz", [None, pytz.utc, dateutil.tz.tzutc()])
    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_dti_business_repr_etc_smoke(self, tz, freq):
        """Smoke test: repr and summary run on business-day ranges."""
        # only really care that it works
        dti = pd.bdate_range(
            datetime(2009, 1, 1), datetime(2010, 1, 1), tz=tz, freq=freq
        )
        repr(dti)
        dti._summary()
        dti[2:2]._summary()
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
class TestFormat:
|
| 297 |
+
def test_format(self):
|
| 298 |
+
# GH#35439
|
| 299 |
+
idx = pd.date_range("20130101", periods=5)
|
| 300 |
+
expected = [f"{x:%Y-%m-%d}" for x in idx]
|
| 301 |
+
msg = r"DatetimeIndex\.format is deprecated"
|
| 302 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 303 |
+
assert idx.format() == expected
|
| 304 |
+
|
| 305 |
+
def test_format_with_name_time_info(self):
|
| 306 |
+
# bug I fixed 12/20/2011
|
| 307 |
+
dates = pd.date_range("2011-01-01 04:00:00", periods=10, name="something")
|
| 308 |
+
|
| 309 |
+
msg = "DatetimeIndex.format is deprecated"
|
| 310 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 311 |
+
formatted = dates.format(name=True)
|
| 312 |
+
assert formatted[0] == "something"
|
| 313 |
+
|
| 314 |
+
def test_format_datetime_with_time(self):
|
| 315 |
+
dti = DatetimeIndex([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)])
|
| 316 |
+
|
| 317 |
+
msg = "DatetimeIndex.format is deprecated"
|
| 318 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 319 |
+
result = dti.format()
|
| 320 |
+
expected = ["2012-02-07 00:00:00", "2012-02-07 23:00:00"]
|
| 321 |
+
assert len(result) == 2
|
| 322 |
+
assert result == expected
|
| 323 |
+
|
| 324 |
+
def test_format_datetime(self):
|
| 325 |
+
msg = "DatetimeIndex.format is deprecated"
|
| 326 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 327 |
+
formatted = pd.to_datetime([datetime(2003, 1, 1, 12), NaT]).format()
|
| 328 |
+
assert formatted[0] == "2003-01-01 12:00:00"
|
| 329 |
+
assert formatted[1] == "NaT"
|
| 330 |
+
|
| 331 |
+
def test_format_date(self):
|
| 332 |
+
msg = "DatetimeIndex.format is deprecated"
|
| 333 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 334 |
+
formatted = pd.to_datetime([datetime(2003, 1, 1), NaT]).format()
|
| 335 |
+
assert formatted[0] == "2003-01-01"
|
| 336 |
+
assert formatted[1] == "NaT"
|
| 337 |
+
|
| 338 |
+
def test_format_date_tz(self):
|
| 339 |
+
dti = pd.to_datetime([datetime(2013, 1, 1)], utc=True)
|
| 340 |
+
msg = "DatetimeIndex.format is deprecated"
|
| 341 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 342 |
+
formatted = dti.format()
|
| 343 |
+
assert formatted[0] == "2013-01-01 00:00:00+00:00"
|
| 344 |
+
|
| 345 |
+
dti = pd.to_datetime([datetime(2013, 1, 1), NaT], utc=True)
|
| 346 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 347 |
+
formatted = dti.format()
|
| 348 |
+
assert formatted[0] == "2013-01-01 00:00:00+00:00"
|
| 349 |
+
|
| 350 |
+
def test_format_date_explicit_date_format(self):
|
| 351 |
+
dti = pd.to_datetime([datetime(2003, 2, 1), NaT])
|
| 352 |
+
msg = "DatetimeIndex.format is deprecated"
|
| 353 |
+
with tm.assert_produces_warning(FutureWarning, match=msg):
|
| 354 |
+
formatted = dti.format(date_format="%m-%d-%Y", na_rep="UT")
|
| 355 |
+
assert formatted[0] == "02-01-2003"
|
| 356 |
+
assert formatted[1] == "UT"
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_freq_attr.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
from pandas import (
|
| 4 |
+
DatetimeIndex,
|
| 5 |
+
date_range,
|
| 6 |
+
)
|
| 7 |
+
|
| 8 |
+
from pandas.tseries.offsets import (
|
| 9 |
+
BDay,
|
| 10 |
+
DateOffset,
|
| 11 |
+
Day,
|
| 12 |
+
Hour,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class TestFreq:
    """Setting and reading ``freq`` on the array behind a DatetimeIndex."""

    def test_freq_setter_errors(self):
        """Incompatible or unparseable frequencies raise ValueError."""
        # GH#20678
        every_other_day = DatetimeIndex(["20180101", "20180103", "20180105"])

        # setting with an incompatible freq
        mismatch = (
            "Inferred frequency 2D from passed values "
            "does not conform to passed frequency 5D"
        )
        with pytest.raises(ValueError, match=mismatch):
            every_other_day._data.freq = "5D"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            every_other_day._data.freq = "foo"

    @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48h", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        """A compatible freq can be assigned and later reset to None."""
        # GH#20678
        index = DatetimeIndex(values, tz=tz)

        # can set to an offset, converting from string if necessary
        index._data.freq = freq
        assert index.freq == freq
        assert isinstance(index.freq, DateOffset)

        # can reset to None
        index._data.freq = None
        assert index.freq is None

    def test_freq_view_safe(self):
        """Dropping freq on a derived index leaves the source objects untouched."""
        # Setting the freq for one DatetimeIndex shouldn't alter the freq
        # for another that views the same data
        original = date_range("2016-01-01", periods=5)
        shared_array = original._data

        without_freq = DatetimeIndex(shared_array)._with_freq(None)
        assert without_freq.freq is None

        # Original was not altered
        assert original.freq == "D"
        assert shared_array.freq == "D"
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_indexing.py
ADDED
|
@@ -0,0 +1,717 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import (
|
| 2 |
+
date,
|
| 3 |
+
datetime,
|
| 4 |
+
time,
|
| 5 |
+
timedelta,
|
| 6 |
+
)
|
| 7 |
+
|
| 8 |
+
import numpy as np
|
| 9 |
+
import pytest
|
| 10 |
+
|
| 11 |
+
from pandas._libs import index as libindex
|
| 12 |
+
from pandas.compat.numpy import np_long
|
| 13 |
+
|
| 14 |
+
import pandas as pd
|
| 15 |
+
from pandas import (
|
| 16 |
+
DatetimeIndex,
|
| 17 |
+
Index,
|
| 18 |
+
Timestamp,
|
| 19 |
+
bdate_range,
|
| 20 |
+
date_range,
|
| 21 |
+
notna,
|
| 22 |
+
)
|
| 23 |
+
import pandas._testing as tm
|
| 24 |
+
|
| 25 |
+
from pandas.tseries.frequencies import to_offset
|
| 26 |
+
|
| 27 |
+
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class TestGetItem:
    """``DatetimeIndex.__getitem__`` with scalars, slices and integer lists."""

    def test_getitem_slice_keeps_name(self):
        """Slicing preserves the index name."""
        # GH4226
        zone = "America/Los_Angeles"
        first = Timestamp("2013-07-01 00:00:00", tz=zone)
        last = Timestamp("2013-07-02 00:00:00", tz=zone)
        hourly = date_range(first, last, freq="h", name="timebucket")
        assert hourly[1:].name == hourly.name

    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
    def test_getitem(self, tz):
        """Scalar access returns a Timestamp; slices keep a matching freq."""
        idx = date_range("2011-01-01", "2011-01-31", freq="D", tz=tz, name="idx")

        assert idx[0] == Timestamp("2011-01-01", tz=idx.tz)

        # (slice, first date, last date, freq of the sliced result)
        forward_cases = [
            (slice(0, 5), "2011-01-01", "2011-01-05", "D"),
            (slice(0, 10, 2), "2011-01-01", "2011-01-09", "2D"),
            (slice(-20, -5, 3), "2011-01-12", "2011-01-24", "3D"),
        ]
        for selector, first, last, step in forward_cases:
            result = idx[selector]
            expected = date_range(first, last, freq=step, tz=idx.tz, name="idx")
            tm.assert_index_equal(result, expected)
            assert result.freq == expected.freq

        # A reversed slice yields a negative freq.
        result = idx[4::-1]
        expected = DatetimeIndex(
            ["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"],
            dtype=idx.dtype,
            freq="-1D",
            name="idx",
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_dti_business_getitem(self, freq):
        """Slices of a business range keep or scale freq; list indexing drops it."""
        rng = bdate_range(START, END, freq=freq)

        head = rng[:5]
        head_expected = DatetimeIndex(rng.view(np.ndarray)[:5], freq=freq)
        tm.assert_index_equal(head, head_expected)
        assert head.freq == head_expected.freq
        assert head.freq == rng.freq

        every_fifth = rng[::5]
        assert every_fifth.freq == to_offset(freq) * 5

        reordered = rng[[4, 3, 2, 1, 0]]
        assert len(reordered) == 5
        assert isinstance(reordered, DatetimeIndex)
        assert reordered.freq is None

        # 32-bit vs. 64-bit platforms
        assert rng[4] == rng[np_long(4)]

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_dti_business_getitem_matplotlib_hackaround(self, freq):
        """Two-dimensional indexing raises ValueError."""
        rng = bdate_range(START, END, freq=freq)
        with pytest.raises(ValueError, match="Multi-dimensional indexing"):
            # GH#30588 multi-dimensional indexing deprecated
            rng[:, None]

    def test_getitem_int_list(self):
        """Indexing with a list of ints picks those elements and drops freq."""
        month_ends = date_range(start="1/1/2005", end="12/1/2005", freq="ME")
        picked = month_ends[[1, 3, 5]]

        first, second, third = picked[0], picked[1], picked[2]

        assert first == Timestamp("2/28/2005")
        assert second == Timestamp("4/30/2005")
        assert third == Timestamp("6/30/2005")

        # getitem with non-slice drops freq
        assert picked.freq is None
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
class TestWhere:
|
| 120 |
+
def test_where_doesnt_retain_freq(self):
|
| 121 |
+
dti = date_range("20130101", periods=3, freq="D", name="idx")
|
| 122 |
+
cond = [True, True, False]
|
| 123 |
+
expected = DatetimeIndex([dti[0], dti[1], dti[0]], freq=None, name="idx")
|
| 124 |
+
|
| 125 |
+
result = dti.where(cond, dti[::-1])
|
| 126 |
+
tm.assert_index_equal(result, expected)
|
| 127 |
+
|
| 128 |
+
def test_where_other(self):
|
| 129 |
+
# other is ndarray or Index
|
| 130 |
+
i = date_range("20130101", periods=3, tz="US/Eastern")
|
| 131 |
+
|
| 132 |
+
for arr in [np.nan, pd.NaT]:
|
| 133 |
+
result = i.where(notna(i), other=arr)
|
| 134 |
+
expected = i
|
| 135 |
+
tm.assert_index_equal(result, expected)
|
| 136 |
+
|
| 137 |
+
i2 = i.copy()
|
| 138 |
+
i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
|
| 139 |
+
result = i.where(notna(i2), i2)
|
| 140 |
+
tm.assert_index_equal(result, i2)
|
| 141 |
+
|
| 142 |
+
i2 = i.copy()
|
| 143 |
+
i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
|
| 144 |
+
result = i.where(notna(i2), i2._values)
|
| 145 |
+
tm.assert_index_equal(result, i2)
|
| 146 |
+
|
| 147 |
+
def test_where_invalid_dtypes(self):
|
| 148 |
+
dti = date_range("20130101", periods=3, tz="US/Eastern")
|
| 149 |
+
|
| 150 |
+
tail = dti[2:].tolist()
|
| 151 |
+
i2 = Index([pd.NaT, pd.NaT] + tail)
|
| 152 |
+
|
| 153 |
+
mask = notna(i2)
|
| 154 |
+
|
| 155 |
+
# passing tz-naive ndarray to tzaware DTI
|
| 156 |
+
result = dti.where(mask, i2.values)
|
| 157 |
+
expected = Index([pd.NaT.asm8, pd.NaT.asm8] + tail, dtype=object)
|
| 158 |
+
tm.assert_index_equal(result, expected)
|
| 159 |
+
|
| 160 |
+
# passing tz-aware DTI to tznaive DTI
|
| 161 |
+
naive = dti.tz_localize(None)
|
| 162 |
+
result = naive.where(mask, i2)
|
| 163 |
+
expected = Index([i2[0], i2[1]] + naive[2:].tolist(), dtype=object)
|
| 164 |
+
tm.assert_index_equal(result, expected)
|
| 165 |
+
|
| 166 |
+
pi = i2.tz_localize(None).to_period("D")
|
| 167 |
+
result = dti.where(mask, pi)
|
| 168 |
+
expected = Index([pi[0], pi[1]] + tail, dtype=object)
|
| 169 |
+
tm.assert_index_equal(result, expected)
|
| 170 |
+
|
| 171 |
+
tda = i2.asi8.view("timedelta64[ns]")
|
| 172 |
+
result = dti.where(mask, tda)
|
| 173 |
+
expected = Index([tda[0], tda[1]] + tail, dtype=object)
|
| 174 |
+
assert isinstance(expected[0], np.timedelta64)
|
| 175 |
+
tm.assert_index_equal(result, expected)
|
| 176 |
+
|
| 177 |
+
result = dti.where(mask, i2.asi8)
|
| 178 |
+
expected = Index([pd.NaT._value, pd.NaT._value] + tail, dtype=object)
|
| 179 |
+
assert isinstance(expected[0], int)
|
| 180 |
+
tm.assert_index_equal(result, expected)
|
| 181 |
+
|
| 182 |
+
# non-matching scalar
|
| 183 |
+
td = pd.Timedelta(days=4)
|
| 184 |
+
result = dti.where(mask, td)
|
| 185 |
+
expected = Index([td, td] + tail, dtype=object)
|
| 186 |
+
assert expected[0] is td
|
| 187 |
+
tm.assert_index_equal(result, expected)
|
| 188 |
+
|
| 189 |
+
def test_where_mismatched_nat(self, tz_aware_fixture):
|
| 190 |
+
tz = tz_aware_fixture
|
| 191 |
+
dti = date_range("2013-01-01", periods=3, tz=tz)
|
| 192 |
+
cond = np.array([True, False, True])
|
| 193 |
+
|
| 194 |
+
tdnat = np.timedelta64("NaT", "ns")
|
| 195 |
+
expected = Index([dti[0], tdnat, dti[2]], dtype=object)
|
| 196 |
+
assert expected[1] is tdnat
|
| 197 |
+
|
| 198 |
+
result = dti.where(cond, tdnat)
|
| 199 |
+
tm.assert_index_equal(result, expected)
|
| 200 |
+
|
| 201 |
+
def test_where_tz(self):
|
| 202 |
+
i = date_range("20130101", periods=3, tz="US/Eastern")
|
| 203 |
+
result = i.where(notna(i))
|
| 204 |
+
expected = i
|
| 205 |
+
tm.assert_index_equal(result, expected)
|
| 206 |
+
|
| 207 |
+
i2 = i.copy()
|
| 208 |
+
i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
|
| 209 |
+
result = i.where(notna(i2))
|
| 210 |
+
expected = i2
|
| 211 |
+
tm.assert_index_equal(result, expected)
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
class TestTake:
    """Tests for ``DatetimeIndex.take``."""

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_dti_take_dont_lose_meta(self, tzstr):
        """Taking the leading positions keeps both ``tz`` and ``freq``."""
        source = date_range("1/1/2000", periods=20, tz=tzstr)

        taken = source.take(range(5))
        assert taken.tz == source.tz
        assert taken.freq == source.freq

    def test_take_nan_first_datetime(self):
        """``take`` with a negative position on an index that starts with NaT."""
        dti = DatetimeIndex([pd.NaT, Timestamp("20130101"), Timestamp("20130102")])
        taken = dti.take([-1, 0, 1])
        wanted = DatetimeIndex([dti[-1], dti[0], dti[1]])
        tm.assert_index_equal(taken, wanted)

    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
    def test_take(self, tz):
        """Compare ``take`` results, including ``freq``, for several position lists."""
        # GH#10295
        idx = date_range("2011-01-01", "2011-01-31", freq="D", name="idx", tz=tz)

        single = idx.take([0])
        assert single == Timestamp("2011-01-01", tz=idx.tz)

        # evenly spaced positions: result is compared with a date_range,
        # and the freq attributes must agree
        evenly_spaced = [
            ([0, 1, 2], "2011-01-01", "2011-01-03", "D"),
            ([0, 2, 4], "2011-01-01", "2011-01-05", "2D"),
            ([7, 4, 1], "2011-01-08", "2011-01-02", "-3D"),
        ]
        for positions, first, last, step in evenly_spaced:
            taken = idx.take(positions)
            wanted = date_range(first, last, freq=step, tz=idx.tz, name="idx")
            tm.assert_index_equal(taken, wanted)
            assert taken.freq == wanted.freq

        # unevenly spaced positions: freq must be None
        unevenly_spaced = [
            ([3, 2, 5], ["2011-01-04", "2011-01-03", "2011-01-06"]),
            ([-3, 2, 5], ["2011-01-29", "2011-01-03", "2011-01-06"]),
        ]
        for positions, stamps in unevenly_spaced:
            taken = idx.take(positions)
            wanted = DatetimeIndex(
                stamps,
                dtype=idx.dtype,
                freq=None,
                name="idx",
            )
            tm.assert_index_equal(taken, wanted)
            assert taken.freq is None

    def test_take_invalid_kwargs(self):
        """Unsupported keyword arguments to ``take`` raise with specific messages."""
        idx = date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
        positions = [1, 6, 5, 9, 10, 13, 15, 3]

        rejected = [
            (
                TypeError,
                r"take\(\) got an unexpected keyword argument 'foo'",
                {"foo": 2},
            ),
            (ValueError, "the 'out' parameter is not supported", {"out": positions}),
            (ValueError, "the 'mode' parameter is not supported", {"mode": "clip"}),
        ]
        for exc_type, pattern, extra in rejected:
            with pytest.raises(exc_type, match=pattern):
                idx.take(positions, **extra)

    # TODO: This method came from test_datetime; de-dup with version above
    @pytest.mark.parametrize("tz", [None, "US/Eastern", "Asia/Tokyo"])
    def test_take2(self, tz):
        """``take`` and list-indexing give the same freq-less ``DatetimeIndex``."""
        stamps = [
            datetime(2010, 1, 1, 14),
            datetime(2010, 1, 1, 15),
            datetime(2010, 1, 1, 17),
            datetime(2010, 1, 1, 21),
        ]

        hourly = date_range(
            start="2010-01-01 09:00",
            end="2010-02-01 09:00",
            freq="h",
            tz=tz,
            name="idx",
        )
        wanted = DatetimeIndex(stamps, freq=None, name="idx", dtype=hourly.dtype)

        positions = [5, 6, 8, 12]
        via_take = hourly.take(positions)
        via_getitem = hourly[positions]

        for candidate in (via_take, via_getitem):
            tm.assert_index_equal(candidate, wanted)
            assert isinstance(candidate, DatetimeIndex)
            assert candidate.freq is None
            assert candidate.tz == wanted.tz
            assert candidate.name == wanted.name

    def test_take_fill_value(self):
        """``take`` with ``fill_value``/``allow_fill`` on a tz-naive index."""
        # GH#12631
        idx = DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
        wrapped = DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx")

        taken = idx.take(np.array([1, 0, -1]))
        tm.assert_index_equal(taken, wrapped)

        # fill_value
        taken = idx.take(np.array([1, 0, -1]), fill_value=True)
        filled = DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
        tm.assert_index_equal(taken, filled)

        # allow_fill=False
        taken = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        tm.assert_index_equal(taken, wrapped)

        too_negative = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        for bad_position in (-2, -5):
            with pytest.raises(ValueError, match=too_negative):
                idx.take(np.array([1, 0, bad_position]), fill_value=True)

        with pytest.raises(IndexError, match="out of bounds"):
            idx.take(np.array([1, -5]))

    def test_take_fill_value_with_timezone(self):
        """``take`` with ``fill_value``/``allow_fill`` on a tz-aware index."""
        idx = DatetimeIndex(
            ["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx", tz="US/Eastern"
        )
        wrapped = DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
        )

        taken = idx.take(np.array([1, 0, -1]))
        tm.assert_index_equal(taken, wrapped)

        # fill_value
        taken = idx.take(np.array([1, 0, -1]), fill_value=True)
        filled = DatetimeIndex(
            ["2011-02-01", "2011-01-01", "NaT"], name="xxx", tz="US/Eastern"
        )
        tm.assert_index_equal(taken, filled)

        # allow_fill=False
        taken = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        tm.assert_index_equal(taken, wrapped)

        too_negative = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        for bad_position in (-2, -5):
            with pytest.raises(ValueError, match=too_negative):
                idx.take(np.array([1, 0, bad_position]), fill_value=True)

        with pytest.raises(IndexError, match="out of bounds"):
            idx.take(np.array([1, -5]))
|
| 389 |
+
|
| 390 |
+
|
| 391 |
+
class TestGetLoc:
    """Tests for ``DatetimeIndex.get_loc``."""

    def test_get_loc_key_unit_mismatch(self):
        """A key in a different unit that matches an entry is still located."""
        idx = date_range("2000-01-01", periods=3)
        key = idx[1].as_unit("ms")
        loc = idx.get_loc(key)
        assert loc == 1
        assert key in idx

    def test_get_loc_key_unit_mismatch_not_castable(self):
        """A nanosecond key 1ns past a second-unit entry raises ``KeyError``."""
        dta = date_range("2000-01-01", periods=3)._data.astype("M8[s]")
        dti = DatetimeIndex(dta)
        key = dta[0].as_unit("ns") + pd.Timedelta(1)

        with pytest.raises(
            KeyError, match=r"Timestamp\('2000-01-01 00:00:00.000000001'\)"
        ):
            dti.get_loc(key)

        assert key not in dti

    def test_get_loc_time_obj(self):
        """``datetime.time`` keys return an array of matching positions."""
        # time indexing
        idx = date_range("2000-01-01", periods=24, freq="h")

        result = idx.get_loc(time(12))
        expected = np.array([12])
        tm.assert_numpy_array_equal(result, expected, check_dtype=False)

        # no entry falls on the half hour, so nothing matches
        result = idx.get_loc(time(12, 30))
        expected = np.array([])
        tm.assert_numpy_array_equal(result, expected, check_dtype=False)

    @pytest.mark.parametrize("offset", [-10, 10])
    def test_get_loc_time_obj2(self, monkeypatch, offset):
        """``time`` lookup with index sizes just below and above ``_SIZE_CUTOFF``."""
        # GH#8667
        size_cutoff = 50
        n = size_cutoff + offset
        key = time(15, 11, 30)
        start = key.hour * 3600 + key.minute * 60 + key.second
        step = 24 * 3600

        with monkeypatch.context() as m:
            # Patch through the scoped MonkeyPatch so the override is undone
            # as soon as this block exits.
            m.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
            idx = date_range("2014-11-26", periods=n, freq="s")
            ts = pd.Series(np.random.default_rng(2).standard_normal(n), index=idx)
            locs = np.arange(start, n, step, dtype=np.intp)

            result = ts.index.get_loc(key)
            tm.assert_numpy_array_equal(result, locs)
            tm.assert_series_equal(ts[key], ts.iloc[locs])

            # in-place arithmetic through the time key and through the
            # positional locations must modify the same rows
            left, right = ts.copy(), ts.copy()
            left[key] *= -10
            right.iloc[locs] *= -10
            tm.assert_series_equal(left, right)

    def test_get_loc_time_nat(self):
        """A ``time`` key on an all-NaT index matches nothing."""
        # GH#35114
        # Case where key's total microseconds happens to match iNaT % 1e6 // 1000
        tic = time(minute=12, second=43, microsecond=145224)
        dti = DatetimeIndex([pd.NaT])

        loc = dti.get_loc(tic)
        expected = np.array([], dtype=np.intp)
        tm.assert_numpy_array_equal(loc, expected)

    def test_get_loc_nat(self):
        """Several missing-value keys locate NaT; a timedelta64 NaT does not."""
        # GH#20464
        index = DatetimeIndex(["1/3/2000", "NaT"])
        assert index.get_loc(pd.NaT) == 1

        assert index.get_loc(None) == 1

        assert index.get_loc(np.nan) == 1

        assert index.get_loc(pd.NA) == 1

        assert index.get_loc(np.datetime64("NaT")) == 1

        with pytest.raises(KeyError, match="NaT"):
            index.get_loc(np.timedelta64("NaT"))

    @pytest.mark.parametrize("key", [pd.Timedelta(0), pd.Timedelta(1), timedelta(0)])
    def test_get_loc_timedelta_invalid_key(self, key):
        """Timedelta-like keys raise ``TypeError``."""
        # GH#20464
        dti = date_range("1970-01-01", periods=10)
        msg = "Cannot index DatetimeIndex with [Tt]imedelta"
        with pytest.raises(TypeError, match=msg):
            dti.get_loc(key)

    def test_get_loc_reasonable_key_error(self):
        """A missing date string raises ``KeyError`` whose message contains the year."""
        # GH#1062
        index = DatetimeIndex(["1/3/2000"])
        with pytest.raises(KeyError, match="2000"):
            index.get_loc("1/1/2000")

    def test_get_loc_year_str(self):
        """A year string resolves to the slice covering that year."""
        rng = date_range("1/1/2000", "1/1/2010")

        result = rng.get_loc("2009")
        expected = slice(3288, 3653)
        assert result == expected
|
| 493 |
+
|
| 494 |
+
|
| 495 |
+
class TestContains:
    """Tests for ``in`` membership checks on ``DatetimeIndex``."""

    def test_dti_contains_with_duplicates(self):
        """A value that appears twice in the index is reported as contained."""
        stamp = datetime(2011, 12, 5, 20, 30)
        assert stamp in DatetimeIndex([stamp, stamp])

    @pytest.mark.parametrize(
        "vals",
        [
            [0, 1, 0],
            [0, 0, -1],
            [0, -1, -1],
            ["2015", "2015", "2016"],
            ["2015", "2015", "2014"],
        ],
    )
    def test_contains_nonunique(self, vals):
        """The first element of a non-unique index is reported as contained."""
        # GH#9512
        dti = DatetimeIndex(vals)
        first = dti[0]
        assert first in dti
|
| 515 |
+
|
| 516 |
+
|
| 517 |
+
class TestGetIndexer:
    """Tests for ``DatetimeIndex.get_indexer``."""

    def test_get_indexer_date_objs(self):
        """``datetime.date`` targets index the same as the timestamps themselves."""
        dti = date_range("1/1/2000", periods=20)

        as_dates = dti.map(lambda stamp: stamp.date())
        from_dates = dti.get_indexer(as_dates)
        from_self = dti.get_indexer(dti)
        tm.assert_numpy_array_equal(from_dates, from_self)

    def test_get_indexer(self):
        """Exact, pad/backfill/nearest and tolerance-limited lookups."""
        idx = date_range("2000-01-01", periods=3)
        identity = np.array([0, 1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(idx.get_indexer(idx), identity)

        target = idx[0] + pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"])
        by_method = (
            ("pad", [-1, 0, 1]),
            ("backfill", [0, 1, 2]),
            ("nearest", [0, 1, 1]),
        )
        for method, positions in by_method:
            tm.assert_numpy_array_equal(
                idx.get_indexer(target, method), np.array(positions, dtype=np.intp)
            )

        # scalar tolerance
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, "nearest", tolerance=pd.Timedelta("1 hour")),
            np.array([0, -1, 1], dtype=np.intp),
        )

        # list-like tolerance, one entry per target element
        per_element = [
            pd.Timedelta("1 hour"),
            pd.Timedelta("1 hour"),
            pd.Timedelta("1 hour").to_timedelta64(),
        ]
        tm.assert_numpy_array_equal(
            idx.get_indexer(
                target, "nearest", tolerance=[np.timedelta64(tol) for tol in per_element]
            ),
            np.array([0, -1, 1], dtype=np.intp),
        )

        # a tolerance list containing an unparsable entry
        unparsable = [
            pd.Timedelta("2 hour").to_timedelta64(),
            pd.Timedelta("1 hour").to_timedelta64(),
            "foo",
        ]
        with pytest.raises(
            ValueError, match="Could not convert 'foo' to NumPy timedelta"
        ):
            idx.get_indexer(target, "nearest", tolerance=unparsable)

        # an unparsable scalar tolerance
        with pytest.raises(ValueError, match="abbreviation w/o a number"):
            idx.get_indexer(idx[[0]], method="nearest", tolerance="foo")

    @pytest.mark.parametrize(
        "target",
        [
            [date(2020, 1, 1), Timestamp("2020-01-02")],
            [Timestamp("2020-01-01"), date(2020, 1, 2)],
        ],
    )
    def test_get_indexer_mixed_dtypes(self, target):
        """A target mixing ``date`` and ``Timestamp`` objects is matched."""
        # https://github.com/pandas-dev/pandas/issues/33741
        dti = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")])
        found = dti.get_indexer(target)
        tm.assert_numpy_array_equal(found, np.array([0, 1], dtype=np.intp))

    @pytest.mark.parametrize(
        "target, positions",
        [
            ([date(9999, 1, 1), Timestamp("2020-01-01")], [-1, 0]),
            ([Timestamp("2020-01-01"), date(9999, 1, 1)], [0, -1]),
            ([date(9999, 1, 1), date(9999, 1, 1)], [-1, -1]),
        ],
    )
    def test_get_indexer_out_of_bounds_date(self, target, positions):
        """Year-9999 ``date`` targets map to -1 instead of raising."""
        dti = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")])

        found = dti.get_indexer(target)
        tm.assert_numpy_array_equal(found, np.array(positions, dtype=np.intp))

    def test_get_indexer_pad_requires_monotonicity(self):
        """``method="pad"`` on a non-monotonic index raises ``ValueError``."""
        bdays = date_range("1/1/2000", "3/1/2000", freq="B")

        # neither monotonic increasing or decreasing
        shuffled = bdays[[1, 0, 2]]

        with pytest.raises(
            ValueError, match="index must be monotonic increasing or decreasing"
        ):
            shuffled.get_indexer(bdays, method="pad")
|
| 604 |
+
|
| 605 |
+
|
| 606 |
+
class TestMaybeCastSliceBound:
    """Tests for ``DatetimeIndex._maybe_cast_slice_bound``."""

    def test_maybe_cast_slice_bounds_empty(self):
        """A date string is cast to day-end (right) or day-start (left) on an empty index."""
        # GH#14354
        no_entries = date_range(freq="1h", periods=0, end="2015")

        upper = no_entries._maybe_cast_slice_bound("2015-01-02", "right")
        assert upper == Timestamp("2015-01-02 23:59:59.999999999")

        lower = no_entries._maybe_cast_slice_bound("2015-01-02", "left")
        assert lower == Timestamp("2015-01-02 00:00:00")

    def test_maybe_cast_slice_duplicate_monotonic(self):
        """Casting a left bound on an index holding the same value twice."""
        # https://github.com/pandas-dev/pandas/issues/16515
        repeated = DatetimeIndex(["2017", "2017"])
        bound = repeated._maybe_cast_slice_bound("2017-01-01", "left")
        assert bound == Timestamp("2017-01-01")
|
| 625 |
+
|
| 626 |
+
|
| 627 |
+
class TestGetSliceBounds:
    """Tests for ``get_slice_bound`` / ``slice_locs`` with tz-naive keys."""

    @pytest.mark.parametrize("box", [date, datetime, Timestamp])
    @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
    def test_get_slice_bounds_datetime_within(
        self, box, side, expected, tz_aware_fixture
    ):
        """Key inside the index range: raises for a tz-aware index, else a position."""
        # GH 35690
        tz = tz_aware_fixture
        index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz)
        key = box(year=2000, month=1, day=7)

        if tz is None:
            assert index.get_slice_bound(key, side=side) == expected
            return

        # GH#36148 we require tzawareness-compat as of 2.0
        with pytest.raises(TypeError, match="Cannot compare tz-naive"):
            index.get_slice_bound(key, side=side)

    @pytest.mark.parametrize("box", [datetime, Timestamp])
    @pytest.mark.parametrize("side", ["left", "right"])
    @pytest.mark.parametrize("year, expected", [(1999, 0), (2020, 30)])
    def test_get_slice_bounds_datetime_outside(
        self, box, side, year, expected, tz_aware_fixture
    ):
        """Key outside the index range: raises for a tz-aware index, else a position."""
        # GH 35690
        tz = tz_aware_fixture
        index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz)
        key = box(year=year, month=1, day=7)

        if tz is None:
            assert index.get_slice_bound(key, side=side) == expected
            return

        # GH#36148 we require tzawareness-compat as of 2.0
        with pytest.raises(TypeError, match="Cannot compare tz-naive"):
            index.get_slice_bound(key, side=side)

    @pytest.mark.parametrize("box", [datetime, Timestamp])
    def test_slice_datetime_locs(self, box, tz_aware_fixture):
        """``slice_locs`` with naive bounds: raises for a tz-aware index, else ``(0, 1)``."""
        # GH 34077
        tz = tz_aware_fixture
        index = DatetimeIndex(["2010-01-01", "2010-01-03"]).tz_localize(tz)
        key = box(2010, 1, 1)

        if tz is None:
            assert index.slice_locs(key, box(2010, 1, 2)) == (0, 1)
            return

        # GH#36148 we require tzawareness-compat as of 2.0
        with pytest.raises(TypeError, match="Cannot compare tz-naive"):
            index.slice_locs(key, box(2010, 1, 2))
|
| 680 |
+
|
| 681 |
+
|
| 682 |
+
class TestIndexerBetweenTime:
    """Tests for ``DatetimeIndex.indexer_between_time``."""

    def test_indexer_between_time(self):
        """Passing ``datetime`` objects instead of ``time`` objects raises."""
        # GH#11818
        five_minutely = date_range("1/1/2000", "1/5/2000", freq="5min")
        begin = datetime(2010, 1, 2, 1)
        finish = datetime(2010, 1, 2, 5)
        pattern = (
            r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time"
        )
        with pytest.raises(ValueError, match=pattern):
            five_minutely.indexer_between_time(begin, finish)

    @pytest.mark.parametrize("unit", ["us", "ms", "s"])
    def test_indexer_between_time_non_nano(self, unit):
        """Non-nanosecond indexes give the same result as the nanosecond index."""
        # For simple cases like this, the non-nano indexer_between_time
        # should match the nano result

        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        nano_values = rng._data._ndarray

        coarse_values = nano_values.astype(f"M8[{unit}]")

        dta = type(rng._data)._simple_new(coarse_values, dtype=coarse_values.dtype)
        dti = DatetimeIndex(dta)
        assert dti.dtype == coarse_values.dtype

        windows = [
            (time(1, 25), time(2, 29)),
            # case with non-zero micros in arguments
            (time(1, 25, 0, 45678), time(2, 29, 0, 1234)),
        ]
        for tic, toc in windows:
            from_coarse = dti.indexer_between_time(tic, toc)
            from_nano = rng.indexer_between_time(tic, toc)
            tm.assert_numpy_array_equal(from_coarse, from_nano)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_iter.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import dateutil.tz
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pytest
|
| 4 |
+
|
| 5 |
+
from pandas import (
|
| 6 |
+
DatetimeIndex,
|
| 7 |
+
date_range,
|
| 8 |
+
to_datetime,
|
| 9 |
+
)
|
| 10 |
+
from pandas.core.arrays import datetimes
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class TestDatetimeIndexIteration:
    """Tests comparing iteration over a ``DatetimeIndex`` with positional access."""

    @pytest.mark.parametrize(
        "tz", [None, "UTC", "US/Central", dateutil.tz.tzoffset(None, -28800)]
    )
    def test_iteration_preserves_nanoseconds(self, tz):
        """Iterated values equal the indexed values for nanosecond-resolution data."""
        # GH#19603
        dti = DatetimeIndex(
            ["2018-02-08 15:00:00.168456358", "2018-02-08 15:00:00.168456359"], tz=tz
        )
        for pos, stamp in enumerate(dti):
            assert stamp == dti[pos]  # pylint: disable=unnecessary-list-index-lookup

    def test_iter_readonly(self):
        """Iterating an index built from a read-only array does not raise."""
        # GH#28055 ints_to_pydatetime with readonly array
        frozen = np.array([np.datetime64("2012-02-15T12:00:00.000000000")])
        frozen.setflags(write=False)
        list(to_datetime(frozen))

    def test_iteration_preserves_tz(self):
        """Iterated values equal the indexed values for a named time zone."""
        # see GH#8890
        dti = date_range("2012-01-01", periods=3, freq="h", tz="US/Eastern")

        for pos, stamp in enumerate(dti):
            by_position = dti[pos]  # pylint: disable=unnecessary-list-index-lookup
            assert stamp == by_position

    def test_iteration_preserves_tz2(self):
        """Iterated values match the indexed values for a fixed-offset time zone."""
        dti = date_range(
            "2012-01-01", periods=3, freq="h", tz=dateutil.tz.tzoffset(None, -28800)
        )

        for pos, stamp in enumerate(dti):
            by_position = dti[pos]  # pylint: disable=unnecessary-list-index-lookup
            assert stamp._repr_base == by_position._repr_base
            assert stamp == by_position

    def test_iteration_preserves_tz3(self):
        """Iterated values match the indexed values for offset-suffixed strings."""
        # GH#9100
        dti = DatetimeIndex(
            ["2014-12-01 03:32:39.987000-08:00", "2014-12-01 04:12:34.987000-08:00"]
        )
        for pos, stamp in enumerate(dti):
            by_position = dti[pos]  # pylint: disable=unnecessary-list-index-lookup
            assert stamp._repr_base == by_position._repr_base
            assert stamp == by_position

    @pytest.mark.parametrize("offset", [-5, -1, 0, 1])
    def test_iteration_over_chunksize(self, offset, monkeypatch):
        """Iteration yields every element for lengths around ``_ITER_CHUNKSIZE``."""
        # GH#21012
        chunksize = 5
        dti = date_range(
            "2000-01-01 00:00:00", periods=chunksize - offset, freq="min"
        )
        seen = 0
        with monkeypatch.context() as m:
            m.setattr(datetimes, "_ITER_CHUNKSIZE", chunksize)
            for stamp in dti:
                assert dti[seen] == stamp
                seen = seen + 1
        # every element must have been visited exactly once
        assert seen == len(dti)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_join.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import (
|
| 2 |
+
datetime,
|
| 3 |
+
timezone,
|
| 4 |
+
)
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pytest
|
| 8 |
+
|
| 9 |
+
from pandas import (
|
| 10 |
+
DataFrame,
|
| 11 |
+
DatetimeIndex,
|
| 12 |
+
Index,
|
| 13 |
+
Timestamp,
|
| 14 |
+
date_range,
|
| 15 |
+
period_range,
|
| 16 |
+
to_datetime,
|
| 17 |
+
)
|
| 18 |
+
import pandas._testing as tm
|
| 19 |
+
|
| 20 |
+
from pandas.tseries.offsets import (
|
| 21 |
+
BDay,
|
| 22 |
+
BMonthEnd,
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class TestJoin:
    """Tests for ``DatetimeIndex.join``."""

    def test_does_not_convert_mixed_integer(self):
        """Re-joining an object-dtype outer join keeps dtype and values."""
        frame = DataFrame(
            np.ones((3, 2)), columns=date_range("2020-01-01", periods=2)
        )
        mixed = frame.columns.join(frame.index, how="outer")
        rejoined = mixed.join(frame.columns)
        assert mixed.dtype == np.dtype("O")
        assert mixed.dtype == rejoined.dtype
        tm.assert_numpy_array_equal(mixed.values, rejoined.values)

    def test_join_self(self, join_type):
        """Joining an index with itself returns the very same object."""
        dti = date_range("1/1/2000", periods=10)
        assert dti is dti.join(dti, how=join_type)

    def test_join_with_period_index(self, join_type):
        """Joining period columns with a datetime index matches the object-dtype join."""
        frame = DataFrame(
            np.ones((10, 2)),
            index=date_range("2020-01-01", periods=10),
            columns=period_range("2020-01-01", periods=2),
        )
        head = frame.iloc[:5, 0]

        via_object = frame.columns.astype("O").join(head.index, how=join_type)
        direct = frame.columns.join(head.index, how=join_type)
        tm.assert_index_equal(via_object, direct)

    def test_join_object_index(self):
        """An outer join with a string index still starts with a ``Timestamp``."""
        dti = date_range("1/1/2000", periods=10)
        letters = Index(["a", "b", "c", "d"])

        combined = dti.join(letters, how="outer")
        assert isinstance(combined[0], Timestamp)

    def test_join_utc_convert(self, join_type):
        """Same-zone joins keep the zone; cross-zone joins come back in UTC."""
        utc_hours = date_range("1/1/2011", periods=100, freq="h", tz="utc")

        eastern = utc_hours.tz_convert("US/Eastern")
        berlin = utc_hours.tz_convert("Europe/Berlin")

        same_zone = eastern.join(eastern[:-5], how=join_type)
        assert isinstance(same_zone, DatetimeIndex)
        assert same_zone.tz == eastern.tz

        cross_zone = eastern.join(berlin[:-5], how=join_type)
        assert isinstance(cross_zone, DatetimeIndex)
        assert cross_zone.tz is timezone.utc

    def test_datetimeindex_union_join_empty(self, sort, using_infer_string):
        """Union and join of a datetime index with an empty ``Index``."""
        dti = date_range(start="1/1/2001", end="2/1/2001", freq="D")
        nothing = Index([])

        unioned = dti.union(nothing, sort=sort)
        if not using_infer_string:
            tm.assert_index_equal(unioned, dti.astype("O"))
        else:
            assert isinstance(unioned, DatetimeIndex)
            tm.assert_index_equal(unioned, dti)

        joined = dti.join(nothing)
        assert isinstance(joined, DatetimeIndex)
        tm.assert_index_equal(joined, dti)

    def test_join_nonunique(self):
        """An outer join of two non-unique indexes is monotonic increasing."""
        later = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"])
        earlier = to_datetime(
            ["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"]
        )
        combined = later.join(earlier, how="outer")
        assert combined.is_monotonic_increasing

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_outer_join(self, freq):
        """Outer joins return a ``DatetimeIndex``; gapped ones have no ``freq``."""
        # should just behave as union
        start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
        rng = date_range(start=start, end=end, freq=freq)

        scenarios = [
            # overlapping
            (rng[:10], rng[5:10], False),
            # non-overlapping, gap in middle
            (rng[:5], rng[10:], True),
            # non-overlapping, no gap
            (rng[:5], rng[5:10], False),
        ]
        for left, right, expect_no_freq in scenarios:
            the_join = left.join(right, how="outer")
            assert isinstance(the_join, DatetimeIndex)
            if expect_no_freq:
                assert the_join.freq is None

        # overlapping, but different offset
        month_ends = date_range(start, end, freq=BMonthEnd())

        the_join = rng.join(month_ends, how="outer")
        assert isinstance(the_join, DatetimeIndex)
        assert the_join.freq is None

    def test_naive_aware_conflicts(self):
        """Joining tz-naive with tz-aware raises in either direction."""
        start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
        naive = date_range(start, end, freq=BDay(), tz=None)
        aware = date_range(start, end, freq=BDay(), tz="Asia/Hong_Kong")

        pattern = "tz-naive.*tz-aware"
        for caller, argument in ((naive, aware), (aware, naive)):
            with pytest.raises(TypeError, match=pattern):
                caller.join(argument)

    @pytest.mark.parametrize("tz", [None, "US/Pacific"])
    def test_join_preserves_freq(self, tz):
        """Adjacent halves re-join with ``freq`` intact; a gap drops it."""
        # GH#32157
        dti = date_range("2016-01-01", periods=10, tz=tz)
        front = dti[:5]

        contiguous = front.join(dti[5:], how="outer")
        assert contiguous.freq == dti.freq
        tm.assert_index_equal(contiguous, dti)

        gapped = front.join(dti[6:], how="outer")
        assert gapped.freq is None
        tm.assert_index_equal(gapped, dti.delete(5))
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_npfuncs.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
from pandas import date_range
|
| 4 |
+
import pandas._testing as tm
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class TestSplit:
|
| 8 |
+
def test_split_non_utc(self):
|
| 9 |
+
# GH#14042
|
| 10 |
+
indices = date_range("2016-01-01 00:00:00+0200", freq="s", periods=10)
|
| 11 |
+
result = np.split(indices, indices_or_sections=[])[0]
|
| 12 |
+
expected = indices._with_freq(None)
|
| 13 |
+
tm.assert_index_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_ops.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
|
| 3 |
+
import pytest
|
| 4 |
+
|
| 5 |
+
from pandas import (
|
| 6 |
+
DatetimeIndex,
|
| 7 |
+
Index,
|
| 8 |
+
bdate_range,
|
| 9 |
+
date_range,
|
| 10 |
+
)
|
| 11 |
+
import pandas._testing as tm
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class TestDatetimeIndexOps:
|
| 15 |
+
def test_infer_freq(self, freq_sample):
|
| 16 |
+
# GH 11018
|
| 17 |
+
idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
|
| 18 |
+
result = DatetimeIndex(idx.asi8, freq="infer")
|
| 19 |
+
tm.assert_index_equal(idx, result)
|
| 20 |
+
assert result.freq == freq_sample
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@pytest.mark.parametrize("freq", ["B", "C"])
|
| 24 |
+
class TestBusinessDatetimeIndex:
|
| 25 |
+
@pytest.fixture
|
| 26 |
+
def rng(self, freq):
|
| 27 |
+
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
|
| 28 |
+
return bdate_range(START, END, freq=freq)
|
| 29 |
+
|
| 30 |
+
def test_comparison(self, rng):
|
| 31 |
+
d = rng[10]
|
| 32 |
+
|
| 33 |
+
comp = rng > d
|
| 34 |
+
assert comp[11]
|
| 35 |
+
assert not comp[9]
|
| 36 |
+
|
| 37 |
+
def test_copy(self, rng):
|
| 38 |
+
cp = rng.copy()
|
| 39 |
+
tm.assert_index_equal(cp, rng)
|
| 40 |
+
|
| 41 |
+
def test_identical(self, rng):
|
| 42 |
+
t1 = rng.copy()
|
| 43 |
+
t2 = rng.copy()
|
| 44 |
+
assert t1.identical(t2)
|
| 45 |
+
|
| 46 |
+
# name
|
| 47 |
+
t1 = t1.rename("foo")
|
| 48 |
+
assert t1.equals(t2)
|
| 49 |
+
assert not t1.identical(t2)
|
| 50 |
+
t2 = t2.rename("foo")
|
| 51 |
+
assert t1.identical(t2)
|
| 52 |
+
|
| 53 |
+
# freq
|
| 54 |
+
t2v = Index(t2.values)
|
| 55 |
+
assert t1.equals(t2v)
|
| 56 |
+
assert not t1.identical(t2v)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_partial_slicing.py
ADDED
|
@@ -0,0 +1,466 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
""" test partial slicing on Series/Frame """
|
| 2 |
+
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
import pytest
|
| 7 |
+
|
| 8 |
+
from pandas import (
|
| 9 |
+
DataFrame,
|
| 10 |
+
DatetimeIndex,
|
| 11 |
+
Index,
|
| 12 |
+
MultiIndex,
|
| 13 |
+
Series,
|
| 14 |
+
Timedelta,
|
| 15 |
+
Timestamp,
|
| 16 |
+
date_range,
|
| 17 |
+
)
|
| 18 |
+
import pandas._testing as tm
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class TestSlicing:
|
| 22 |
+
def test_string_index_series_name_converted(self):
|
| 23 |
+
# GH#1644
|
| 24 |
+
df = DataFrame(
|
| 25 |
+
np.random.default_rng(2).standard_normal((10, 4)),
|
| 26 |
+
index=date_range("1/1/2000", periods=10),
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
result = df.loc["1/3/2000"]
|
| 30 |
+
assert result.name == df.index[2]
|
| 31 |
+
|
| 32 |
+
result = df.T["1/3/2000"]
|
| 33 |
+
assert result.name == df.index[2]
|
| 34 |
+
|
| 35 |
+
def test_stringified_slice_with_tz(self):
|
| 36 |
+
# GH#2658
|
| 37 |
+
start = "2013-01-07"
|
| 38 |
+
idx = date_range(start=start, freq="1d", periods=10, tz="US/Eastern")
|
| 39 |
+
df = DataFrame(np.arange(10), index=idx)
|
| 40 |
+
df["2013-01-14 23:44:34.437768-05:00":] # no exception here
|
| 41 |
+
|
| 42 |
+
def test_return_type_doesnt_depend_on_monotonicity(self):
|
| 43 |
+
# GH#24892 we get Series back regardless of whether our DTI is monotonic
|
| 44 |
+
dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3)
|
| 45 |
+
ser = Series(range(3), index=dti)
|
| 46 |
+
|
| 47 |
+
# non-monotonic index
|
| 48 |
+
ser2 = Series(range(3), index=[dti[1], dti[0], dti[2]])
|
| 49 |
+
|
| 50 |
+
# key with resolution strictly lower than "min"
|
| 51 |
+
key = "2015-5-14 00"
|
| 52 |
+
|
| 53 |
+
# monotonic increasing index
|
| 54 |
+
result = ser.loc[key]
|
| 55 |
+
expected = ser.iloc[1:]
|
| 56 |
+
tm.assert_series_equal(result, expected)
|
| 57 |
+
|
| 58 |
+
# monotonic decreasing index
|
| 59 |
+
result = ser.iloc[::-1].loc[key]
|
| 60 |
+
expected = ser.iloc[::-1][:-1]
|
| 61 |
+
tm.assert_series_equal(result, expected)
|
| 62 |
+
|
| 63 |
+
# non-monotonic index
|
| 64 |
+
result2 = ser2.loc[key]
|
| 65 |
+
expected2 = ser2.iloc[::2]
|
| 66 |
+
tm.assert_series_equal(result2, expected2)
|
| 67 |
+
|
| 68 |
+
def test_return_type_doesnt_depend_on_monotonicity_higher_reso(self):
|
| 69 |
+
# GH#24892 we get Series back regardless of whether our DTI is monotonic
|
| 70 |
+
dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3)
|
| 71 |
+
ser = Series(range(3), index=dti)
|
| 72 |
+
|
| 73 |
+
# non-monotonic index
|
| 74 |
+
ser2 = Series(range(3), index=[dti[1], dti[0], dti[2]])
|
| 75 |
+
|
| 76 |
+
# key with resolution strictly *higher* than "min"
|
| 77 |
+
key = "2015-5-14 00:00:00"
|
| 78 |
+
|
| 79 |
+
# monotonic increasing index
|
| 80 |
+
result = ser.loc[key]
|
| 81 |
+
assert result == 1
|
| 82 |
+
|
| 83 |
+
# monotonic decreasing index
|
| 84 |
+
result = ser.iloc[::-1].loc[key]
|
| 85 |
+
assert result == 1
|
| 86 |
+
|
| 87 |
+
# non-monotonic index
|
| 88 |
+
result2 = ser2.loc[key]
|
| 89 |
+
assert result2 == 0
|
| 90 |
+
|
| 91 |
+
def test_monotone_DTI_indexing_bug(self):
|
| 92 |
+
# GH 19362
|
| 93 |
+
# Testing accessing the first element in a monotonic descending
|
| 94 |
+
# partial string indexing.
|
| 95 |
+
|
| 96 |
+
df = DataFrame(list(range(5)))
|
| 97 |
+
date_list = [
|
| 98 |
+
"2018-01-02",
|
| 99 |
+
"2017-02-10",
|
| 100 |
+
"2016-03-10",
|
| 101 |
+
"2015-03-15",
|
| 102 |
+
"2014-03-16",
|
| 103 |
+
]
|
| 104 |
+
date_index = DatetimeIndex(date_list)
|
| 105 |
+
df["date"] = date_index
|
| 106 |
+
expected = DataFrame({0: list(range(5)), "date": date_index})
|
| 107 |
+
tm.assert_frame_equal(df, expected)
|
| 108 |
+
|
| 109 |
+
# We get a slice because df.index's resolution is hourly and we
|
| 110 |
+
# are slicing with a daily-resolution string. If both were daily,
|
| 111 |
+
# we would get a single item back
|
| 112 |
+
dti = date_range("20170101 01:00:00", periods=3)
|
| 113 |
+
df = DataFrame({"A": [1, 2, 3]}, index=dti[::-1])
|
| 114 |
+
|
| 115 |
+
expected = DataFrame({"A": 1}, index=dti[-1:][::-1])
|
| 116 |
+
result = df.loc["2017-01-03"]
|
| 117 |
+
tm.assert_frame_equal(result, expected)
|
| 118 |
+
|
| 119 |
+
result2 = df.iloc[::-1].loc["2017-01-03"]
|
| 120 |
+
expected2 = expected.iloc[::-1]
|
| 121 |
+
tm.assert_frame_equal(result2, expected2)
|
| 122 |
+
|
| 123 |
+
def test_slice_year(self):
|
| 124 |
+
dti = date_range(freq="B", start=datetime(2005, 1, 1), periods=500)
|
| 125 |
+
|
| 126 |
+
s = Series(np.arange(len(dti)), index=dti)
|
| 127 |
+
result = s["2005"]
|
| 128 |
+
expected = s[s.index.year == 2005]
|
| 129 |
+
tm.assert_series_equal(result, expected)
|
| 130 |
+
|
| 131 |
+
df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
|
| 132 |
+
result = df.loc["2005"]
|
| 133 |
+
expected = df[df.index.year == 2005]
|
| 134 |
+
tm.assert_frame_equal(result, expected)
|
| 135 |
+
|
| 136 |
+
@pytest.mark.parametrize(
|
| 137 |
+
"partial_dtime",
|
| 138 |
+
[
|
| 139 |
+
"2019",
|
| 140 |
+
"2019Q4",
|
| 141 |
+
"Dec 2019",
|
| 142 |
+
"2019-12-31",
|
| 143 |
+
"2019-12-31 23",
|
| 144 |
+
"2019-12-31 23:59",
|
| 145 |
+
],
|
| 146 |
+
)
|
| 147 |
+
def test_slice_end_of_period_resolution(self, partial_dtime):
|
| 148 |
+
# GH#31064
|
| 149 |
+
dti = date_range("2019-12-31 23:59:55.999999999", periods=10, freq="s")
|
| 150 |
+
|
| 151 |
+
ser = Series(range(10), index=dti)
|
| 152 |
+
result = ser[partial_dtime]
|
| 153 |
+
expected = ser.iloc[:5]
|
| 154 |
+
tm.assert_series_equal(result, expected)
|
| 155 |
+
|
| 156 |
+
def test_slice_quarter(self):
|
| 157 |
+
dti = date_range(freq="D", start=datetime(2000, 6, 1), periods=500)
|
| 158 |
+
|
| 159 |
+
s = Series(np.arange(len(dti)), index=dti)
|
| 160 |
+
assert len(s["2001Q1"]) == 90
|
| 161 |
+
|
| 162 |
+
df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
|
| 163 |
+
assert len(df.loc["1Q01"]) == 90
|
| 164 |
+
|
| 165 |
+
def test_slice_month(self):
|
| 166 |
+
dti = date_range(freq="D", start=datetime(2005, 1, 1), periods=500)
|
| 167 |
+
s = Series(np.arange(len(dti)), index=dti)
|
| 168 |
+
assert len(s["2005-11"]) == 30
|
| 169 |
+
|
| 170 |
+
df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
|
| 171 |
+
assert len(df.loc["2005-11"]) == 30
|
| 172 |
+
|
| 173 |
+
tm.assert_series_equal(s["2005-11"], s["11-2005"])
|
| 174 |
+
|
| 175 |
+
def test_partial_slice(self):
|
| 176 |
+
rng = date_range(freq="D", start=datetime(2005, 1, 1), periods=500)
|
| 177 |
+
s = Series(np.arange(len(rng)), index=rng)
|
| 178 |
+
|
| 179 |
+
result = s["2005-05":"2006-02"]
|
| 180 |
+
expected = s["20050501":"20060228"]
|
| 181 |
+
tm.assert_series_equal(result, expected)
|
| 182 |
+
|
| 183 |
+
result = s["2005-05":]
|
| 184 |
+
expected = s["20050501":]
|
| 185 |
+
tm.assert_series_equal(result, expected)
|
| 186 |
+
|
| 187 |
+
result = s[:"2006-02"]
|
| 188 |
+
expected = s[:"20060228"]
|
| 189 |
+
tm.assert_series_equal(result, expected)
|
| 190 |
+
|
| 191 |
+
result = s["2005-1-1"]
|
| 192 |
+
assert result == s.iloc[0]
|
| 193 |
+
|
| 194 |
+
with pytest.raises(KeyError, match=r"^'2004-12-31'$"):
|
| 195 |
+
s["2004-12-31"]
|
| 196 |
+
|
| 197 |
+
def test_partial_slice_daily(self):
|
| 198 |
+
rng = date_range(freq="h", start=datetime(2005, 1, 31), periods=500)
|
| 199 |
+
s = Series(np.arange(len(rng)), index=rng)
|
| 200 |
+
|
| 201 |
+
result = s["2005-1-31"]
|
| 202 |
+
tm.assert_series_equal(result, s.iloc[:24])
|
| 203 |
+
|
| 204 |
+
with pytest.raises(KeyError, match=r"^'2004-12-31 00'$"):
|
| 205 |
+
s["2004-12-31 00"]
|
| 206 |
+
|
| 207 |
+
def test_partial_slice_hourly(self):
|
| 208 |
+
rng = date_range(freq="min", start=datetime(2005, 1, 1, 20, 0, 0), periods=500)
|
| 209 |
+
s = Series(np.arange(len(rng)), index=rng)
|
| 210 |
+
|
| 211 |
+
result = s["2005-1-1"]
|
| 212 |
+
tm.assert_series_equal(result, s.iloc[: 60 * 4])
|
| 213 |
+
|
| 214 |
+
result = s["2005-1-1 20"]
|
| 215 |
+
tm.assert_series_equal(result, s.iloc[:60])
|
| 216 |
+
|
| 217 |
+
assert s["2005-1-1 20:00"] == s.iloc[0]
|
| 218 |
+
with pytest.raises(KeyError, match=r"^'2004-12-31 00:15'$"):
|
| 219 |
+
s["2004-12-31 00:15"]
|
| 220 |
+
|
| 221 |
+
def test_partial_slice_minutely(self):
|
| 222 |
+
rng = date_range(freq="s", start=datetime(2005, 1, 1, 23, 59, 0), periods=500)
|
| 223 |
+
s = Series(np.arange(len(rng)), index=rng)
|
| 224 |
+
|
| 225 |
+
result = s["2005-1-1 23:59"]
|
| 226 |
+
tm.assert_series_equal(result, s.iloc[:60])
|
| 227 |
+
|
| 228 |
+
result = s["2005-1-1"]
|
| 229 |
+
tm.assert_series_equal(result, s.iloc[:60])
|
| 230 |
+
|
| 231 |
+
assert s[Timestamp("2005-1-1 23:59:00")] == s.iloc[0]
|
| 232 |
+
with pytest.raises(KeyError, match=r"^'2004-12-31 00:00:00'$"):
|
| 233 |
+
s["2004-12-31 00:00:00"]
|
| 234 |
+
|
| 235 |
+
def test_partial_slice_second_precision(self):
|
| 236 |
+
rng = date_range(
|
| 237 |
+
start=datetime(2005, 1, 1, 0, 0, 59, microsecond=999990),
|
| 238 |
+
periods=20,
|
| 239 |
+
freq="us",
|
| 240 |
+
)
|
| 241 |
+
s = Series(np.arange(20), rng)
|
| 242 |
+
|
| 243 |
+
tm.assert_series_equal(s["2005-1-1 00:00"], s.iloc[:10])
|
| 244 |
+
tm.assert_series_equal(s["2005-1-1 00:00:59"], s.iloc[:10])
|
| 245 |
+
|
| 246 |
+
tm.assert_series_equal(s["2005-1-1 00:01"], s.iloc[10:])
|
| 247 |
+
tm.assert_series_equal(s["2005-1-1 00:01:00"], s.iloc[10:])
|
| 248 |
+
|
| 249 |
+
assert s[Timestamp("2005-1-1 00:00:59.999990")] == s.iloc[0]
|
| 250 |
+
with pytest.raises(KeyError, match="2005-1-1 00:00:00"):
|
| 251 |
+
s["2005-1-1 00:00:00"]
|
| 252 |
+
|
| 253 |
+
def test_partial_slicing_dataframe(self):
|
| 254 |
+
# GH14856
|
| 255 |
+
# Test various combinations of string slicing resolution vs.
|
| 256 |
+
# index resolution
|
| 257 |
+
# - If string resolution is less precise than index resolution,
|
| 258 |
+
# string is considered a slice
|
| 259 |
+
# - If string resolution is equal to or more precise than index
|
| 260 |
+
# resolution, string is considered an exact match
|
| 261 |
+
formats = [
|
| 262 |
+
"%Y",
|
| 263 |
+
"%Y-%m",
|
| 264 |
+
"%Y-%m-%d",
|
| 265 |
+
"%Y-%m-%d %H",
|
| 266 |
+
"%Y-%m-%d %H:%M",
|
| 267 |
+
"%Y-%m-%d %H:%M:%S",
|
| 268 |
+
]
|
| 269 |
+
resolutions = ["year", "month", "day", "hour", "minute", "second"]
|
| 270 |
+
for rnum, resolution in enumerate(resolutions[2:], 2):
|
| 271 |
+
# we check only 'day', 'hour', 'minute' and 'second'
|
| 272 |
+
unit = Timedelta("1 " + resolution)
|
| 273 |
+
middate = datetime(2012, 1, 1, 0, 0, 0)
|
| 274 |
+
index = DatetimeIndex([middate - unit, middate, middate + unit])
|
| 275 |
+
values = [1, 2, 3]
|
| 276 |
+
df = DataFrame({"a": values}, index, dtype=np.int64)
|
| 277 |
+
assert df.index.resolution == resolution
|
| 278 |
+
|
| 279 |
+
# Timestamp with the same resolution as index
|
| 280 |
+
# Should be exact match for Series (return scalar)
|
| 281 |
+
# and raise KeyError for Frame
|
| 282 |
+
for timestamp, expected in zip(index, values):
|
| 283 |
+
ts_string = timestamp.strftime(formats[rnum])
|
| 284 |
+
# make ts_string as precise as index
|
| 285 |
+
result = df["a"][ts_string]
|
| 286 |
+
assert isinstance(result, np.int64)
|
| 287 |
+
assert result == expected
|
| 288 |
+
msg = rf"^'{ts_string}'$"
|
| 289 |
+
with pytest.raises(KeyError, match=msg):
|
| 290 |
+
df[ts_string]
|
| 291 |
+
|
| 292 |
+
# Timestamp with resolution less precise than index
|
| 293 |
+
for fmt in formats[:rnum]:
|
| 294 |
+
for element, theslice in [[0, slice(None, 1)], [1, slice(1, None)]]:
|
| 295 |
+
ts_string = index[element].strftime(fmt)
|
| 296 |
+
|
| 297 |
+
# Series should return slice
|
| 298 |
+
result = df["a"][ts_string]
|
| 299 |
+
expected = df["a"][theslice]
|
| 300 |
+
tm.assert_series_equal(result, expected)
|
| 301 |
+
|
| 302 |
+
# pre-2.0 df[ts_string] was overloaded to interpret this
|
| 303 |
+
# as slicing along index
|
| 304 |
+
with pytest.raises(KeyError, match=ts_string):
|
| 305 |
+
df[ts_string]
|
| 306 |
+
|
| 307 |
+
# Timestamp with resolution more precise than index
|
| 308 |
+
# Compatible with existing key
|
| 309 |
+
# Should return scalar for Series
|
| 310 |
+
# and raise KeyError for Frame
|
| 311 |
+
for fmt in formats[rnum + 1 :]:
|
| 312 |
+
ts_string = index[1].strftime(fmt)
|
| 313 |
+
result = df["a"][ts_string]
|
| 314 |
+
assert isinstance(result, np.int64)
|
| 315 |
+
assert result == 2
|
| 316 |
+
msg = rf"^'{ts_string}'$"
|
| 317 |
+
with pytest.raises(KeyError, match=msg):
|
| 318 |
+
df[ts_string]
|
| 319 |
+
|
| 320 |
+
# Not compatible with existing key
|
| 321 |
+
# Should raise KeyError
|
| 322 |
+
for fmt, res in list(zip(formats, resolutions))[rnum + 1 :]:
|
| 323 |
+
ts = index[1] + Timedelta("1 " + res)
|
| 324 |
+
ts_string = ts.strftime(fmt)
|
| 325 |
+
msg = rf"^'{ts_string}'$"
|
| 326 |
+
with pytest.raises(KeyError, match=msg):
|
| 327 |
+
df["a"][ts_string]
|
| 328 |
+
with pytest.raises(KeyError, match=msg):
|
| 329 |
+
df[ts_string]
|
| 330 |
+
|
| 331 |
+
def test_partial_slicing_with_multiindex(self):
|
| 332 |
+
# GH 4758
|
| 333 |
+
# partial string indexing with a MultiIndex was buggy
|
| 334 |
+
df = DataFrame(
|
| 335 |
+
{
|
| 336 |
+
"ACCOUNT": ["ACCT1", "ACCT1", "ACCT1", "ACCT2"],
|
| 337 |
+
"TICKER": ["ABC", "MNP", "XYZ", "XYZ"],
|
| 338 |
+
"val": [1, 2, 3, 4],
|
| 339 |
+
},
|
| 340 |
+
index=date_range("2013-06-19 09:30:00", periods=4, freq="5min"),
|
| 341 |
+
)
|
| 342 |
+
df_multi = df.set_index(["ACCOUNT", "TICKER"], append=True)
|
| 343 |
+
|
| 344 |
+
expected = DataFrame(
|
| 345 |
+
[[1]], index=Index(["ABC"], name="TICKER"), columns=["val"]
|
| 346 |
+
)
|
| 347 |
+
result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1")]
|
| 348 |
+
tm.assert_frame_equal(result, expected)
|
| 349 |
+
|
| 350 |
+
expected = df_multi.loc[
|
| 351 |
+
(Timestamp("2013-06-19 09:30:00", tz=None), "ACCT1", "ABC")
|
| 352 |
+
]
|
| 353 |
+
result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1", "ABC")]
|
| 354 |
+
tm.assert_series_equal(result, expected)
|
| 355 |
+
|
| 356 |
+
# partial string indexing on first level, scalar indexing on the other two
|
| 357 |
+
result = df_multi.loc[("2013-06-19", "ACCT1", "ABC")]
|
| 358 |
+
expected = df_multi.iloc[:1].droplevel([1, 2])
|
| 359 |
+
tm.assert_frame_equal(result, expected)
|
| 360 |
+
|
| 361 |
+
def test_partial_slicing_with_multiindex_series(self):
|
| 362 |
+
# GH 4294
|
| 363 |
+
# partial slice on a Series with a MultiIndex
|
| 364 |
+
ser = Series(
|
| 365 |
+
range(250),
|
| 366 |
+
index=MultiIndex.from_product(
|
| 367 |
+
[date_range("2000-1-1", periods=50), range(5)]
|
| 368 |
+
),
|
| 369 |
+
)
|
| 370 |
+
|
| 371 |
+
s2 = ser[:-1].copy()
|
| 372 |
+
expected = s2["2000-1-4"]
|
| 373 |
+
result = s2[Timestamp("2000-1-4")]
|
| 374 |
+
tm.assert_series_equal(result, expected)
|
| 375 |
+
|
| 376 |
+
result = ser[Timestamp("2000-1-4")]
|
| 377 |
+
expected = ser["2000-1-4"]
|
| 378 |
+
tm.assert_series_equal(result, expected)
|
| 379 |
+
|
| 380 |
+
df2 = DataFrame(ser)
|
| 381 |
+
expected = df2.xs("2000-1-4")
|
| 382 |
+
result = df2.loc[Timestamp("2000-1-4")]
|
| 383 |
+
tm.assert_frame_equal(result, expected)
|
| 384 |
+
|
| 385 |
+
def test_partial_slice_requires_monotonicity(self):
|
| 386 |
+
# Disallowed since 2.0 (GH 37819)
|
| 387 |
+
ser = Series(np.arange(10), date_range("2014-01-01", periods=10))
|
| 388 |
+
|
| 389 |
+
nonmonotonic = ser.iloc[[3, 5, 4]]
|
| 390 |
+
timestamp = Timestamp("2014-01-10")
|
| 391 |
+
with pytest.raises(
|
| 392 |
+
KeyError, match="Value based partial slicing on non-monotonic"
|
| 393 |
+
):
|
| 394 |
+
nonmonotonic["2014-01-10":]
|
| 395 |
+
|
| 396 |
+
with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"):
|
| 397 |
+
nonmonotonic[timestamp:]
|
| 398 |
+
|
| 399 |
+
with pytest.raises(
|
| 400 |
+
KeyError, match="Value based partial slicing on non-monotonic"
|
| 401 |
+
):
|
| 402 |
+
nonmonotonic.loc["2014-01-10":]
|
| 403 |
+
|
| 404 |
+
with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"):
|
| 405 |
+
nonmonotonic.loc[timestamp:]
|
| 406 |
+
|
| 407 |
+
def test_loc_datetime_length_one(self):
|
| 408 |
+
# GH16071
|
| 409 |
+
df = DataFrame(
|
| 410 |
+
columns=["1"],
|
| 411 |
+
index=date_range("2016-10-01T00:00:00", "2016-10-01T23:59:59"),
|
| 412 |
+
)
|
| 413 |
+
result = df.loc[datetime(2016, 10, 1) :]
|
| 414 |
+
tm.assert_frame_equal(result, df)
|
| 415 |
+
|
| 416 |
+
result = df.loc["2016-10-01T00:00:00":]
|
| 417 |
+
tm.assert_frame_equal(result, df)
|
| 418 |
+
|
| 419 |
+
@pytest.mark.parametrize(
|
| 420 |
+
"start",
|
| 421 |
+
[
|
| 422 |
+
"2018-12-02 21:50:00+00:00",
|
| 423 |
+
Timestamp("2018-12-02 21:50:00+00:00"),
|
| 424 |
+
Timestamp("2018-12-02 21:50:00+00:00").to_pydatetime(),
|
| 425 |
+
],
|
| 426 |
+
)
|
| 427 |
+
@pytest.mark.parametrize(
|
| 428 |
+
"end",
|
| 429 |
+
[
|
| 430 |
+
"2018-12-02 21:52:00+00:00",
|
| 431 |
+
Timestamp("2018-12-02 21:52:00+00:00"),
|
| 432 |
+
Timestamp("2018-12-02 21:52:00+00:00").to_pydatetime(),
|
| 433 |
+
],
|
| 434 |
+
)
|
| 435 |
+
def test_getitem_with_datestring_with_UTC_offset(self, start, end):
|
| 436 |
+
# GH 24076
|
| 437 |
+
idx = date_range(
|
| 438 |
+
start="2018-12-02 14:50:00-07:00",
|
| 439 |
+
end="2018-12-02 14:50:00-07:00",
|
| 440 |
+
freq="1min",
|
| 441 |
+
)
|
| 442 |
+
df = DataFrame(1, index=idx, columns=["A"])
|
| 443 |
+
result = df[start:end]
|
| 444 |
+
expected = df.iloc[0:3, :]
|
| 445 |
+
tm.assert_frame_equal(result, expected)
|
| 446 |
+
|
| 447 |
+
# GH 16785
|
| 448 |
+
start = str(start)
|
| 449 |
+
end = str(end)
|
| 450 |
+
with pytest.raises(ValueError, match="Both dates must"):
|
| 451 |
+
df[start : end[:-4] + "1:00"]
|
| 452 |
+
|
| 453 |
+
with pytest.raises(ValueError, match="The index must be timezone"):
|
| 454 |
+
df = df.tz_localize(None)
|
| 455 |
+
df[start:end]
|
| 456 |
+
|
| 457 |
+
def test_slice_reduce_to_series(self):
|
| 458 |
+
# GH 27516
|
| 459 |
+
df = DataFrame(
|
| 460 |
+
{"A": range(24)}, index=date_range("2000", periods=24, freq="ME")
|
| 461 |
+
)
|
| 462 |
+
expected = Series(
|
| 463 |
+
range(12), index=date_range("2000", periods=12, freq="ME"), name="A"
|
| 464 |
+
)
|
| 465 |
+
result = df.loc["2000", "A"]
|
| 466 |
+
tm.assert_series_equal(result, expected)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_pickle.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
from pandas import (
|
| 4 |
+
NaT,
|
| 5 |
+
date_range,
|
| 6 |
+
to_datetime,
|
| 7 |
+
)
|
| 8 |
+
import pandas._testing as tm
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class TestPickle:
|
| 12 |
+
def test_pickle(self):
|
| 13 |
+
# GH#4606
|
| 14 |
+
idx = to_datetime(["2013-01-01", NaT, "2014-01-06"])
|
| 15 |
+
idx_p = tm.round_trip_pickle(idx)
|
| 16 |
+
assert idx_p[0] == idx[0]
|
| 17 |
+
assert idx_p[1] is NaT
|
| 18 |
+
assert idx_p[2] == idx[2]
|
| 19 |
+
|
| 20 |
+
def test_pickle_dont_infer_freq(self):
|
| 21 |
+
# GH#11002
|
| 22 |
+
# don't infer freq
|
| 23 |
+
idx = date_range("1750-1-1", "2050-1-1", freq="7D")
|
| 24 |
+
idx_p = tm.round_trip_pickle(idx)
|
| 25 |
+
tm.assert_index_equal(idx, idx_p)
|
| 26 |
+
|
| 27 |
+
def test_pickle_after_set_freq(self):
|
| 28 |
+
dti = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
|
| 29 |
+
dti = dti._with_freq(None)
|
| 30 |
+
|
| 31 |
+
res = tm.round_trip_pickle(dti)
|
| 32 |
+
tm.assert_index_equal(res, dti)
|
| 33 |
+
|
| 34 |
+
def test_roundtrip_pickle_with_tz(self):
|
| 35 |
+
# GH#8367
|
| 36 |
+
# round-trip of timezone
|
| 37 |
+
index = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
|
| 38 |
+
unpickled = tm.round_trip_pickle(index)
|
| 39 |
+
tm.assert_index_equal(index, unpickled)
|
| 40 |
+
|
| 41 |
+
@pytest.mark.parametrize("freq", ["B", "C"])
|
| 42 |
+
def test_pickle_unpickle(self, freq):
|
| 43 |
+
rng = date_range("2009-01-01", "2010-01-01", freq=freq)
|
| 44 |
+
unpickled = tm.round_trip_pickle(rng)
|
| 45 |
+
assert unpickled.freq == freq
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_reindex.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import timedelta
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
from pandas import (
|
| 6 |
+
DatetimeIndex,
|
| 7 |
+
date_range,
|
| 8 |
+
)
|
| 9 |
+
import pandas._testing as tm
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class TestDatetimeIndexReindex:
|
| 13 |
+
def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self):
|
| 14 |
+
# GH#7774
|
| 15 |
+
index = date_range("2013-01-01", periods=3, tz="US/Eastern")
|
| 16 |
+
assert str(index.reindex([])[0].tz) == "US/Eastern"
|
| 17 |
+
assert str(index.reindex(np.array([]))[0].tz) == "US/Eastern"
|
| 18 |
+
|
| 19 |
+
def test_reindex_with_same_tz_nearest(self):
|
| 20 |
+
# GH#32740
|
| 21 |
+
rng_a = date_range("2010-01-01", "2010-01-02", periods=24, tz="utc")
|
| 22 |
+
rng_b = date_range("2010-01-01", "2010-01-02", periods=23, tz="utc")
|
| 23 |
+
result1, result2 = rng_a.reindex(
|
| 24 |
+
rng_b, method="nearest", tolerance=timedelta(seconds=20)
|
| 25 |
+
)
|
| 26 |
+
expected_list1 = [
|
| 27 |
+
"2010-01-01 00:00:00",
|
| 28 |
+
"2010-01-01 01:05:27.272727272",
|
| 29 |
+
"2010-01-01 02:10:54.545454545",
|
| 30 |
+
"2010-01-01 03:16:21.818181818",
|
| 31 |
+
"2010-01-01 04:21:49.090909090",
|
| 32 |
+
"2010-01-01 05:27:16.363636363",
|
| 33 |
+
"2010-01-01 06:32:43.636363636",
|
| 34 |
+
"2010-01-01 07:38:10.909090909",
|
| 35 |
+
"2010-01-01 08:43:38.181818181",
|
| 36 |
+
"2010-01-01 09:49:05.454545454",
|
| 37 |
+
"2010-01-01 10:54:32.727272727",
|
| 38 |
+
"2010-01-01 12:00:00",
|
| 39 |
+
"2010-01-01 13:05:27.272727272",
|
| 40 |
+
"2010-01-01 14:10:54.545454545",
|
| 41 |
+
"2010-01-01 15:16:21.818181818",
|
| 42 |
+
"2010-01-01 16:21:49.090909090",
|
| 43 |
+
"2010-01-01 17:27:16.363636363",
|
| 44 |
+
"2010-01-01 18:32:43.636363636",
|
| 45 |
+
"2010-01-01 19:38:10.909090909",
|
| 46 |
+
"2010-01-01 20:43:38.181818181",
|
| 47 |
+
"2010-01-01 21:49:05.454545454",
|
| 48 |
+
"2010-01-01 22:54:32.727272727",
|
| 49 |
+
"2010-01-02 00:00:00",
|
| 50 |
+
]
|
| 51 |
+
expected1 = DatetimeIndex(
|
| 52 |
+
expected_list1, dtype="datetime64[ns, UTC]", freq=None
|
| 53 |
+
)
|
| 54 |
+
expected2 = np.array([0] + [-1] * 21 + [23], dtype=np.dtype("intp"))
|
| 55 |
+
tm.assert_index_equal(result1, expected1)
|
| 56 |
+
tm.assert_numpy_array_equal(result2, expected2)
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_scalar_compat.py
ADDED
|
@@ -0,0 +1,329 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests for DatetimeIndex methods behaving like their Timestamp counterparts
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import calendar
|
| 6 |
+
from datetime import (
|
| 7 |
+
date,
|
| 8 |
+
datetime,
|
| 9 |
+
time,
|
| 10 |
+
)
|
| 11 |
+
import locale
|
| 12 |
+
import unicodedata
|
| 13 |
+
|
| 14 |
+
import numpy as np
|
| 15 |
+
import pytest
|
| 16 |
+
|
| 17 |
+
from pandas._libs.tslibs import timezones
|
| 18 |
+
|
| 19 |
+
from pandas import (
|
| 20 |
+
DatetimeIndex,
|
| 21 |
+
Index,
|
| 22 |
+
NaT,
|
| 23 |
+
Timestamp,
|
| 24 |
+
date_range,
|
| 25 |
+
offsets,
|
| 26 |
+
)
|
| 27 |
+
import pandas._testing as tm
|
| 28 |
+
from pandas.core.arrays import DatetimeArray
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class TestDatetimeIndexOps:
|
| 32 |
+
def test_dti_no_millisecond_field(self):
|
| 33 |
+
msg = "type object 'DatetimeIndex' has no attribute 'millisecond'"
|
| 34 |
+
with pytest.raises(AttributeError, match=msg):
|
| 35 |
+
DatetimeIndex.millisecond
|
| 36 |
+
|
| 37 |
+
msg = "'DatetimeIndex' object has no attribute 'millisecond'"
|
| 38 |
+
with pytest.raises(AttributeError, match=msg):
|
| 39 |
+
DatetimeIndex([]).millisecond
|
| 40 |
+
|
| 41 |
+
def test_dti_time(self):
|
| 42 |
+
rng = date_range("1/1/2000", freq="12min", periods=10)
|
| 43 |
+
result = Index(rng).time
|
| 44 |
+
expected = [t.time() for t in rng]
|
| 45 |
+
assert (result == expected).all()
|
| 46 |
+
|
| 47 |
+
def test_dti_date(self):
|
| 48 |
+
rng = date_range("1/1/2000", freq="12h", periods=10)
|
| 49 |
+
result = Index(rng).date
|
| 50 |
+
expected = [t.date() for t in rng]
|
| 51 |
+
assert (result == expected).all()
|
| 52 |
+
|
| 53 |
+
@pytest.mark.parametrize(
    "dtype",
    [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"],
)
def test_dti_date2(self, dtype):
    """``.date`` yields a date for the valid entry and NaT for the missing one."""
    # Regression test for GH#21230
    dti = DatetimeIndex(["2018-06-04 10:00:00", NaT], dtype=dtype)
    wanted = np.array([date(2018, 6, 4), NaT])

    tm.assert_numpy_array_equal(dti.date, wanted)
|
| 65 |
+
|
| 66 |
+
@pytest.mark.parametrize(
    "dtype",
    [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"],
)
def test_dti_time2(self, dtype):
    """``.time`` yields a time for the valid entry and NaT for the missing one."""
    # Regression test for GH#21267
    dti = DatetimeIndex(["2018-06-04 10:20:30", NaT], dtype=dtype)
    wanted = np.array([time(10, 20, 30), NaT])

    tm.assert_numpy_array_equal(dti.time, wanted)
|
| 78 |
+
|
| 79 |
+
def test_dti_timetz(self, tz_naive_fixture):
|
| 80 |
+
# GH#21358
|
| 81 |
+
tz = timezones.maybe_get_tz(tz_naive_fixture)
|
| 82 |
+
|
| 83 |
+
expected = np.array([time(10, 20, 30, tzinfo=tz), NaT])
|
| 84 |
+
|
| 85 |
+
index = DatetimeIndex(["2018-06-04 10:20:30", NaT], tz=tz)
|
| 86 |
+
result = index.timetz
|
| 87 |
+
|
| 88 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 89 |
+
|
| 90 |
+
@pytest.mark.parametrize(
    "field",
    [
        "dayofweek",
        "day_of_week",
        "dayofyear",
        "day_of_year",
        "quarter",
        "days_in_month",
        "is_month_start",
        "is_month_end",
        "is_quarter_start",
        "is_quarter_end",
        "is_year_start",
        "is_year_end",
    ],
)
def test_dti_timestamp_fields(self, field):
    """The last element's field on the index equals the same field on its Timestamp."""
    # extra fields from DatetimeIndex like quarter and week
    idx = date_range("2020-01-01", periods=10)
    last_stamp = Timestamp(idx[-1])

    from_index = getattr(idx, field)[-1]
    assert getattr(last_stamp, field) == from_index
|
| 114 |
+
|
| 115 |
+
def test_dti_nanosecond(self):
|
| 116 |
+
dti = DatetimeIndex(np.arange(10))
|
| 117 |
+
expected = Index(np.arange(10, dtype=np.int32))
|
| 118 |
+
|
| 119 |
+
tm.assert_index_equal(dti.nanosecond, expected)
|
| 120 |
+
|
| 121 |
+
@pytest.mark.parametrize("prefix", ["", "dateutil/"])
def test_dti_hour_tzaware(self, prefix):
    """``.hour`` on tz-aware indexes reports the local hour."""
    eastern = DatetimeIndex(
        ["1/1/2012", "3/1/2012", "4/1/2012"], tz=prefix + "US/Eastern"
    )
    assert (eastern.hour == 0).all()

    # a more unusual time zone, GH#1946
    n_hours = 10
    atikokan = date_range(
        "2011-10-02 00:00",
        freq="h",
        periods=n_hours,
        tz=prefix + "America/Atikokan",
    )

    tm.assert_index_equal(
        atikokan.hour, Index(np.arange(n_hours, dtype=np.int32))
    )
|
| 134 |
+
|
| 135 |
+
# GH#12806
# error: Unsupported operand types for + ("List[None]" and "List[str]")
@pytest.mark.parametrize(
    "time_locale", [None] + tm.get_locales()  # type: ignore[operator]
)
def test_day_name_month_name(self, time_locale):
    """Check ``day_name``/``month_name`` on DatetimeIndex and Timestamp per locale.

    Parameters
    ----------
    time_locale : str or None
        Locale passed to ``day_name``/``month_name``. ``None`` means the
        English names are expected; otherwise the expected names are read
        from ``calendar`` while ``LC_TIME`` is set to that locale.
    """
    # Test Monday -> Sunday and January -> December, in that sequence
    english_days = [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]
    if time_locale is None:
        # If the time_locale is None, day-name and month_name should
        # return the english attributes
        expected_days = english_days
        expected_months = [
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ]
    else:
        with tm.set_locale(time_locale, locale.LC_TIME):
            expected_days = calendar.day_name[:]
            expected_months = calendar.month_name[1:]

    # GH#11128
    dti = date_range(freq="D", start=datetime(1998, 1, 1), periods=365)
    # Loop-invariant: compute both name indexes once instead of per day.
    localized_days = dti.day_name(locale=time_locale)
    default_days = dti.day_name(locale=None)
    for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
        name = name.capitalize()
        assert localized_days[day] == name
        assert default_days[day] == eng_name
        ts = Timestamp(datetime(2016, 4, day))
        assert ts.day_name(locale=time_locale) == name
    dti = dti.append(DatetimeIndex([NaT]))
    assert np.isnan(dti.day_name(locale=time_locale)[-1])
    ts = Timestamp(NaT)
    assert np.isnan(ts.day_name(locale=time_locale))

    # GH#12805
    dti = date_range(freq="ME", start="2012", end="2013")
    result = dti.month_name(locale=time_locale)
    expected = Index([month.capitalize() for month in expected_months])

    # work around different normalization schemes GH#22342
    result = result.str.normalize("NFD")
    expected = expected.str.normalize("NFD")

    tm.assert_index_equal(result, expected)

    for item, month in zip(dti, expected_months):
        actual_name = item.month_name(locale=time_locale)
        wanted_name = month.capitalize()

        # Normalize each side from its OWN value. Previously the expected
        # name was overwritten with the normalized result, which made the
        # assertion below compare a string with itself.
        actual_name = unicodedata.normalize("NFD", actual_name)
        wanted_name = unicodedata.normalize("NFD", wanted_name)

        assert actual_name == wanted_name
    dti = dti.append(DatetimeIndex([NaT]))
    assert np.isnan(dti.month_name(locale=time_locale)[-1])
|
| 216 |
+
|
| 217 |
+
def test_dti_week(self):
|
| 218 |
+
# GH#6538: Check that DatetimeIndex and its TimeStamp elements
|
| 219 |
+
# return the same weekofyear accessor close to new year w/ tz
|
| 220 |
+
dates = ["2013/12/29", "2013/12/30", "2013/12/31"]
|
| 221 |
+
dates = DatetimeIndex(dates, tz="Europe/Brussels")
|
| 222 |
+
expected = [52, 1, 1]
|
| 223 |
+
assert dates.isocalendar().week.tolist() == expected
|
| 224 |
+
assert [d.weekofyear for d in dates] == expected
|
| 225 |
+
|
| 226 |
+
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
def test_dti_fields(self, tz):
    """Spot-check field accessors on a daily index covering all of 1998."""
    # GH#13303
    n_days = 365
    dti = date_range(freq="D", start=datetime(1998, 1, 1), periods=n_days, tz=tz)

    # (accessor, position, expected value) for the integer-valued fields
    numeric_checks = [
        ("year", 0, 1998),
        ("month", 0, 1),
        ("day", 0, 1),
        ("hour", 0, 0),
        ("minute", 0, 0),
        ("second", 0, 0),
        ("microsecond", 0, 0),
        ("dayofweek", 0, 3),
        ("dayofyear", 0, 1),
        ("dayofyear", 120, 121),
        ("quarter", 0, 1),
        ("quarter", 120, 2),
        ("days_in_month", 0, 31),
        ("days_in_month", 90, 30),
    ]
    for accessor, pos, value in numeric_checks:
        assert getattr(dti, accessor)[pos] == value

    iso_week = dti.isocalendar().week
    assert iso_week.iloc[0] == 1
    assert iso_week.iloc[120] == 18

    # (accessor, position, expected truthiness) for the boolean fields
    flag_checks = [
        ("is_month_start", 0, True),
        ("is_month_start", 1, False),
        ("is_month_start", 31, True),
        ("is_quarter_start", 0, True),
        ("is_quarter_start", 90, True),
        ("is_year_start", 0, True),
        ("is_year_start", 364, False),
        ("is_month_end", 0, False),
        ("is_month_end", 30, True),
        ("is_month_end", 31, False),
        ("is_month_end", 364, True),
        ("is_quarter_end", 0, False),
        ("is_quarter_end", 30, False),
        ("is_quarter_end", 89, True),
        ("is_quarter_end", 364, True),
        ("is_year_end", 0, False),
        ("is_year_end", 364, True),
    ]
    for accessor, pos, truth in flag_checks:
        assert bool(getattr(dti, accessor)[pos]) is truth

    sized_accessors = [
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "microsecond",
        "dayofweek",
        "dayofyear",
        "quarter",
        "is_month_start",
        "is_month_end",
        "is_quarter_start",
        "is_quarter_end",
        "is_year_start",
        "is_year_end",
    ]
    for accessor in sized_accessors:
        assert len(getattr(dti, accessor)) == n_days
    assert len(dti.isocalendar()) == n_days

    dti.name = "name"

    # non boolean accessors -> return Index
    for accessor in DatetimeArray._field_ops:
        values = getattr(dti, accessor)
        assert len(values) == n_days
        assert isinstance(values, Index)
        assert values.name == "name"

    # boolean accessors -> return array
    for accessor in DatetimeArray._bool_ops:
        values = getattr(dti, accessor)
        assert len(values) == n_days
        assert isinstance(values, np.ndarray)

    # test boolean indexing
    quarter_starts = dti[dti.is_quarter_start]
    tm.assert_index_equal(quarter_starts, dti[[0, 90, 181, 273]])

    leap_days = dti[dti.is_leap_year]
    no_days = DatetimeIndex([], freq="D", tz=dti.tz, name="name").as_unit("ns")
    tm.assert_index_equal(leap_days, no_days)
|
| 309 |
+
|
| 310 |
+
def test_dti_is_year_quarter_start(self):
|
| 311 |
+
dti = date_range(freq="BQE-FEB", start=datetime(1998, 1, 1), periods=4)
|
| 312 |
+
|
| 313 |
+
assert sum(dti.is_quarter_start) == 0
|
| 314 |
+
assert sum(dti.is_quarter_end) == 4
|
| 315 |
+
assert sum(dti.is_year_start) == 0
|
| 316 |
+
assert sum(dti.is_year_end) == 1
|
| 317 |
+
|
| 318 |
+
def test_dti_is_month_start(self):
|
| 319 |
+
dti = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"])
|
| 320 |
+
|
| 321 |
+
assert dti.is_month_start[0] == 1
|
| 322 |
+
|
| 323 |
+
def test_dti_is_month_start_custom(self):
    """``is_month_start`` raises ValueError for a CustomBusinessDay frequency."""
    # Ensure is_start/end accessors throw ValueError for CustomBusinessDay,
    egypt_bday = offsets.CustomBusinessDay(weekmask="Sun Mon Tue Wed Thu")
    dti = date_range(datetime(2013, 4, 30), periods=5, freq=egypt_bday)

    with pytest.raises(
        ValueError,
        match="Custom business days is not supported by is_month_start",
    ):
        dti.is_month_start
|
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_setops.py
ADDED
|
@@ -0,0 +1,666 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import (
|
| 2 |
+
datetime,
|
| 3 |
+
timezone,
|
| 4 |
+
)
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pytest
|
| 8 |
+
import pytz
|
| 9 |
+
|
| 10 |
+
import pandas.util._test_decorators as td
|
| 11 |
+
|
| 12 |
+
import pandas as pd
|
| 13 |
+
from pandas import (
|
| 14 |
+
DataFrame,
|
| 15 |
+
DatetimeIndex,
|
| 16 |
+
Index,
|
| 17 |
+
Series,
|
| 18 |
+
Timestamp,
|
| 19 |
+
bdate_range,
|
| 20 |
+
date_range,
|
| 21 |
+
)
|
| 22 |
+
import pandas._testing as tm
|
| 23 |
+
|
| 24 |
+
from pandas.tseries.offsets import (
|
| 25 |
+
BMonthEnd,
|
| 26 |
+
Minute,
|
| 27 |
+
MonthEnd,
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class TestDatetimeIndexSetOps:
|
| 34 |
+
tz = [
|
| 35 |
+
None,
|
| 36 |
+
"UTC",
|
| 37 |
+
"Asia/Tokyo",
|
| 38 |
+
"US/Eastern",
|
| 39 |
+
"dateutil/Asia/Singapore",
|
| 40 |
+
"dateutil/US/Pacific",
|
| 41 |
+
]
|
| 42 |
+
|
| 43 |
+
# TODO: moved from test_datetimelike; dedup with version below
|
| 44 |
+
def test_union2(self, sort):
|
| 45 |
+
everything = date_range("2020-01-01", periods=10)
|
| 46 |
+
first = everything[:5]
|
| 47 |
+
second = everything[5:]
|
| 48 |
+
union = first.union(second, sort=sort)
|
| 49 |
+
tm.assert_index_equal(union, everything)
|
| 50 |
+
|
| 51 |
+
@pytest.mark.parametrize("box", [np.array, Series, list])
def test_union3(self, sort, box):
    """Union with a boxed list-like gives the same result as union with an Index."""
    full = date_range("2020-01-01", periods=10)
    head, tail = full[:5], full[5:]

    # GH 10149 support listlike inputs other than Index objects
    wanted = head.union(tail, sort=sort)
    boxed_tail = box(tail.values)

    tm.assert_index_equal(head.union(boxed_tail, sort=sort), wanted)
|
| 62 |
+
|
| 63 |
+
@pytest.mark.parametrize("tz", tz)
def test_union(self, tz, sort):
    """Union of daily ranges (disjoint, overlapping, with empty) in both orders."""

    def daily(start, periods):
        # All non-empty operands are daily ranges in the parametrized tz.
        return date_range(start, freq="D", periods=periods, tz=tz)

    rng1 = daily("1/1/2000", 5)
    other1 = daily("1/6/2000", 5)

    rng2 = daily("1/1/2000", 5)
    other2 = daily("1/4/2000", 5)

    rng3 = daily("1/1/2000", 5)
    other3 = DatetimeIndex([], tz=tz).as_unit("ns")

    # (left, right, expected when sorted, expected for right.union(left) unsorted)
    cases = [
        (
            rng1,
            other1,
            daily("1/1/2000", 10),
            DatetimeIndex(list(other1) + list(rng1)),
        ),
        (
            rng2,
            other2,
            daily("1/1/2000", 8),
            DatetimeIndex(list(other2) + list(rng2[:3])),
        ),
        (rng3, other3, daily("1/1/2000", 5), rng3),
    ]

    for rng, other, exp, exp_notsorted in cases:
        tm.assert_index_equal(rng.union(other, sort=sort), exp)

        swapped = other.union(rng, sort=sort)
        tm.assert_index_equal(swapped, exp if sort is None else exp_notsorted)
|
| 93 |
+
|
| 94 |
+
def test_union_coverage(self, sort):
|
| 95 |
+
idx = DatetimeIndex(["2000-01-03", "2000-01-01", "2000-01-02"])
|
| 96 |
+
ordered = DatetimeIndex(idx.sort_values(), freq="infer")
|
| 97 |
+
result = ordered.union(idx, sort=sort)
|
| 98 |
+
tm.assert_index_equal(result, ordered)
|
| 99 |
+
|
| 100 |
+
result = ordered[:0].union(ordered, sort=sort)
|
| 101 |
+
tm.assert_index_equal(result, ordered)
|
| 102 |
+
assert result.freq == ordered.freq
|
| 103 |
+
|
| 104 |
+
def test_union_bug_1730(self, sort):
|
| 105 |
+
rng_a = date_range("1/1/2012", periods=4, freq="3h")
|
| 106 |
+
rng_b = date_range("1/1/2012", periods=4, freq="4h")
|
| 107 |
+
|
| 108 |
+
result = rng_a.union(rng_b, sort=sort)
|
| 109 |
+
exp = list(rng_a) + list(rng_b[1:])
|
| 110 |
+
if sort is None:
|
| 111 |
+
exp = DatetimeIndex(sorted(exp))
|
| 112 |
+
else:
|
| 113 |
+
exp = DatetimeIndex(exp)
|
| 114 |
+
tm.assert_index_equal(result, exp)
|
| 115 |
+
|
| 116 |
+
def test_union_bug_1745(self, sort):
|
| 117 |
+
left = DatetimeIndex(["2012-05-11 15:19:49.695000"])
|
| 118 |
+
right = DatetimeIndex(
|
| 119 |
+
[
|
| 120 |
+
"2012-05-29 13:04:21.322000",
|
| 121 |
+
"2012-05-11 15:27:24.873000",
|
| 122 |
+
"2012-05-11 15:31:05.350000",
|
| 123 |
+
]
|
| 124 |
+
)
|
| 125 |
+
|
| 126 |
+
result = left.union(right, sort=sort)
|
| 127 |
+
exp = DatetimeIndex(
|
| 128 |
+
[
|
| 129 |
+
"2012-05-11 15:19:49.695000",
|
| 130 |
+
"2012-05-29 13:04:21.322000",
|
| 131 |
+
"2012-05-11 15:27:24.873000",
|
| 132 |
+
"2012-05-11 15:31:05.350000",
|
| 133 |
+
]
|
| 134 |
+
)
|
| 135 |
+
if sort is None:
|
| 136 |
+
exp = exp.sort_values()
|
| 137 |
+
tm.assert_index_equal(result, exp)
|
| 138 |
+
|
| 139 |
+
def test_union_bug_4564(self, sort):
|
| 140 |
+
from pandas import DateOffset
|
| 141 |
+
|
| 142 |
+
left = date_range("2013-01-01", "2013-02-01")
|
| 143 |
+
right = left + DateOffset(minutes=15)
|
| 144 |
+
|
| 145 |
+
result = left.union(right, sort=sort)
|
| 146 |
+
exp = list(left) + list(right)
|
| 147 |
+
if sort is None:
|
| 148 |
+
exp = DatetimeIndex(sorted(exp))
|
| 149 |
+
else:
|
| 150 |
+
exp = DatetimeIndex(exp)
|
| 151 |
+
tm.assert_index_equal(result, exp)
|
| 152 |
+
|
| 153 |
+
def test_union_freq_both_none(self, sort):
|
| 154 |
+
# GH11086
|
| 155 |
+
expected = bdate_range("20150101", periods=10)
|
| 156 |
+
expected._data.freq = None
|
| 157 |
+
|
| 158 |
+
result = expected.union(expected, sort=sort)
|
| 159 |
+
tm.assert_index_equal(result, expected)
|
| 160 |
+
assert result.freq is None
|
| 161 |
+
|
| 162 |
+
def test_union_freq_infer(self):
|
| 163 |
+
# When taking the union of two DatetimeIndexes, we infer
|
| 164 |
+
# a freq even if the arguments don't have freq. This matches
|
| 165 |
+
# TimedeltaIndex behavior.
|
| 166 |
+
dti = date_range("2016-01-01", periods=5)
|
| 167 |
+
left = dti[[0, 1, 3, 4]]
|
| 168 |
+
right = dti[[2, 3, 1]]
|
| 169 |
+
|
| 170 |
+
assert left.freq is None
|
| 171 |
+
assert right.freq is None
|
| 172 |
+
|
| 173 |
+
result = left.union(right)
|
| 174 |
+
tm.assert_index_equal(result, dti)
|
| 175 |
+
assert result.freq == "D"
|
| 176 |
+
|
| 177 |
+
def test_union_dataframe_index(self):
|
| 178 |
+
rng1 = date_range("1/1/1999", "1/1/2012", freq="MS")
|
| 179 |
+
s1 = Series(np.random.default_rng(2).standard_normal(len(rng1)), rng1)
|
| 180 |
+
|
| 181 |
+
rng2 = date_range("1/1/1980", "12/1/2001", freq="MS")
|
| 182 |
+
s2 = Series(np.random.default_rng(2).standard_normal(len(rng2)), rng2)
|
| 183 |
+
df = DataFrame({"s1": s1, "s2": s2})
|
| 184 |
+
|
| 185 |
+
exp = date_range("1/1/1980", "1/1/2012", freq="MS")
|
| 186 |
+
tm.assert_index_equal(df.index, exp)
|
| 187 |
+
|
| 188 |
+
def test_union_with_DatetimeIndex(self, sort):
|
| 189 |
+
i1 = Index(np.arange(0, 20, 2, dtype=np.int64))
|
| 190 |
+
i2 = date_range(start="2012-01-03 00:00:00", periods=10, freq="D")
|
| 191 |
+
# Works
|
| 192 |
+
i1.union(i2, sort=sort)
|
| 193 |
+
# Fails with "AttributeError: can't set attribute"
|
| 194 |
+
i2.union(i1, sort=sort)
|
| 195 |
+
|
| 196 |
+
def test_union_same_timezone_different_units(self):
|
| 197 |
+
# GH 55238
|
| 198 |
+
idx1 = date_range("2000-01-01", periods=3, tz="UTC").as_unit("ms")
|
| 199 |
+
idx2 = date_range("2000-01-01", periods=3, tz="UTC").as_unit("us")
|
| 200 |
+
result = idx1.union(idx2)
|
| 201 |
+
expected = date_range("2000-01-01", periods=3, tz="UTC").as_unit("us")
|
| 202 |
+
tm.assert_index_equal(result, expected)
|
| 203 |
+
|
| 204 |
+
# TODO: moved from test_datetimelike; de-duplicate with version below
|
| 205 |
+
def test_intersection2(self):
|
| 206 |
+
first = date_range("2020-01-01", periods=10)
|
| 207 |
+
second = first[5:]
|
| 208 |
+
intersect = first.intersection(second)
|
| 209 |
+
tm.assert_index_equal(intersect, second)
|
| 210 |
+
|
| 211 |
+
# GH 10149
|
| 212 |
+
cases = [klass(second.values) for klass in [np.array, Series, list]]
|
| 213 |
+
for case in cases:
|
| 214 |
+
result = first.intersection(case)
|
| 215 |
+
tm.assert_index_equal(result, second)
|
| 216 |
+
|
| 217 |
+
third = Index(["a", "b", "c"])
|
| 218 |
+
result = first.intersection(third)
|
| 219 |
+
expected = Index([], dtype=object)
|
| 220 |
+
tm.assert_index_equal(result, expected)
|
| 221 |
+
|
| 222 |
+
@pytest.mark.parametrize(
    "tz", [None, "Asia/Tokyo", "US/Eastern", "dateutil/US/Pacific"]
)
def test_intersection(self, tz, sort):
    """Intersection results, names and freq for monotonic and non-monotonic bases."""
    # GH 4690 (with tz)

    def daily(start, end, name, **kwargs):
        return date_range(start, end, freq="D", name=name, **kwargs)

    base = daily("6/1/2000", "6/30/2000", "idx")
    monotonic_cases = [
        # if target has the same name, it is preserved
        (
            daily("5/15/2000", "6/20/2000", "idx"),
            daily("6/1/2000", "6/20/2000", "idx"),
        ),
        # if target name is different, it will be reset
        (
            daily("5/15/2000", "6/20/2000", "other"),
            daily("6/1/2000", "6/20/2000", None),
        ),
        # no overlap at all
        (
            daily("7/1/2000", "7/31/2000", "idx"),
            DatetimeIndex([], freq="D", name="idx", dtype="M8[ns]"),
        ),
    ]
    for rng, expected in monotonic_cases:
        result = base.intersection(rng)
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

    # non-monotonic
    def stamps(values, name):
        return DatetimeIndex(values, tz=tz, name=name).as_unit("ns")

    base = stamps(["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"], "idx")
    partly_shared = ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"]
    shared = ["2011-01-04", "2011-01-02"]

    rng2 = stamps(partly_shared, "idx")
    expected2 = stamps(shared, "idx")

    rng3 = stamps(partly_shared, "other")
    expected3 = stamps(shared, None)

    # GH 7880
    rng4 = daily("7/1/2000", "7/31/2000", "idx", tz=tz)
    expected4 = stamps([], "idx")
    assert expected4.freq is None

    non_monotonic_cases = [
        (rng2, expected2),
        (rng3, expected3),
        (rng4, expected4),
    ]
    for rng, expected in non_monotonic_cases:
        result = base.intersection(rng, sort=sort)
        if sort is None:
            expected = expected.sort_values()
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq
|
| 285 |
+
|
| 286 |
+
# parametrize over both anchored and non-anchored freqs, as they
|
| 287 |
+
# have different code paths
|
| 288 |
+
@pytest.mark.parametrize("freq", ["min", "B"])
def test_intersection_empty(self, tz_aware_fixture, freq):
    """Empty intersections: with an empty slice, and between disjoint slices."""
    # empty same freq GH2129
    tz = tz_aware_fixture
    rng = date_range("6/1/2000", "6/15/2000", freq=freq, tz=tz)

    empty = rng[0:0]
    for left, right in ((empty, rng), (rng, empty)):
        result = left.intersection(right)
        assert len(result) == 0
        assert result.freq == rng.freq

    # no overlap GH#33604
    check_freq = freq != "min"  # We don't preserve freq on non-anchored offsets
    head, tail = rng[:3], rng[-3:]
    # second pair is the first with left and right swapped
    for left, right in ((head, tail), (tail, head)):
        result = left.intersection(right)
        tm.assert_index_equal(result, rng[:0])
        if check_freq:
            # We don't preserve freq on non-anchored offsets
            assert result.freq == rng.freq
|
| 315 |
+
|
| 316 |
+
def test_intersection_bug_1708(self):
|
| 317 |
+
from pandas import DateOffset
|
| 318 |
+
|
| 319 |
+
index_1 = date_range("1/1/2012", periods=4, freq="12h")
|
| 320 |
+
index_2 = index_1 + DateOffset(hours=1)
|
| 321 |
+
|
| 322 |
+
result = index_1.intersection(index_2)
|
| 323 |
+
assert len(result) == 0
|
| 324 |
+
|
| 325 |
+
@pytest.mark.parametrize("tz", tz)
def test_difference(self, tz, sort):
    """Difference of an unsorted index against disjoint, overlapping and empty others."""
    rng_dates = ["1/2/2000", "1/3/2000", "1/1/2000", "1/4/2000", "1/5/2000"]

    def unsorted_index(dates):
        return DatetimeIndex(dates, tz=tz)

    # (left, right, expected before any sorting)
    cases = [
        (
            unsorted_index(rng_dates),
            date_range("1/6/2000", freq="D", periods=5, tz=tz),
            unsorted_index(rng_dates),
        ),
        (
            unsorted_index(rng_dates),
            date_range("1/4/2000", freq="D", periods=5, tz=tz),
            unsorted_index(rng_dates[:3]),
        ),
        (
            unsorted_index(rng_dates),
            DatetimeIndex([], tz=tz),
            unsorted_index(rng_dates),
        ),
    ]

    for rng, other, expected in cases:
        result_diff = rng.difference(other, sort)
        if sort is None and len(other):
            # We dont sort (yet?) when empty GH#24959
            expected = expected.sort_values()
        tm.assert_index_equal(result_diff, expected)
|
| 351 |
+
|
| 352 |
+
def test_difference_freq(self, sort):
|
| 353 |
+
# GH14323: difference of DatetimeIndex should not preserve frequency
|
| 354 |
+
|
| 355 |
+
index = date_range("20160920", "20160925", freq="D")
|
| 356 |
+
other = date_range("20160921", "20160924", freq="D")
|
| 357 |
+
expected = DatetimeIndex(["20160920", "20160925"], dtype="M8[ns]", freq=None)
|
| 358 |
+
idx_diff = index.difference(other, sort)
|
| 359 |
+
tm.assert_index_equal(idx_diff, expected)
|
| 360 |
+
tm.assert_attr_equal("freq", idx_diff, expected)
|
| 361 |
+
|
| 362 |
+
# preserve frequency when the difference is a contiguous
|
| 363 |
+
# subset of the original range
|
| 364 |
+
other = date_range("20160922", "20160925", freq="D")
|
| 365 |
+
idx_diff = index.difference(other, sort)
|
| 366 |
+
expected = DatetimeIndex(["20160920", "20160921"], dtype="M8[ns]", freq="D")
|
| 367 |
+
tm.assert_index_equal(idx_diff, expected)
|
| 368 |
+
tm.assert_attr_equal("freq", idx_diff, expected)
|
| 369 |
+
|
| 370 |
+
def test_datetimeindex_diff(self, sort):
|
| 371 |
+
dti1 = date_range(freq="QE-JAN", start=datetime(1997, 12, 31), periods=100)
|
| 372 |
+
dti2 = date_range(freq="QE-JAN", start=datetime(1997, 12, 31), periods=98)
|
| 373 |
+
assert len(dti1.difference(dti2, sort)) == 2
|
| 374 |
+
|
| 375 |
+
@pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Eastern"])
def test_setops_preserve_freq(self, tz):
    """Name, freq and tz of union/intersection results on slices of a daily range."""
    rng = date_range("1/1/2000", "1/1/2002", name="idx", tz=tz)
    head = rng[:50]

    # adjacent slices
    adjacent = head.union(rng[50:100])
    assert adjacent.name == rng.name
    assert adjacent.freq == rng.freq
    assert adjacent.tz == rng.tz

    # overlapping slices
    overlapping = head.union(rng[30:100])
    assert overlapping.name == rng.name
    assert overlapping.freq == rng.freq
    assert overlapping.tz == rng.tz

    # slices separated by a gap: no freq on the result
    gapped = head.union(rng[60:100])
    assert gapped.name == rng.name
    assert gapped.freq is None
    assert gapped.tz == rng.tz

    common = head.intersection(rng[25:75])
    assert common.name == rng.name
    assert common.freqstr == "D"
    assert common.tz == rng.tz

    # Same values as rng[25:75], rebuilt from a list under a different name.
    nofreq = DatetimeIndex(list(rng[25:75]), name="other")

    for combined in (head.union(nofreq), head.intersection(nofreq)):
        assert combined.name is None
        assert combined.freq == rng.freq
        assert combined.tz == rng.tz
|
| 409 |
+
|
| 410 |
+
def test_intersection_non_tick_no_fastpath(self):
|
| 411 |
+
# GH#42104
|
| 412 |
+
dti = DatetimeIndex(
|
| 413 |
+
[
|
| 414 |
+
"2018-12-31",
|
| 415 |
+
"2019-03-31",
|
| 416 |
+
"2019-06-30",
|
| 417 |
+
"2019-09-30",
|
| 418 |
+
"2019-12-31",
|
| 419 |
+
"2020-03-31",
|
| 420 |
+
],
|
| 421 |
+
freq="QE-DEC",
|
| 422 |
+
)
|
| 423 |
+
result = dti[::2].intersection(dti[1::2])
|
| 424 |
+
expected = dti[:0]
|
| 425 |
+
tm.assert_index_equal(result, expected)
|
| 426 |
+
|
| 427 |
+
def test_dti_intersection(self):
|
| 428 |
+
rng = date_range("1/1/2011", periods=100, freq="h", tz="utc")
|
| 429 |
+
|
| 430 |
+
left = rng[10:90][::-1]
|
| 431 |
+
right = rng[20:80][::-1]
|
| 432 |
+
|
| 433 |
+
assert left.tz == rng.tz
|
| 434 |
+
result = left.intersection(right)
|
| 435 |
+
assert result.tz == left.tz
|
| 436 |
+
|
| 437 |
+
# Note: not difference, as there is no symmetry requirement there
@pytest.mark.parametrize("setop", ["union", "intersection", "symmetric_difference"])
def test_dti_setop_aware(self, setop):
    # non-overlapping
    # GH#39328 as of 2.0 we cast these to UTC instead of object
    central = date_range("2012-11-15 00:00:00", periods=6, freq="h", tz="US/Central")
    eastern = date_range("2012-11-15 12:00:00", periods=6, freq="h", tz="US/Eastern")

    result = getattr(central, setop)(eastern)

    # The expected value is the same operation applied to both operands
    # after converting them to UTC.
    central_utc = central.tz_convert("UTC")
    eastern_utc = eastern.tz_convert("UTC")
    expected = getattr(central_utc, setop)(eastern_utc)

    tm.assert_index_equal(result, expected)
    assert result.tz == central_utc.tz

    # Element-level tz is only checked when the result is non-empty.
    if len(result):
        for endpoint in (result[0], result[-1]):
            assert endpoint.tz is timezone.utc
|
| 456 |
+
|
| 457 |
+
def test_dti_union_mixed(self):
|
| 458 |
+
# GH#21671
|
| 459 |
+
rng = DatetimeIndex([Timestamp("2011-01-01"), pd.NaT])
|
| 460 |
+
rng2 = DatetimeIndex(["2012-01-01", "2012-01-02"], tz="Asia/Tokyo")
|
| 461 |
+
result = rng.union(rng2)
|
| 462 |
+
expected = Index(
|
| 463 |
+
[
|
| 464 |
+
Timestamp("2011-01-01"),
|
| 465 |
+
pd.NaT,
|
| 466 |
+
Timestamp("2012-01-01", tz="Asia/Tokyo"),
|
| 467 |
+
Timestamp("2012-01-02", tz="Asia/Tokyo"),
|
| 468 |
+
],
|
| 469 |
+
dtype=object,
|
| 470 |
+
)
|
| 471 |
+
tm.assert_index_equal(result, expected)
|
| 472 |
+
|
| 473 |
+
|
| 474 |
+
class TestBusinessDatetimeIndex:
    # Set-operation tests on ranges built with bdate_range / date_range.

    def test_union(self, sort):
        bdays = bdate_range(START, END)

        # overlapping
        overlapping = bdays[:10].union(bdays[5:10], sort=sort)
        assert isinstance(overlapping, DatetimeIndex)

        # non-overlapping, gap in middle
        gapped = bdays[:5].union(bdays[10:], sort=sort)
        assert isinstance(gapped, Index)

        # non-overlapping, no gap
        left, right = bdays[:5], bdays[5:10]
        contiguous = left.union(right, sort=sort)
        assert isinstance(contiguous, DatetimeIndex)

        # order does not matter
        if sort is None:
            expected = contiguous
        else:
            # Unsorted: the swapped union keeps the operands' order.
            expected = DatetimeIndex(list(right) + list(left))
        tm.assert_index_equal(right.union(left, sort=sort), expected)

        # overlapping, but different offset
        month_ends = date_range(START, END, freq=BMonthEnd())
        self_union = month_ends.union(month_ends, sort=sort)
        assert isinstance(self_union, DatetimeIndex)

    def test_union_not_cacheable(self, sort):
        minutes = date_range("1/1/2000", periods=50, freq=Minute())

        # Tail slice united with an overlapping head slice.
        tail = minutes[10:]
        head = minutes[:25]
        merged = tail.union(head, sort=sort)
        if sort is None:
            expected = minutes
        else:
            expected = DatetimeIndex(list(minutes[10:]) + list(minutes[:10]))
        tm.assert_index_equal(merged, expected)

        # Second operand lies entirely inside the first.
        tail = minutes[10:]
        inner = minutes[15:35]
        merged = tail.union(inner, sort=sort)
        tm.assert_index_equal(merged, minutes[10:])

    def test_intersection(self):
        minutes = date_range("1/1/2000", periods=50, freq=Minute())
        tail = minutes[10:]
        head = minutes[:25]
        expected = minutes[10:25]

        common = tail.intersection(head)
        tm.assert_index_equal(common, expected)
        assert isinstance(common, DatetimeIndex)
        assert common.freq == minutes.freq

        # The same intersection is computed a second time and re-checked.
        common = tail.intersection(head)
        tm.assert_index_equal(common, expected)

        # non-overlapping
        common = minutes[:10].intersection(minutes[10:])
        empty = DatetimeIndex([]).as_unit("ns")
        tm.assert_index_equal(common, empty)

    def test_intersection_bug(self):
        # GH #771
        wide = bdate_range("11/30/2011", "12/31/2011")
        narrow = bdate_range("12/10/2011", "12/20/2011")

        common = wide.intersection(narrow)

        tm.assert_index_equal(common, narrow)
        assert common.freq == narrow.freq

    def test_intersection_list(self):
        # GH#35876
        # values is not an Index -> no name -> retain "a"
        values = [Timestamp("2020-01-01"), Timestamp("2020-02-01")]
        named = DatetimeIndex(values, name="a")

        tm.assert_index_equal(named.intersection(values), named)

    def test_month_range_union_tz_pytz(self, sort):
        tz = pytz.timezone("US/Eastern")

        early_start, early_end = datetime(2011, 1, 1), datetime(2011, 3, 1)
        late_start, late_end = datetime(2011, 3, 1), datetime(2011, 5, 1)

        early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd())
        late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd())

        # No assertion on the result: the union only has to complete.
        early_dr.union(late_dr, sort=sort)

    @td.skip_if_windows
    def test_month_range_union_tz_dateutil(self, sort):
        from pandas._libs.tslibs.timezones import dateutil_gettz

        tz = dateutil_gettz("US/Eastern")

        early_start, early_end = datetime(2011, 1, 1), datetime(2011, 3, 1)
        late_start, late_end = datetime(2011, 3, 1), datetime(2011, 5, 1)

        early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd())
        late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd())

        # No assertion on the result: the union only has to complete.
        early_dr.union(late_dr, sort=sort)

    @pytest.mark.parametrize("sort", [False, None])
    def test_intersection_duplicates(self, sort):
        # GH#38196
        stamps = [
            Timestamp("2019-12-13"),
            Timestamp("2019-12-12"),
            Timestamp("2019-12-12"),
        ]
        with_dupes = Index(stamps)

        # Self-intersection is expected to drop the repeated entry.
        deduped = with_dupes.intersection(with_dupes, sort=sort)

        expected = Index([Timestamp("2019-12-13"), Timestamp("2019-12-12")])
        tm.assert_index_equal(deduped, expected)
|
| 606 |
+
|
| 607 |
+
|
| 608 |
+
class TestCustomDatetimeIndex:
|
| 609 |
+
def test_union(self, sort):
    # Unions between slices of a bdate_range built with freq="C".
    cdays = bdate_range(START, END, freq="C")

    # overlapping
    overlapping = cdays[:10].union(cdays[5:10], sort=sort)
    assert isinstance(overlapping, DatetimeIndex)

    # non-overlapping, gap in middle
    gapped = cdays[:5].union(cdays[10:], sort)
    assert isinstance(gapped, Index)

    # non-overlapping, no gap
    left, right = cdays[:5], cdays[5:10]
    contiguous = left.union(right, sort=sort)
    assert isinstance(contiguous, DatetimeIndex)

    # order does not matter
    if sort is None:
        swapped = right.union(left, sort=sort)
        tm.assert_index_equal(swapped, contiguous)

    # overlapping, but different offset
    month_ends = date_range(START, END, freq=BMonthEnd())
    self_union = month_ends.union(month_ends, sort=sort)
    assert isinstance(self_union, DatetimeIndex)
|
| 641 |
+
|
| 642 |
+
def test_intersection_bug(self):
|
| 643 |
+
# GH #771
|
| 644 |
+
a = bdate_range("11/30/2011", "12/31/2011", freq="C")
|
| 645 |
+
b = bdate_range("12/10/2011", "12/20/2011", freq="C")
|
| 646 |
+
result = a.intersection(b)
|
| 647 |
+
tm.assert_index_equal(result, b)
|
| 648 |
+
assert result.freq == b.freq
|
| 649 |
+
|
| 650 |
+
@pytest.mark.parametrize(
    "tz", [None, "UTC", "Europe/Berlin", pytz.FixedOffset(-60)]
)
def test_intersection_dst_transition(self, tz):
    # GH 46702: Europe/Berlin has DST transition
    earlier = date_range("2020-03-27", periods=5, freq="D", tz=tz)
    later = date_range("2020-03-30", periods=5, freq="D", tz=tz)

    common = earlier.intersection(later)

    # The two 5-day ranges share 2020-03-30 and 2020-03-31.
    tm.assert_index_equal(
        common, date_range("2020-03-30", periods=2, freq="D", tz=tz)
    )

    # GH#45863 same problem for union
    first = date_range("2021-10-28", periods=3, freq="D", tz="Europe/London")
    second = date_range("2021-10-30", periods=4, freq="D", tz="Europe/London")

    merged = first.union(second)

    tm.assert_index_equal(
        merged, date_range("2021-10-28", periods=6, freq="D", tz="Europe/London")
    )
|