JustinTX committed on Apr 19

Commit

a7e0016

verified ·

1 Parent(s): 31ac681

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/__init__.py +0 -0
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_can_hold_element.py +79 -0
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_from_scalar.py +55 -0
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_ndarray.py +36 -0
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_object_arr.py +20 -0
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_dict_compat.py +14 -0
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_downcast.py +97 -0
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_find_common_type.py +175 -0
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_infer_datetimelike.py +28 -0
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_infer_dtype.py +216 -0
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_maybe_box_native.py +40 -0
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_promote.py +530 -0
py311/lib/python3.11/site-packages/pandas/tests/extension/base/casting.py +87 -0
py311/lib/python3.11/site-packages/pandas/tests/extension/base/constructors.py +142 -0
py311/lib/python3.11/site-packages/pandas/tests/extension/base/dim2.py +345 -0
py311/lib/python3.11/site-packages/pandas/tests/extension/base/setitem.py +451 -0
py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/__init__.py +8 -0
py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/array.py +311 -0
py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/test_decimal.py +587 -0
py311/lib/python3.11/site-packages/pandas/tests/extension/list/__init__.py +7 -0
py311/lib/python3.11/site-packages/pandas/tests/extension/list/array.py +137 -0
py311/lib/python3.11/site-packages/pandas/tests/extension/list/test_list.py +33 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/__init__.py +0 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_append.py +62 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_category.py +391 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_constructors.py +142 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_equals.py +96 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_fillna.py +54 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_formats.py +120 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_indexing.py +420 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_map.py +144 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_reindex.py +78 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_setops.py +18 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/__init__.py +0 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_arithmetic.py +56 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_constructors.py +1204 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_date_range.py +1721 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_datetime.py +216 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_formats.py +356 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_freq_attr.py +61 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_indexing.py +717 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_iter.py +76 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_join.py +153 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_npfuncs.py +13 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_ops.py +56 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_partial_slicing.py +466 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_pickle.py +45 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_reindex.py +56 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_scalar_compat.py +329 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_setops.py +666 -0

py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/__init__.py ADDED Viewed

File without changes

py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_can_hold_element.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import numpy as np
+from pandas.core.dtypes.cast import can_hold_element
+def test_can_hold_element_range(any_int_numpy_dtype):  # can_hold_element with range objects against an empty array of each integer dtype
+    # GH#44261
+    dtype = np.dtype(any_int_numpy_dtype)
+    arr = np.array([], dtype=dtype)
+    rng = range(2, 127)  # small non-negative values: accepted for every dtype
+    assert can_hold_element(arr, rng)
+    # negatives -> can't be held by uint dtypes
+    rng = range(-2, 127)
+    if dtype.kind == "i":
+        assert can_hold_element(arr, rng)
+    else:
+        assert not can_hold_element(arr, rng)
+    rng = range(2, 255)  # largest value is 254: only int8 must reject it
+    if dtype == "int8":
+        assert not can_hold_element(arr, rng)
+    else:
+        assert can_hold_element(arr, rng)
+    rng = range(-255, 65537)  # needs a signed dtype of at least 4 bytes
+    if dtype.kind == "u":
+        assert not can_hold_element(arr, rng)
+    elif dtype.itemsize < 4:
+        assert not can_hold_element(arr, rng)
+    else:
+        assert can_hold_element(arr, rng)
+    # empty
+    rng = range(-(10**10), -(10**10))
+    assert len(rng) == 0
+    # NOTE(review): check disabled here, reason not stated -- confirm: assert can_hold_element(arr, rng)
+    rng = range(10**10, 10**10)
+    assert len(rng) == 0
+    assert can_hold_element(arr, rng)  # empty range: accepted although 10**10 is out of range for the smaller dtypes
+def test_can_hold_element_int_values_float_ndarray():  # int64 array vs. float ndarrays: whole-number values are accepted, fractional or too-large ones are not
+    arr = np.array([], dtype=np.int64)
+    element = np.array([1.0, 2.0])
+    assert can_hold_element(arr, element)
+    assert not can_hold_element(arr, element + 0.5)  # 1.5 and 2.5 are not integral
+    # integer but not losslessly castable to int64
+    element = np.array([3, 2**65], dtype=np.float64)
+    assert not can_hold_element(arr, element)
+def test_can_hold_element_int8_int():  # the value, not the scalar's own type, decides whether an int8 array can hold it
+    arr = np.array([], dtype=np.int8)
+    element = 2  # accepted as a Python int and as every numpy integer scalar type below
+    assert can_hold_element(arr, element)
+    assert can_hold_element(arr, np.int8(element))
+    assert can_hold_element(arr, np.uint8(element))
+    assert can_hold_element(arr, np.int16(element))
+    assert can_hold_element(arr, np.uint16(element))
+    assert can_hold_element(arr, np.int32(element))
+    assert can_hold_element(arr, np.uint32(element))
+    assert can_hold_element(arr, np.int64(element))
+    assert can_hold_element(arr, np.uint64(element))
+    element = 2**9  # 512: rejected in every form tried below
+    assert not can_hold_element(arr, element)
+    assert not can_hold_element(arr, np.int16(element))
+    assert not can_hold_element(arr, np.uint16(element))
+    assert not can_hold_element(arr, np.int32(element))
+    assert not can_hold_element(arr, np.uint32(element))
+    assert not can_hold_element(arr, np.int64(element))
+    assert not can_hold_element(arr, np.uint64(element))

py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_from_scalar.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import numpy as np
+import pytest
+from pandas.core.dtypes.cast import construct_1d_arraylike_from_scalar
+from pandas.core.dtypes.dtypes import CategoricalDtype
+from pandas import (
+    Categorical,
+    Timedelta,
+)
+import pandas._testing as tm
+def test_cast_1d_array_like_from_scalar_categorical():
+    # see gh-19565
+    #
+    # Categorical result from scalar did not maintain
+    # categories and ordering of the passed dtype.
+    cats = ["a", "b", "c"]
+    cat_type = CategoricalDtype(categories=cats, ordered=False)
+    expected = Categorical(["a", "a"], categories=cats)
+    result = construct_1d_arraylike_from_scalar("a", len(expected), cat_type)  # broadcast "a" to the same length as expected
+    tm.assert_categorical_equal(result, expected)
+def test_cast_1d_array_like_from_timestamp(fixed_now_ts):
+    # check we don't lose nanoseconds
+    ts = fixed_now_ts + Timedelta(1)  # shift the fixture timestamp by Timedelta(1)
+    res = construct_1d_arraylike_from_scalar(ts, 2, np.dtype("M8[ns]"))
+    assert res[0] == ts  # the stored element must compare equal to the shifted timestamp
+def test_cast_1d_array_like_from_timedelta():
+    # check we don't lose nanoseconds
+    td = Timedelta(1)
+    res = construct_1d_arraylike_from_scalar(td, 2, np.dtype("m8[ns]"))
+    assert res[0] == td  # the stored element must compare equal to the original Timedelta
+def test_cast_1d_array_like_mismatched_datetimelike():  # timedelta64 scalars into a datetime64 dtype (and the reverse) must raise TypeError
+    td = np.timedelta64("NaT", "ns")
+    dt = np.datetime64("NaT", "ns")
+    with pytest.raises(TypeError, match="Cannot cast"):  # NaT timedelta -> datetime64 dtype
+        construct_1d_arraylike_from_scalar(td, 2, dt.dtype)
+    with pytest.raises(TypeError, match="Cannot cast"):  # non-NaT timedelta -> datetime64 dtype
+        construct_1d_arraylike_from_scalar(np.timedelta64(4, "ns"), 2, dt.dtype)
+    with pytest.raises(TypeError, match="Cannot cast"):  # NaT datetime -> timedelta64 dtype
+        construct_1d_arraylike_from_scalar(dt, 2, td.dtype)
+    with pytest.raises(TypeError, match="Cannot cast"):  # non-NaT datetime -> timedelta64 dtype
+        construct_1d_arraylike_from_scalar(np.datetime64(4, "ns"), 2, td.dtype)

py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_ndarray.py ADDED Viewed

	@@ -0,0 +1,36 @@

+import numpy as np
+import pytest
+import pandas as pd
+import pandas._testing as tm
+from pandas.core.construction import sanitize_array
+@pytest.mark.parametrize(
+    "values, dtype, expected",
+    [
+        ([1, 2, 3], None, np.array([1, 2, 3], dtype=np.int64)),
+        (np.array([1, 2, 3]), None, np.array([1, 2, 3])),
+        (["1", "2", None], None, np.array(["1", "2", None])),
+        (["1", "2", None], np.dtype("str"), np.array(["1", "2", None])),
+        ([1, 2, None], np.dtype("str"), np.array(["1", "2", None])),  # ints become strings, None stays None
+    ],
+)
+def test_construct_1d_ndarray_preserving_na(  # sanitize_array must keep None as a missing value, not stringify it
+    values, dtype, expected, using_infer_string
+):
+    result = sanitize_array(values, index=None, dtype=dtype)
+    if using_infer_string and expected.dtype == object and dtype is None:  # inferred strings are compared as a "str" extension array
+        tm.assert_extension_array_equal(result, pd.array(expected, dtype="str"))
+    else:
+        tm.assert_numpy_array_equal(result, expected)
+@pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]"])
+def test_construct_1d_ndarray_preserving_na_datetimelike(dtype):  # casting datetimelike data to object keeps the numpy scalar type of the elements
+    arr = np.arange(5, dtype=np.int64).view(dtype)  # reinterpret 0..4 as the parametrized datetimelike dtype
+    expected = np.array(list(arr), dtype=object)
+    assert all(isinstance(x, type(arr[0])) for x in expected)  # sanity check on the expectation itself
+    result = sanitize_array(arr, index=None, dtype=np.dtype(object))
+    tm.assert_numpy_array_equal(result, expected)

py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_object_arr.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import pytest
+from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
+@pytest.mark.parametrize("datum1", [1, 2.0, "3", (4, 5), [6, 7], None])
+@pytest.mark.parametrize("datum2", [8, 9.0, "10", (11, 12), [13, 14], None])
+def test_cast_1d_array(datum1, datum2):  # every pairing of the two lists must come back as a 2-element object array
+    data = [datum1, datum2]
+    result = construct_1d_object_array_from_listlike(data)
+    # Direct comparison fails: https://github.com/numpy/numpy/issues/10218
+    assert result.dtype == "object"
+    assert list(result) == data  # elements (including nested tuples/lists) are kept as-is
+@pytest.mark.parametrize("val", [1, 2.0, None])
+def test_cast_1d_array_invalid_scalar(val):  # inputs without a length are rejected with TypeError
+    with pytest.raises(TypeError, match="has no len()"):  # match is a regex: "()" is an empty group, so this matches "has no len"
+        construct_1d_object_array_from_listlike(val)

py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_dict_compat.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import numpy as np
+from pandas.core.dtypes.cast import dict_compat
+from pandas import Timestamp
+def test_dict_compat():  # dict_compat output for np.datetime64 keys compares equal to a Timestamp-keyed dict
+    data_datetime64 = {np.datetime64("1990-03-15"): 1, np.datetime64("2015-03-15"): 2}
+    data_unchanged = {1: 2, 3: 4, 5: 6}
+    expected = {Timestamp("1990-3-15"): 1, Timestamp("2015-03-15"): 2}
+    assert dict_compat(data_datetime64) == expected
+    assert dict_compat(expected) == expected  # Timestamp-keyed input compares equal to itself afterwards
+    assert dict_compat(data_unchanged) == data_unchanged  # so does a dict without datetime keys

py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_downcast.py ADDED Viewed

	@@ -0,0 +1,97 @@

+import decimal
+import numpy as np
+import pytest
+from pandas.core.dtypes.cast import maybe_downcast_to_dtype
+from pandas import (
+    Series,
+    Timedelta,
+)
+import pandas._testing as tm
+@pytest.mark.parametrize(
+    "arr,dtype,expected",
+    [
+        (  # values that are not close to integers: expected unchanged
+            np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]),
+            "infer",
+            np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]),
+        ),
+        (  # last value slightly below 9: expected to become int64
+            np.array([8.0, 8.0, 8.0, 8.0, 8.9999999999995]),
+            "infer",
+            np.array([8, 8, 8, 8, 9], dtype=np.int64),
+        ),
+        (  # last value slightly above 9: expected to become int64
+            np.array([8.0, 8.0, 8.0, 8.0, 9.0000000000005]),
+            "infer",
+            np.array([8, 8, 8, 8, 9], dtype=np.int64),
+        ),
+        (
+            # This is a judgement call, but we do _not_ downcast Decimal
+            #  objects
+            np.array([decimal.Decimal(0.0)]),
+            "int64",
+            np.array([decimal.Decimal(0.0)]),
+        ),
+        (
+            # GH#45837
+            np.array([Timedelta(days=1), Timedelta(days=2)], dtype=object),
+            "infer",
+            np.array([1, 2], dtype="m8[D]").astype("m8[ns]"),
+        ),
+        # TODO: similar for dt64, dt64tz, Period, Interval?
+    ],
+)
+def test_downcast(arr, expected, dtype):  # argument order differs from the parametrize string; pytest binds them by name
+    result = maybe_downcast_to_dtype(arr, dtype)
+    tm.assert_numpy_array_equal(result, expected)
+def test_downcast_booleans():
+    # see gh-16875: coercing of booleans.
+    ser = Series([True, True, False])
+    result = maybe_downcast_to_dtype(ser, np.dtype(np.float64))
+    expected = ser.values  # i.e. the boolean values must come back unchanged, not as float64
+    tm.assert_numpy_array_equal(result, expected)
+def test_downcast_conversion_no_nan(any_real_numpy_dtype):  # whole-number data in any real dtype still equals [1, 2] after "infer"
+    dtype = any_real_numpy_dtype
+    expected = np.array([1, 2])
+    arr = np.array([1.0, 2.0], dtype=dtype)
+    result = maybe_downcast_to_dtype(arr, "infer")
+    tm.assert_almost_equal(result, expected, check_dtype=False)  # only values are compared; the resulting dtype is not checked
+def test_downcast_conversion_nan(float_numpy_dtype):  # float data containing NaN is expected back unchanged under "infer"
+    dtype = float_numpy_dtype
+    data = [1.0, 2.0, np.nan]
+    expected = np.array(data, dtype=dtype)  # same values and dtype as the input
+    arr = np.array(data, dtype=dtype)
+    result = maybe_downcast_to_dtype(arr, "infer")
+    tm.assert_almost_equal(result, expected)
+def test_downcast_conversion_empty(any_real_numpy_dtype):  # an empty array of any real dtype downcasts to an empty int64 array
+    dtype = any_real_numpy_dtype
+    arr = np.array([], dtype=dtype)
+    result = maybe_downcast_to_dtype(arr, np.dtype("int64"))
+    tm.assert_numpy_array_equal(result, np.array([], dtype=np.int64))
+@pytest.mark.parametrize("klass", [np.datetime64, np.timedelta64])
+def test_datetime_likes_nan(klass):  # a float NaN must turn into NaT when downcasting to a datetimelike dtype
+    dtype = klass.__name__ + "[ns]"  # "datetime64[ns]" or "timedelta64[ns]"
+    arr = np.array([1, 2, np.nan])
+    exp = np.array([1, 2, klass("NaT")], dtype)
+    res = maybe_downcast_to_dtype(arr, dtype)
+    tm.assert_numpy_array_equal(res, exp)

py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_find_common_type.py ADDED Viewed

	@@ -0,0 +1,175 @@

+import numpy as np
+import pytest
+from pandas.core.dtypes.cast import find_common_type
+from pandas.core.dtypes.common import pandas_dtype
+from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
+    DatetimeTZDtype,
+    IntervalDtype,
+    PeriodDtype,
+)
+from pandas import (
+    Categorical,
+    Index,
+)
+@pytest.mark.parametrize(
+    "source_dtypes,expected_common_dtype",
+    [
+        ((np.int64,), np.int64),  # a single dtype is its own common type
+        ((np.uint64,), np.uint64),
+        ((np.float32,), np.float32),
+        ((object,), object),
+        # Into ints.
+        ((np.int16, np.int64), np.int64),
+        ((np.int32, np.uint32), np.int64),
+        ((np.uint16, np.uint64), np.uint64),
+        # Into floats.
+        ((np.float16, np.float32), np.float32),
+        ((np.float16, np.int16), np.float32),
+        ((np.float32, np.int16), np.float32),
+        ((np.uint64, np.int64), np.float64),
+        ((np.int16, np.float64), np.float64),
+        ((np.float16, np.int64), np.float64),
+        # Into others.
+        ((np.complex128, np.int32), np.complex128),
+        ((object, np.float32), object),
+        ((object, np.int16), object),
+        # Bool with int.
+        ((np.dtype("bool"), np.int64), object),
+        ((np.dtype("bool"), np.int32), object),
+        ((np.dtype("bool"), np.int16), object),
+        ((np.dtype("bool"), np.int8), object),
+        ((np.dtype("bool"), np.uint64), object),
+        ((np.dtype("bool"), np.uint32), object),
+        ((np.dtype("bool"), np.uint16), object),
+        ((np.dtype("bool"), np.uint8), object),
+        # Bool with float.
+        ((np.dtype("bool"), np.float64), object),
+        ((np.dtype("bool"), np.float32), object),
+        (  # datetime64 / timedelta64 combinations from here on
+            (np.dtype("datetime64[ns]"), np.dtype("datetime64[ns]")),
+            np.dtype("datetime64[ns]"),
+        ),
+        (
+            (np.dtype("timedelta64[ns]"), np.dtype("timedelta64[ns]")),
+            np.dtype("timedelta64[ns]"),
+        ),
+        (  # mixed units: the finer unit (ns) is expected
+            (np.dtype("datetime64[ns]"), np.dtype("datetime64[ms]")),
+            np.dtype("datetime64[ns]"),
+        ),
+        (
+            (np.dtype("timedelta64[ms]"), np.dtype("timedelta64[ns]")),
+            np.dtype("timedelta64[ns]"),
+        ),
+        ((np.dtype("datetime64[ns]"), np.dtype("timedelta64[ns]")), object),
+        ((np.dtype("datetime64[ns]"), np.int64), object),
+    ],
+)
+def test_numpy_dtypes(source_dtypes, expected_common_dtype):  # find_common_type on plain numpy dtypes
+    source_dtypes = [pandas_dtype(x) for x in source_dtypes]  # normalize scalar classes / np.dtype objects before the call
+    assert find_common_type(source_dtypes) == expected_common_dtype
+def test_raises_empty_input():  # an empty list of dtypes is rejected
+    with pytest.raises(ValueError, match="no types given"):
+        find_common_type([])
+@pytest.mark.parametrize(
+    "dtypes,exp_type",
+    [
+        ([CategoricalDtype()], "category"),
+        ([object, CategoricalDtype()], object),  # mixing in object gives object
+        ([CategoricalDtype(), CategoricalDtype()], "category"),
+    ],
+)
+def test_categorical_dtype(dtypes, exp_type):  # find_common_type with CategoricalDtype inputs
+    assert find_common_type(dtypes) == exp_type
+def test_datetimetz_dtype_match():  # two identical tz-aware dtypes keep that dtype
+    dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern")
+    assert find_common_type([dtype, dtype]) == "datetime64[ns, US/Eastern]"
+@pytest.mark.parametrize(
+    "dtype2",
+    [
+        DatetimeTZDtype(unit="ns", tz="Asia/Tokyo"),  # same unit, different timezone
+        np.dtype("datetime64[ns]"),  # tz-naive counterpart
+        object,
+        np.int64,
+    ],
+)
+def test_datetimetz_dtype_mismatch(dtype2):  # any differing partner gives object, in either argument order
+    dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern")
+    assert find_common_type([dtype, dtype2]) == object
+    assert find_common_type([dtype2, dtype]) == object
+def test_period_dtype_match():  # two identical period dtypes keep that dtype
+    dtype = PeriodDtype(freq="D")
+    assert find_common_type([dtype, dtype]) == "period[D]"
+@pytest.mark.parametrize(
+    "dtype2",
+    [
+        DatetimeTZDtype(unit="ns", tz="Asia/Tokyo"),
+        PeriodDtype(freq="2D"),  # same base frequency, different multiple
+        PeriodDtype(freq="h"),
+        np.dtype("datetime64[ns]"),
+        object,
+        np.int64,
+    ],
+)
+def test_period_dtype_mismatch(dtype2):  # any differing partner gives object, in either argument order
+    dtype = PeriodDtype(freq="D")
+    assert find_common_type([dtype, dtype2]) == object
+    assert find_common_type([dtype2, dtype]) == object
+interval_dtypes = [  # right-closed IntervalDtype instances, one per subtype, used to parametrize tests in this module
+    IntervalDtype(np.int64, "right"),
+    IntervalDtype(np.float64, "right"),
+    IntervalDtype(np.uint64, "right"),
+    IntervalDtype(DatetimeTZDtype(unit="ns", tz="US/Eastern"), "right"),
+    IntervalDtype("M8[ns]", "right"),
+    IntervalDtype("m8[ns]", "right"),
+]
+@pytest.mark.parametrize("left", interval_dtypes)
+@pytest.mark.parametrize("right", interval_dtypes)
+def test_interval_dtype(left, right):  # find_common_type over every ordered pair of interval dtypes
+    result = find_common_type([left, right])
+    if left is right:  # the very same dtype object on both sides must be returned as-is
+        assert result is left
+    elif left.subtype.kind in ["i", "u", "f"]:
+        # i.e. numeric
+        if right.subtype.kind in ["i", "u", "f"]:
+            # both numeric -> common numeric subtype
+            expected = IntervalDtype(np.float64, "right")  # float64 for every distinct numeric pair in the list
+            assert result == expected
+        else:
+            assert result == object
+    else:
+        assert result == object  # non-numeric left subtype paired with a different dtype
+@pytest.mark.parametrize("dtype", interval_dtypes)
+def test_interval_dtype_with_categorical(dtype):  # an interval dtype plus a categorical over that same dtype resolves to the interval dtype
+    obj = Index([], dtype=dtype)
+    cat = Categorical([], categories=obj)  # empty categorical whose categories carry the interval dtype
+    result = find_common_type([dtype, cat.dtype])
+    assert result == dtype

py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_infer_datetimelike.py ADDED Viewed

	@@ -0,0 +1,28 @@

+import numpy as np
+import pytest
+from pandas import (
+    DataFrame,
+    NaT,
+    Series,
+    Timestamp,
+)
+@pytest.mark.parametrize(
+    "data,exp_size",
+    [
+        # see gh-16362.
+        ([[NaT, "a", "b", 0], [NaT, "b", "c", 1]], 8),  # 2 rows x 4 columns
+        ([[NaT, "a", 0], [NaT, "b", 1]], 6),  # 2 rows x 3 columns
+    ],
+)
+def test_maybe_infer_to_datetimelike_df_construct(data, exp_size):  # a DataFrame built from mixed NaT/str/int rows has the expected cell count
+    result = DataFrame(np.array(data))
+    assert result.size == exp_size
+def test_maybe_infer_to_datetimelike_ser_construct():
+    # see gh-19671.
+    result = Series(["M1701", Timestamp("20130101")])
+    assert result.dtype.kind == "O"  # a string next to a Timestamp must leave the Series as object dtype

py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_infer_dtype.py ADDED Viewed

	@@ -0,0 +1,216 @@

+from datetime import (
+    date,
+    datetime,
+    timedelta,
+)
+import numpy as np
+import pytest
+from pandas.core.dtypes.cast import (
+    infer_dtype_from,
+    infer_dtype_from_array,
+    infer_dtype_from_scalar,
+)
+from pandas.core.dtypes.common import is_dtype_equal
+from pandas import (
+    Categorical,
+    Interval,
+    Period,
+    Series,
+    Timedelta,
+    Timestamp,
+    date_range,
+)
+def test_infer_dtype_from_int_scalar(any_int_numpy_dtype):
+    # Test that infer_dtype_from_scalar is
+    # returning correct dtype for numpy int scalars.
+    data = np.dtype(any_int_numpy_dtype).type(12)  # 12 as a scalar of the parametrized integer type
+    dtype, val = infer_dtype_from_scalar(data)
+    assert dtype == type(data)
+def test_infer_dtype_from_float_scalar(float_numpy_dtype):  # numpy float scalars infer to their own scalar type
+    float_numpy_dtype = np.dtype(float_numpy_dtype).type  # rebinds the fixture name to the numpy scalar class
+    data = float_numpy_dtype(12)
+    dtype, val = infer_dtype_from_scalar(data)
+    assert dtype == float_numpy_dtype
+@pytest.mark.parametrize(
+    "data,exp_dtype", [(12, np.int64), (np.float64(12), np.float64)]
+)
+def test_infer_dtype_from_python_scalar(data, exp_dtype):  # a Python int infers int64; an np.float64 scalar infers float64
+    dtype, val = infer_dtype_from_scalar(data)
+    assert dtype == exp_dtype
+@pytest.mark.parametrize("bool_val", [True, False])
+def test_infer_dtype_from_boolean(bool_val):  # Python bools infer np.bool_
+    dtype, val = infer_dtype_from_scalar(bool_val)
+    assert dtype == np.bool_
+def test_infer_dtype_from_complex(complex_dtype):  # complex_dtype is a fixture defined outside this view
+    data = np.dtype(complex_dtype).type(1)
+    dtype, val = infer_dtype_from_scalar(data)
+    assert dtype == np.complex128  # expected to be complex128 whatever the parametrized input dtype is
+def test_infer_dtype_from_datetime():  # inferred datetime64 unit for each kind of datetime scalar
+    dt64 = np.datetime64(1, "ns")
+    dtype, val = infer_dtype_from_scalar(dt64)
+    assert dtype == "M8[ns]"
+    ts = Timestamp(1)
+    dtype, val = infer_dtype_from_scalar(ts)
+    assert dtype == "M8[ns]"
+    dt = datetime(2000, 1, 1, 0, 0)  # a stdlib datetime is expected to infer microsecond resolution
+    dtype, val = infer_dtype_from_scalar(dt)
+    assert dtype == "M8[us]"
+def test_infer_dtype_from_timedelta():  # inferred timedelta64 unit for each kind of timedelta scalar
+    td64 = np.timedelta64(1, "ns")
+    dtype, val = infer_dtype_from_scalar(td64)
+    assert dtype == "m8[ns]"
+    pytd = timedelta(1)  # a stdlib timedelta is expected to infer microsecond resolution
+    dtype, val = infer_dtype_from_scalar(pytd)
+    assert dtype == "m8[us]"
+    td = Timedelta(1)
+    dtype, val = infer_dtype_from_scalar(td)
+    assert dtype == "m8[ns]"
+@pytest.mark.parametrize("freq", ["M", "D"])
+def test_infer_dtype_from_period(freq):  # a Period infers a period dtype with its own frequency
+    p = Period("2011-01-01", freq=freq)
+    dtype, val = infer_dtype_from_scalar(p)
+    exp_dtype = f"period[{freq}]"
+    assert dtype == exp_dtype
+    assert val == p  # the returned value must equal the input Period
+def test_infer_dtype_misc():
+    dt = date(2000, 1, 1)  # a stdlib date (not datetime) is expected to infer object
+    dtype, val = infer_dtype_from_scalar(dt)
+    assert dtype == np.object_
+    ts = Timestamp(1, tz="US/Eastern")  # a tz-aware Timestamp is expected to infer the tz-aware dtype
+    dtype, val = infer_dtype_from_scalar(ts)
+    assert dtype == "datetime64[ns, US/Eastern]"
+@pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo"])
+def test_infer_from_scalar_tz(tz):  # the timezone of a Timestamp carries over into the inferred dtype
+    dt = Timestamp(1, tz=tz)
+    dtype, val = infer_dtype_from_scalar(dt)
+    exp_dtype = f"datetime64[ns, {tz}]"
+    assert dtype == exp_dtype
+    assert val == dt  # the returned value must equal the input Timestamp
+@pytest.mark.parametrize(
+    "left, right, subtype",
+    [
+        (0, 1, "int64"),
+        (0.0, 1.0, "float64"),
+        (Timestamp(0), Timestamp(1), "datetime64[ns]"),
+        (Timestamp(0, tz="UTC"), Timestamp(1, tz="UTC"), "datetime64[ns, UTC]"),
+        (Timedelta(0), Timedelta(1), "timedelta64[ns]"),
+    ],
+)
+def test_infer_from_interval(left, right, subtype, closed):  # "closed" is not parametrized here, so it comes from a fixture outside this view
+    # GH 30337
+    interval = Interval(left, right, closed)
+    result_dtype, result_value = infer_dtype_from_scalar(interval)
+    expected_dtype = f"interval[{subtype}, {closed}]"  # subtype follows the endpoints, closed side follows the Interval
+    assert result_dtype == expected_dtype
+    assert result_value == interval
+def test_infer_dtype_from_scalar_errors():  # a 1-element (non zero-dim) ndarray is rejected with ValueError
+    msg = "invalid ndarray passed to infer_dtype_from_scalar"
+    with pytest.raises(ValueError, match=msg):
+        infer_dtype_from_scalar(np.array([1]))
+@pytest.mark.parametrize(
+    "value, expected",
+    [
+        ("foo", np.object_),
+        (b"foo", np.object_),
+        (1, np.int64),
+        (1.5, np.float64),
+        (np.datetime64("2016-01-01"), np.dtype("M8[s]")),
+        (Timestamp("20160101"), np.dtype("M8[s]")),
+        (Timestamp("20160101", tz="UTC"), "datetime64[s, UTC]"),
+    ],
+)
+def test_infer_dtype_from_scalar(value, expected, using_infer_string):  # scalar inference, plus rejection of the same scalars by the array variant
+    dtype, _ = infer_dtype_from_scalar(value)
+    if using_infer_string and value == "foo":  # with string inference enabled, the str case is expected to be "string"
+        expected = "string"
+    assert is_dtype_equal(dtype, expected)
+    with pytest.raises(TypeError, match="must be list-like"):  # the same scalar passed to the array variant must raise
+        infer_dtype_from_array(value)
+@pytest.mark.parametrize(
+    "arr, expected",
+    [
+        ([1], np.dtype(int)),
+        (np.array([1], dtype=np.int64), np.int64),
+        ([np.nan, 1, ""], np.object_),
+        (np.array([[1.0, 2.0]]), np.float64),  # 2-D input
+        (Categorical(list("aabc")), "category"),
+        (Categorical([1, 2, 3]), "category"),
+        (date_range("20160101", periods=3), np.dtype("=M8[ns]")),
+        (
+            date_range("20160101", periods=3, tz="US/Eastern"),
+            "datetime64[ns, US/Eastern]",
+        ),
+        (Series([1.0, 2, 3]), np.float64),
+        (Series(list("abc")), np.object_),
+        (
+            Series(date_range("20160101", periods=3, tz="US/Eastern")),
+            "datetime64[ns, US/Eastern]",
+        ),
+    ],
+)
+def test_infer_dtype_from_array(arr, expected, using_infer_string):  # dtype inference for lists, ndarrays, Categoricals, indexes and Series
+    dtype, _ = infer_dtype_from_array(arr)
+    if (  # only the Series(list("abc")) case is switched to "string" under string inference
+        using_infer_string
+        and isinstance(arr, Series)
+        and arr.tolist() == ["a", "b", "c"]
+    ):
+        expected = "string"
+    assert is_dtype_equal(dtype, expected)
+@pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64])
+def test_infer_dtype_from_scalar_zerodim_datetimelike(cls):
+    # ndarray.item() can incorrectly return int instead of td64/dt64
+    val = cls(1234, "ns")
+    arr = np.array(val)  # zero-dimensional array wrapping the scalar
+    dtype, res = infer_dtype_from_scalar(arr)
+    assert dtype.type is cls
+    assert isinstance(res, cls)  # the unwrapped value must still be a dt64/td64, not an int
+    dtype, res = infer_dtype_from(arr)  # the generic entry point must agree on the dtype
+    assert dtype.type is cls

py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_maybe_box_native.py ADDED Viewed

	@@ -0,0 +1,40 @@

+from datetime import datetime
+import numpy as np
+import pytest
+from pandas.core.dtypes.cast import maybe_box_native
+from pandas import (
+    Interval,
+    Period,
+    Timedelta,
+    Timestamp,
+)
+@pytest.mark.parametrize(
+    "obj,expected_dtype",
+    [
+        (b"\x00\x10", bytes),
+        (int(4), int),
+        (np.uint(4), int),  # numpy integer scalars -> Python int
+        (np.int32(-4), int),
+        (np.uint8(4), int),
+        (float(454.98), float),
+        (np.float16(0.4), float),  # numpy float scalars -> Python float
+        (np.float64(1.4), float),
+        (np.bool_(False), bool),
+        (datetime(2005, 2, 25), datetime),
+        (np.datetime64("2005-02-25"), Timestamp),  # numpy datetimelike scalars -> pandas scalars
+        (Timestamp("2005-02-25"), Timestamp),
+        (np.timedelta64(1, "D"), Timedelta),
+        (Timedelta(1, "D"), Timedelta),
+        (Interval(0, 1), Interval),
+        (Period("4Q2005"), Period),
+    ],
+)
+def test_maybe_box_native(obj, expected_dtype):  # maybe_box_native must return exactly the listed type for each input
+    boxed_obj = maybe_box_native(obj)
+    result_dtype = type(boxed_obj)
+    assert result_dtype is expected_dtype  # identity check: a subclass of the expected type would fail

py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_promote.py ADDED Viewed

	@@ -0,0 +1,530 @@

+"""
+These test the method maybe_promote from core/dtypes/cast.py
+"""
+import datetime
+from decimal import Decimal
+import numpy as np
+import pytest
+from pandas._libs.tslibs import NaT
+from pandas.core.dtypes.cast import maybe_promote
+from pandas.core.dtypes.common import is_scalar
+from pandas.core.dtypes.dtypes import DatetimeTZDtype
+from pandas.core.dtypes.missing import isna
+import pandas as pd
+def _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar=None):
+    """
+    Auxiliary function to unify testing of scalar/array promotion.
+    Parameters
+    ----------
+    dtype : dtype
+        The value to pass on as the first argument to maybe_promote.
+    fill_value : scalar
+        The value to pass on as the second argument to maybe_promote as
+        a scalar.
+    expected_dtype : dtype
+        The expected dtype returned by maybe_promote (by design this is the
+        same regardless of whether fill_value was passed as a scalar or in an
+        array!).
+    exp_val_for_scalar : scalar
+        The expected value for the (potentially upcast) fill_value returned by
+        maybe_promote.
+    """
+    assert is_scalar(fill_value)  # only the scalar path is exercised by this helper
+    # here, we pass on fill_value as a scalar directly; the expected value
+    # returned from maybe_promote is fill_value, potentially upcast to the
+    # returned dtype.
+    result_dtype, result_fill_value = maybe_promote(dtype, fill_value)
+    expected_fill_value = exp_val_for_scalar
+    assert result_dtype == expected_dtype
+    _assert_match(result_fill_value, expected_fill_value)  # checks type as well as value / NA-ness
+def _assert_match(result_fill_value, expected_fill_value):  # asserts both values have matching types and are either equal or both missing
+    # GH#23982/25425 require the same type in addition to equality/NA-ness
+    res_type = type(result_fill_value)
+    ex_type = type(expected_fill_value)
+    if hasattr(result_fill_value, "dtype"):
+        # Compare types in a way that is robust to platform-specific
+        #  idiosyncrasies where e.g. sometimes we get "ulonglong" as an alias
+        #  for "uint64" or "intc" as an alias for "int32"
+        assert result_fill_value.dtype.kind == expected_fill_value.dtype.kind
+        assert result_fill_value.dtype.itemsize == expected_fill_value.dtype.itemsize
+    else:
+        # On some builds, type comparison fails, e.g. np.int32 != np.int32
+        assert res_type == ex_type or res_type.__name__ == ex_type.__name__
+    match_value = result_fill_value == expected_fill_value
+    if match_value is pd.NA:  # comparing with pd.NA gives pd.NA; treat it as "not equal" and rely on the isna check below
+        match_value = False
+    # Note: type check above ensures that we have the _same_ NA value
+    # for missing values, None == None (which is checked
+    # through match_value above), but np.nan != np.nan and pd.NaT != pd.NaT
+    match_missing = isna(result_fill_value) and isna(expected_fill_value)
+    assert match_value or match_missing
+@pytest.mark.parametrize(
+    "dtype, fill_value, expected_dtype",
+    [
+        # size 8
+        ("int8", 1, "int8"),
+        ("int8", np.iinfo("int8").max + 1, "int16"),
+        ("int8", np.iinfo("int16").max + 1, "int32"),
+        ("int8", np.iinfo("int32").max + 1, "int64"),
+        ("int8", np.iinfo("int64").max + 1, "object"),
+        ("int8", -1, "int8"),
+        ("int8", np.iinfo("int8").min - 1, "int16"),
+        ("int8", np.iinfo("int16").min - 1, "int32"),
+        ("int8", np.iinfo("int32").min - 1, "int64"),
+        ("int8", np.iinfo("int64").min - 1, "object"),
+        # keep signed-ness as long as possible
+        ("uint8", 1, "uint8"),
+        ("uint8", np.iinfo("int8").max + 1, "uint8"),
+        ("uint8", np.iinfo("uint8").max + 1, "uint16"),
+        ("uint8", np.iinfo("int16").max + 1, "uint16"),
+        ("uint8", np.iinfo("uint16").max + 1, "uint32"),
+        ("uint8", np.iinfo("int32").max + 1, "uint32"),
+        ("uint8", np.iinfo("uint32").max + 1, "uint64"),
+        ("uint8", np.iinfo("int64").max + 1, "uint64"),
+        ("uint8", np.iinfo("uint64").max + 1, "object"),
+        # max of uint8 cannot be contained in int8
+        ("uint8", -1, "int16"),
+        ("uint8", np.iinfo("int8").min - 1, "int16"),
+        ("uint8", np.iinfo("int16").min - 1, "int32"),
+        ("uint8", np.iinfo("int32").min - 1, "int64"),
+        ("uint8", np.iinfo("int64").min - 1, "object"),
+        # size 16
+        ("int16", 1, "int16"),
+        ("int16", np.iinfo("int8").max + 1, "int16"),
+        ("int16", np.iinfo("int16").max + 1, "int32"),
+        ("int16", np.iinfo("int32").max + 1, "int64"),
+        ("int16", np.iinfo("int64").max + 1, "object"),
+        ("int16", -1, "int16"),
+        ("int16", np.iinfo("int8").min - 1, "int16"),
+        ("int16", np.iinfo("int16").min - 1, "int32"),
+        ("int16", np.iinfo("int32").min - 1, "int64"),
+        ("int16", np.iinfo("int64").min - 1, "object"),
+        ("uint16", 1, "uint16"),
+        ("uint16", np.iinfo("int8").max + 1, "uint16"),
+        ("uint16", np.iinfo("uint8").max + 1, "uint16"),
+        ("uint16", np.iinfo("int16").max + 1, "uint16"),
+        ("uint16", np.iinfo("uint16").max + 1, "uint32"),
+        ("uint16", np.iinfo("int32").max + 1, "uint32"),
+        ("uint16", np.iinfo("uint32").max + 1, "uint64"),
+        ("uint16", np.iinfo("int64").max + 1, "uint64"),
+        ("uint16", np.iinfo("uint64").max + 1, "object"),
+        ("uint16", -1, "int32"),
+        ("uint16", np.iinfo("int8").min - 1, "int32"),
+        ("uint16", np.iinfo("int16").min - 1, "int32"),
+        ("uint16", np.iinfo("int32").min - 1, "int64"),
+        ("uint16", np.iinfo("int64").min - 1, "object"),
+        # size 32
+        ("int32", 1, "int32"),
+        ("int32", np.iinfo("int8").max + 1, "int32"),
+        ("int32", np.iinfo("int16").max + 1, "int32"),
+        ("int32", np.iinfo("int32").max + 1, "int64"),
+        ("int32", np.iinfo("int64").max + 1, "object"),
+        ("int32", -1, "int32"),
+        ("int32", np.iinfo("int8").min - 1, "int32"),
+        ("int32", np.iinfo("int16").min - 1, "int32"),
+        ("int32", np.iinfo("int32").min - 1, "int64"),
+        ("int32", np.iinfo("int64").min - 1, "object"),
+        ("uint32", 1, "uint32"),
+        ("uint32", np.iinfo("int8").max + 1, "uint32"),
+        ("uint32", np.iinfo("uint8").max + 1, "uint32"),
+        ("uint32", np.iinfo("int16").max + 1, "uint32"),
+        ("uint32", np.iinfo("uint16").max + 1, "uint32"),
+        ("uint32", np.iinfo("int32").max + 1, "uint32"),
+        ("uint32", np.iinfo("uint32").max + 1, "uint64"),
+        ("uint32", np.iinfo("int64").max + 1, "uint64"),
+        ("uint32", np.iinfo("uint64").max + 1, "object"),
+        ("uint32", -1, "int64"),
+        ("uint32", np.iinfo("int8").min - 1, "int64"),
+        ("uint32", np.iinfo("int16").min - 1, "int64"),
+        ("uint32", np.iinfo("int32").min - 1, "int64"),
+        ("uint32", np.iinfo("int64").min - 1, "object"),
+        # size 64
+        ("int64", 1, "int64"),
+        ("int64", np.iinfo("int8").max + 1, "int64"),
+        ("int64", np.iinfo("int16").max + 1, "int64"),
+        ("int64", np.iinfo("int32").max + 1, "int64"),
+        ("int64", np.iinfo("int64").max + 1, "object"),
+        ("int64", -1, "int64"),
+        ("int64", np.iinfo("int8").min - 1, "int64"),
+        ("int64", np.iinfo("int16").min - 1, "int64"),
+        ("int64", np.iinfo("int32").min - 1, "int64"),
+        ("int64", np.iinfo("int64").min - 1, "object"),
+        ("uint64", 1, "uint64"),
+        ("uint64", np.iinfo("int8").max + 1, "uint64"),
+        ("uint64", np.iinfo("uint8").max + 1, "uint64"),
+        ("uint64", np.iinfo("int16").max + 1, "uint64"),
+        ("uint64", np.iinfo("uint16").max + 1, "uint64"),
+        ("uint64", np.iinfo("int32").max + 1, "uint64"),
+        ("uint64", np.iinfo("uint32").max + 1, "uint64"),
+        ("uint64", np.iinfo("int64").max + 1, "uint64"),
+        ("uint64", np.iinfo("uint64").max + 1, "object"),
+        ("uint64", -1, "object"),
+        ("uint64", np.iinfo("int8").min - 1, "object"),
+        ("uint64", np.iinfo("int16").min - 1, "object"),
+        ("uint64", np.iinfo("int32").min - 1, "object"),
+        ("uint64", np.iinfo("int64").min - 1, "object"),
+    ],
+)
+def test_maybe_promote_int_with_int(dtype, fill_value, expected_dtype):
+    dtype = np.dtype(dtype)
+    expected_dtype = np.dtype(expected_dtype)
+    # output is not a generic int, but corresponds to expected_dtype
+    exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0]
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+def test_maybe_promote_int_with_float(any_int_numpy_dtype, float_numpy_dtype):
+    dtype = np.dtype(any_int_numpy_dtype)
+    fill_dtype = np.dtype(float_numpy_dtype)
+    # create array of given dtype; casts "1" to correct dtype
+    fill_value = np.array([1], dtype=fill_dtype)[0]
+    # filling int with float always upcasts to float64
+    expected_dtype = np.float64
+    # fill_value can be different float type
+    exp_val_for_scalar = np.float64(fill_value)
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+def test_maybe_promote_float_with_int(float_numpy_dtype, any_int_numpy_dtype):
+    dtype = np.dtype(float_numpy_dtype)
+    fill_dtype = np.dtype(any_int_numpy_dtype)
+    # create array of given dtype; casts "1" to correct dtype
+    fill_value = np.array([1], dtype=fill_dtype)[0]
+    # filling float with int always keeps float dtype
+    # because: np.finfo('float32').max > np.iinfo('uint64').max
+    expected_dtype = dtype
+    # output is not a generic float, but corresponds to expected_dtype
+    exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0]
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+@pytest.mark.parametrize(
+    "dtype, fill_value, expected_dtype",
+    [
+        # float filled with float
+        ("float32", 1, "float32"),
+        ("float32", float(np.finfo("float32").max) * 1.1, "float64"),
+        ("float64", 1, "float64"),
+        ("float64", float(np.finfo("float32").max) * 1.1, "float64"),
+        # complex filled with float
+        ("complex64", 1, "complex64"),
+        ("complex64", float(np.finfo("float32").max) * 1.1, "complex128"),
+        ("complex128", 1, "complex128"),
+        ("complex128", float(np.finfo("float32").max) * 1.1, "complex128"),
+        # float filled with complex
+        ("float32", 1 + 1j, "complex64"),
+        ("float32", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
+        ("float64", 1 + 1j, "complex128"),
+        ("float64", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
+        # complex filled with complex
+        ("complex64", 1 + 1j, "complex64"),
+        ("complex64", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
+        ("complex128", 1 + 1j, "complex128"),
+        ("complex128", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
+    ],
+)
+def test_maybe_promote_float_with_float(dtype, fill_value, expected_dtype):
+    dtype = np.dtype(dtype)
+    expected_dtype = np.dtype(expected_dtype)
+    # output is not a generic float, but corresponds to expected_dtype
+    exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0]
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+def test_maybe_promote_bool_with_any(any_numpy_dtype):
+    dtype = np.dtype(bool)
+    fill_dtype = np.dtype(any_numpy_dtype)
+    # create array of given dtype; casts "1" to correct dtype
+    fill_value = np.array([1], dtype=fill_dtype)[0]
+    # filling bool with anything but bool casts to object
+    expected_dtype = np.dtype(object) if fill_dtype != bool else fill_dtype
+    exp_val_for_scalar = fill_value
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+def test_maybe_promote_any_with_bool(any_numpy_dtype):
+    dtype = np.dtype(any_numpy_dtype)
+    fill_value = True
+    # filling anything but bool with bool casts to object
+    expected_dtype = np.dtype(object) if dtype != bool else dtype
+    # output is not a generic bool, but corresponds to expected_dtype
+    exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0]
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+def test_maybe_promote_bytes_with_any(bytes_dtype, any_numpy_dtype):
+    dtype = np.dtype(bytes_dtype)
+    fill_dtype = np.dtype(any_numpy_dtype)
+    # create array of given dtype; casts "1" to correct dtype
+    fill_value = np.array([1], dtype=fill_dtype)[0]
+    # we never use bytes dtype internally, always promote to object
+    expected_dtype = np.dtype(np.object_)
+    exp_val_for_scalar = fill_value
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+def test_maybe_promote_any_with_bytes(any_numpy_dtype):
+    dtype = np.dtype(any_numpy_dtype)
+    # create array of given dtype
+    fill_value = b"abc"
+    # we never use bytes dtype internally, always promote to object
+    expected_dtype = np.dtype(np.object_)
+    # output is not a generic bytes, but corresponds to expected_dtype
+    exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0]
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+def test_maybe_promote_datetime64_with_any(datetime64_dtype, any_numpy_dtype):
+    dtype = np.dtype(datetime64_dtype)
+    fill_dtype = np.dtype(any_numpy_dtype)
+    # create array of given dtype; casts "1" to correct dtype
+    fill_value = np.array([1], dtype=fill_dtype)[0]
+    # filling datetime with anything but datetime casts to object
+    if fill_dtype.kind == "M":
+        expected_dtype = dtype
+        # for datetime dtypes, scalar values get cast to to_datetime64
+        exp_val_for_scalar = pd.Timestamp(fill_value).to_datetime64()
+    else:
+        expected_dtype = np.dtype(object)
+        exp_val_for_scalar = fill_value
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+@pytest.mark.parametrize(
+    "fill_value",
+    [
+        pd.Timestamp("now"),
+        np.datetime64("now"),
+        datetime.datetime.now(),
+        datetime.date.today(),
+    ],
+    ids=["pd.Timestamp", "np.datetime64", "datetime.datetime", "datetime.date"],
+)
+def test_maybe_promote_any_with_datetime64(any_numpy_dtype, fill_value):
+    dtype = np.dtype(any_numpy_dtype)
+    # filling datetime with anything but datetime casts to object
+    if dtype.kind == "M":
+        expected_dtype = dtype
+        # for datetime dtypes, scalar values get cast to pd.Timestamp.value
+        exp_val_for_scalar = pd.Timestamp(fill_value).to_datetime64()
+    else:
+        expected_dtype = np.dtype(object)
+        exp_val_for_scalar = fill_value
+    if type(fill_value) is datetime.date and dtype.kind == "M":
+        # Casting date to dt64 is deprecated, in 2.0 enforced to cast to object
+        expected_dtype = np.dtype(object)
+        exp_val_for_scalar = fill_value
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+@pytest.mark.parametrize(
+    "fill_value",
+    [
+        pd.Timestamp(2023, 1, 1),
+        np.datetime64("2023-01-01"),
+        datetime.datetime(2023, 1, 1),
+        datetime.date(2023, 1, 1),
+    ],
+    ids=["pd.Timestamp", "np.datetime64", "datetime.datetime", "datetime.date"],
+)
+def test_maybe_promote_any_numpy_dtype_with_datetimetz(
+    any_numpy_dtype, tz_aware_fixture, fill_value
+):
+    dtype = np.dtype(any_numpy_dtype)
+    fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture)
+    fill_value = pd.Series([fill_value], dtype=fill_dtype)[0]
+    # filling any numpy dtype with datetimetz casts to object
+    expected_dtype = np.dtype(object)
+    exp_val_for_scalar = fill_value
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+def test_maybe_promote_timedelta64_with_any(timedelta64_dtype, any_numpy_dtype):
+    dtype = np.dtype(timedelta64_dtype)
+    fill_dtype = np.dtype(any_numpy_dtype)
+    # create array of given dtype; casts "1" to correct dtype
+    fill_value = np.array([1], dtype=fill_dtype)[0]
+    # filling timedelta with anything but timedelta casts to object
+    if fill_dtype.kind == "m":
+        expected_dtype = dtype
+        # for timedelta dtypes, scalar values get cast to pd.Timedelta.value
+        exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64()
+    else:
+        expected_dtype = np.dtype(object)
+        exp_val_for_scalar = fill_value
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+@pytest.mark.parametrize(
+    "fill_value",
+    [pd.Timedelta(days=1), np.timedelta64(24, "h"), datetime.timedelta(1)],
+    ids=["pd.Timedelta", "np.timedelta64", "datetime.timedelta"],
+)
+def test_maybe_promote_any_with_timedelta64(any_numpy_dtype, fill_value):
+    dtype = np.dtype(any_numpy_dtype)
+    # filling anything but timedelta with timedelta casts to object
+    if dtype.kind == "m":
+        expected_dtype = dtype
+        # for timedelta dtypes, scalar values get cast to pd.Timedelta.value
+        exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64()
+    else:
+        expected_dtype = np.dtype(object)
+        exp_val_for_scalar = fill_value
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+def test_maybe_promote_string_with_any(string_dtype, any_numpy_dtype):
+    dtype = np.dtype(string_dtype)
+    fill_dtype = np.dtype(any_numpy_dtype)
+    # create array of given dtype; casts "1" to correct dtype
+    fill_value = np.array([1], dtype=fill_dtype)[0]
+    # filling string with anything casts to object
+    expected_dtype = np.dtype(object)
+    exp_val_for_scalar = fill_value
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+def test_maybe_promote_any_with_string(any_numpy_dtype):
+    dtype = np.dtype(any_numpy_dtype)
+    # create array of given dtype
+    fill_value = "abc"
+    # filling anything with a string casts to object
+    expected_dtype = np.dtype(object)
+    exp_val_for_scalar = fill_value
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+def test_maybe_promote_object_with_any(object_dtype, any_numpy_dtype):
+    dtype = np.dtype(object_dtype)
+    fill_dtype = np.dtype(any_numpy_dtype)
+    # create array of given dtype; casts "1" to correct dtype
+    fill_value = np.array([1], dtype=fill_dtype)[0]
+    # filling object with anything stays object
+    expected_dtype = np.dtype(object)
+    exp_val_for_scalar = fill_value
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+def test_maybe_promote_any_with_object(any_numpy_dtype):
+    dtype = np.dtype(any_numpy_dtype)
+    # create array of object dtype from a scalar value (i.e. passing
+    # dtypes.common.is_scalar), which can however not be cast to int/float etc.
+    fill_value = pd.DateOffset(1)
+    # filling object with anything stays object
+    expected_dtype = np.dtype(object)
+    exp_val_for_scalar = fill_value
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)
+def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype, nulls_fixture):
+    fill_value = nulls_fixture
+    dtype = np.dtype(any_numpy_dtype)
+    if isinstance(fill_value, Decimal):
+        # Subject to change, but ATM (When Decimal(NAN) is being added to nulls_fixture)
+        #  this is the existing behavior in maybe_promote,
+        #  hinges on is_valid_na_for_dtype
+        if dtype.kind in "iufc":
+            if dtype.kind in "iu":
+                expected_dtype = np.dtype(np.float64)
+            else:
+                expected_dtype = dtype
+            exp_val_for_scalar = np.nan
+        else:
+            expected_dtype = np.dtype(object)
+            exp_val_for_scalar = fill_value
+    elif dtype.kind in "iu" and fill_value is not NaT:
+        # integer + other missing value (np.nan / None) casts to float
+        expected_dtype = np.float64
+        exp_val_for_scalar = np.nan
+    elif dtype == object and fill_value is NaT:
+        # inserting into object does not cast the value
+        # but *does* cast None to np.nan
+        expected_dtype = np.dtype(object)
+        exp_val_for_scalar = fill_value
+    elif dtype.kind in "mM":
+        # datetime / timedelta cast all missing values to dtyped-NaT
+        expected_dtype = dtype
+        exp_val_for_scalar = dtype.type("NaT", "ns")
+    elif fill_value is NaT:
+        # NaT upcasts everything that's not datetime/timedelta to object
+        expected_dtype = np.dtype(object)
+        exp_val_for_scalar = NaT
+    elif dtype.kind in "fc":
+        # float / complex + missing value (!= NaT) stays the same
+        expected_dtype = dtype
+        exp_val_for_scalar = np.nan
+    else:
+        # all other cases cast to object, and use np.nan as missing value
+        expected_dtype = np.dtype(object)
+        if fill_value is pd.NA:
+            exp_val_for_scalar = pd.NA
+        else:
+            exp_val_for_scalar = np.nan
+    _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar)

py311/lib/python3.11/site-packages/pandas/tests/extension/base/casting.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import numpy as np
+import pytest
+import pandas.util._test_decorators as td
+import pandas as pd
+import pandas._testing as tm
+from pandas.core.internals.blocks import NumpyBlock
+class BaseCastingTests:
+    """Casting to and from ExtensionDtypes"""
+    def test_astype_object_series(self, all_data):
+        ser = pd.Series(all_data, name="A")
+        result = ser.astype(object)
+        assert result.dtype == np.dtype(object)
+        if hasattr(result._mgr, "blocks"):
+            blk = result._mgr.blocks[0]
+            assert isinstance(blk, NumpyBlock)
+            assert blk.is_object
+        assert isinstance(result._mgr.array, np.ndarray)
+        assert result._mgr.array.dtype == np.dtype(object)
+    def test_astype_object_frame(self, all_data):
+        df = pd.DataFrame({"A": all_data})
+        result = df.astype(object)
+        if hasattr(result._mgr, "blocks"):
+            blk = result._mgr.blocks[0]
+            assert isinstance(blk, NumpyBlock), type(blk)
+            assert blk.is_object
+        assert isinstance(result._mgr.arrays[0], np.ndarray)
+        assert result._mgr.arrays[0].dtype == np.dtype(object)
+        # check that we can compare the dtypes
+        comp = result.dtypes == df.dtypes
+        assert not comp.any()
+    def test_tolist(self, data):
+        result = pd.Series(data).tolist()
+        expected = list(data)
+        assert result == expected
+    def test_astype_str(self, data):
+        result = pd.Series(data[:2]).astype(str)
+        expected = pd.Series([str(x) for x in data[:2]], dtype=str)
+        tm.assert_series_equal(result, expected)
+    @pytest.mark.parametrize(
+        "nullable_string_dtype",
+        [
+            "string[python]",
+            pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
+        ],
+    )
+    def test_astype_string(self, data, nullable_string_dtype):
+        # GH-33465, GH#45326 as of 2.0 we decode bytes instead of calling str(obj)
+        result = pd.Series(data[:5]).astype(nullable_string_dtype)
+        expected = pd.Series(
+            [str(x) if not isinstance(x, bytes) else x.decode() for x in data[:5]],
+            dtype=nullable_string_dtype,
+        )
+        tm.assert_series_equal(result, expected)
+    def test_to_numpy(self, data):
+        expected = np.asarray(data)
+        result = data.to_numpy()
+        tm.assert_equal(result, expected)
+        result = pd.Series(data).to_numpy()
+        tm.assert_equal(result, expected)
+    def test_astype_empty_dataframe(self, dtype):
+        # https://github.com/pandas-dev/pandas/issues/33113
+        df = pd.DataFrame()
+        result = df.astype(dtype)
+        tm.assert_frame_equal(result, df)
+    @pytest.mark.parametrize("copy", [True, False])
+    def test_astype_own_type(self, data, copy):
+        # ensure that astype returns the original object for equal dtype and copy=False
+        # https://github.com/pandas-dev/pandas/issues/28488
+        result = data.astype(data.dtype, copy=copy)
+        assert (result is data) is (not copy)
+        tm.assert_extension_array_equal(result, data)

py311/lib/python3.11/site-packages/pandas/tests/extension/base/constructors.py ADDED Viewed

	@@ -0,0 +1,142 @@

+import numpy as np
+import pytest
+import pandas as pd
+import pandas._testing as tm
+from pandas.api.extensions import ExtensionArray
+from pandas.core.internals.blocks import EABackedBlock
+class BaseConstructorsTests:
+    def test_from_sequence_from_cls(self, data):
+        result = type(data)._from_sequence(data, dtype=data.dtype)
+        tm.assert_extension_array_equal(result, data)
+        data = data[:0]
+        result = type(data)._from_sequence(data, dtype=data.dtype)
+        tm.assert_extension_array_equal(result, data)
+    def test_array_from_scalars(self, data):
+        scalars = [data[0], data[1], data[2]]
+        result = data._from_sequence(scalars, dtype=data.dtype)
+        assert isinstance(result, type(data))
+    def test_series_constructor(self, data):
+        result = pd.Series(data, copy=False)
+        assert result.dtype == data.dtype
+        assert len(result) == len(data)
+        if hasattr(result._mgr, "blocks"):
+            assert isinstance(result._mgr.blocks[0], EABackedBlock)
+        assert result._mgr.array is data
+        # Series[EA] is unboxed / boxed correctly
+        result2 = pd.Series(result)
+        assert result2.dtype == data.dtype
+        if hasattr(result._mgr, "blocks"):
+            assert isinstance(result2._mgr.blocks[0], EABackedBlock)
+    def test_series_constructor_no_data_with_index(self, dtype, na_value):
+        result = pd.Series(index=[1, 2, 3], dtype=dtype)
+        expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype)
+        tm.assert_series_equal(result, expected)
+        # GH 33559 - empty index
+        result = pd.Series(index=[], dtype=dtype)
+        expected = pd.Series([], index=pd.Index([], dtype="object"), dtype=dtype)
+        tm.assert_series_equal(result, expected)
+    def test_series_constructor_scalar_na_with_index(self, dtype, na_value):
+        result = pd.Series(na_value, index=[1, 2, 3], dtype=dtype)
+        expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype)
+        tm.assert_series_equal(result, expected)
+    def test_series_constructor_scalar_with_index(self, data, dtype):
+        scalar = data[0]
+        result = pd.Series(scalar, index=[1, 2, 3], dtype=dtype)
+        expected = pd.Series([scalar] * 3, index=[1, 2, 3], dtype=dtype)
+        tm.assert_series_equal(result, expected)
+        result = pd.Series(scalar, index=["foo"], dtype=dtype)
+        expected = pd.Series([scalar], index=["foo"], dtype=dtype)
+        tm.assert_series_equal(result, expected)
+    @pytest.mark.parametrize("from_series", [True, False])
+    def test_dataframe_constructor_from_dict(self, data, from_series):
+        if from_series:
+            data = pd.Series(data)
+        result = pd.DataFrame({"A": data})
+        assert result.dtypes["A"] == data.dtype
+        assert result.shape == (len(data), 1)
+        if hasattr(result._mgr, "blocks"):
+            assert isinstance(result._mgr.blocks[0], EABackedBlock)
+        assert isinstance(result._mgr.arrays[0], ExtensionArray)
+    def test_dataframe_from_series(self, data):
+        result = pd.DataFrame(pd.Series(data))
+        assert result.dtypes[0] == data.dtype
+        assert result.shape == (len(data), 1)
+        if hasattr(result._mgr, "blocks"):
+            assert isinstance(result._mgr.blocks[0], EABackedBlock)
+        assert isinstance(result._mgr.arrays[0], ExtensionArray)
+    def test_series_given_mismatched_index_raises(self, data):
+        msg = r"Length of values \(3\) does not match length of index \(5\)"
+        with pytest.raises(ValueError, match=msg):
+            pd.Series(data[:3], index=[0, 1, 2, 3, 4])
+    def test_from_dtype(self, data):
+        # construct from our dtype & string dtype
+        dtype = data.dtype
+        expected = pd.Series(data)
+        result = pd.Series(list(data), dtype=dtype)
+        tm.assert_series_equal(result, expected)
+        result = pd.Series(list(data), dtype=str(dtype))
+        tm.assert_series_equal(result, expected)
+        # gh-30280
+        expected = pd.DataFrame(data).astype(dtype)
+        result = pd.DataFrame(list(data), dtype=dtype)
+        tm.assert_frame_equal(result, expected)
+        result = pd.DataFrame(list(data), dtype=str(dtype))
+        tm.assert_frame_equal(result, expected)
+    def test_pandas_array(self, data):
+        # pd.array(extension_array) should be idempotent...
+        result = pd.array(data)
+        tm.assert_extension_array_equal(result, data)
+    def test_pandas_array_dtype(self, data):
+        # ... but specifying dtype will override idempotency
+        result = pd.array(data, dtype=np.dtype(object))
+        expected = pd.arrays.NumpyExtensionArray(np.asarray(data, dtype=object))
+        tm.assert_equal(result, expected)
+    def test_construct_empty_dataframe(self, dtype):
+        # GH 33623
+        result = pd.DataFrame(columns=["a"], dtype=dtype)
+        expected = pd.DataFrame(
+            {"a": pd.array([], dtype=dtype)}, index=pd.RangeIndex(0)
+        )
+        tm.assert_frame_equal(result, expected)
+    def test_empty(self, dtype):
+        cls = dtype.construct_array_type()
+        result = cls._empty((4,), dtype=dtype)
+        assert isinstance(result, cls)
+        assert result.dtype == dtype
+        assert result.shape == (4,)
+        # GH#19600 method on ExtensionDtype
+        result2 = dtype.empty((4,))
+        assert isinstance(result2, cls)
+        assert result2.dtype == dtype
+        assert result2.shape == (4,)
+        result2 = dtype.empty(4)
+        assert isinstance(result2, cls)
+        assert result2.dtype == dtype
+        assert result2.shape == (4,)

py311/lib/python3.11/site-packages/pandas/tests/extension/base/dim2.py ADDED Viewed

	@@ -0,0 +1,345 @@

+"""
+Tests for 2D compatibility.
+"""
+import numpy as np
+import pytest
+from pandas._libs.missing import is_matching_na
+from pandas.core.dtypes.common import (
+    is_bool_dtype,
+    is_integer_dtype,
+)
+import pandas as pd
+import pandas._testing as tm
+from pandas.core.arrays.integer import NUMPY_INT_TO_DTYPE
+class Dim2CompatTests:
+    # Note: these are ONLY for ExtensionArray subclasses that support 2D arrays.
+    #  i.e. not for pyarrow-backed EAs.
+    @pytest.fixture(autouse=True)
+    def skip_if_doesnt_support_2d(self, dtype, request):
+        if not dtype._supports_2d:
+            node = request.node
+            # In cases where we are mixed in to ExtensionTests, we only want to
+            #  skip tests that are defined in Dim2CompatTests
+            test_func = node._obj
+            if test_func.__qualname__.startswith("Dim2CompatTests"):
+                # TODO: is there a less hacky way of checking this?
+                pytest.skip(f"{dtype} does not support 2D.")
+    def test_transpose(self, data):
+        arr2d = data.repeat(2).reshape(-1, 2)
+        shape = arr2d.shape
+        assert shape[0] != shape[-1]  # otherwise the rest of the test is useless
+        assert arr2d.T.shape == shape[::-1]
+    def test_frame_from_2d_array(self, data):
+        arr2d = data.repeat(2).reshape(-1, 2)
+        df = pd.DataFrame(arr2d)
+        expected = pd.DataFrame({0: arr2d[:, 0], 1: arr2d[:, 1]})
+        tm.assert_frame_equal(df, expected)
+    def test_swapaxes(self, data):
+        arr2d = data.repeat(2).reshape(-1, 2)
+        result = arr2d.swapaxes(0, 1)
+        expected = arr2d.T
+        tm.assert_extension_array_equal(result, expected)
+    def test_delete_2d(self, data):
+        arr2d = data.repeat(3).reshape(-1, 3)
+        # axis = 0
+        result = arr2d.delete(1, axis=0)
+        expected = data.delete(1).repeat(3).reshape(-1, 3)
+        tm.assert_extension_array_equal(result, expected)
+        # axis = 1
+        result = arr2d.delete(1, axis=1)
+        expected = data.repeat(2).reshape(-1, 2)
+        tm.assert_extension_array_equal(result, expected)
+    def test_take_2d(self, data):
+        arr2d = data.reshape(-1, 1)
+        result = arr2d.take([0, 0, -1], axis=0)
+        expected = data.take([0, 0, -1]).reshape(-1, 1)
+        tm.assert_extension_array_equal(result, expected)
+    def test_repr_2d(self, data):
+        # this could fail in a corner case where an element contained the name
+        res = repr(data.reshape(1, -1))
+        assert res.count(f"<{type(data).__name__}") == 1
+        res = repr(data.reshape(-1, 1))
+        assert res.count(f"<{type(data).__name__}") == 1
+    def test_reshape(self, data):
+        arr2d = data.reshape(-1, 1)
+        assert arr2d.shape == (data.size, 1)
+        assert len(arr2d) == len(data)
+        arr2d = data.reshape((-1, 1))
+        assert arr2d.shape == (data.size, 1)
+        assert len(arr2d) == len(data)
+        with pytest.raises(ValueError):
+            data.reshape((data.size, 2))
+        with pytest.raises(ValueError):
+            data.reshape(data.size, 2)
+    def test_getitem_2d(self, data):
+        arr2d = data.reshape(1, -1)
+        result = arr2d[0]
+        tm.assert_extension_array_equal(result, data)
+        with pytest.raises(IndexError):
+            arr2d[1]
+        with pytest.raises(IndexError):
+            arr2d[-2]
+        result = arr2d[:]
+        tm.assert_extension_array_equal(result, arr2d)
+        result = arr2d[:, :]
+        tm.assert_extension_array_equal(result, arr2d)
+        result = arr2d[:, 0]
+        expected = data[[0]]
+        tm.assert_extension_array_equal(result, expected)
+        # dimension-expanding getitem on 1D
+        result = data[:, np.newaxis]
+        tm.assert_extension_array_equal(result, arr2d.T)
+    def test_iter_2d(self, data):
+        arr2d = data.reshape(1, -1)
+        objs = list(iter(arr2d))
+        assert len(objs) == arr2d.shape[0]
+        for obj in objs:
+            assert isinstance(obj, type(data))
+            assert obj.dtype == data.dtype
+            assert obj.ndim == 1
+            assert len(obj) == arr2d.shape[1]
+    def test_tolist_2d(self, data):
+        arr2d = data.reshape(1, -1)
+        result = arr2d.tolist()
+        expected = [data.tolist()]
+        assert isinstance(result, list)
+        assert all(isinstance(x, list) for x in result)
+        assert result == expected
+    def test_concat_2d(self, data):
+        left = type(data)._concat_same_type([data, data]).reshape(-1, 2)
+        right = left.copy()
+        # axis=0
+        result = left._concat_same_type([left, right], axis=0)
+        expected = data._concat_same_type([data] * 4).reshape(-1, 2)
+        tm.assert_extension_array_equal(result, expected)
+        # axis=1
+        result = left._concat_same_type([left, right], axis=1)
+        assert result.shape == (len(data), 4)
+        tm.assert_extension_array_equal(result[:, :2], left)
+        tm.assert_extension_array_equal(result[:, 2:], right)
+        # axis > 1 -> invalid
+        msg = "axis 2 is out of bounds for array of dimension 2"
+        with pytest.raises(ValueError, match=msg):
+            left._concat_same_type([left, right], axis=2)
+    @pytest.mark.parametrize("method", ["backfill", "pad"])
+    def test_fillna_2d_method(self, data_missing, method):
+        # pad_or_backfill is always along axis=0
+        arr = data_missing.repeat(2).reshape(2, 2)
+        assert arr[0].isna().all()
+        assert not arr[1].isna().any()
+        result = arr._pad_or_backfill(method=method, limit=None)
+        expected = data_missing._pad_or_backfill(method=method).repeat(2).reshape(2, 2)
+        tm.assert_extension_array_equal(result, expected)
+        # Reverse so that backfill is not a no-op.
+        arr2 = arr[::-1]
+        assert not arr2[0].isna().any()
+        assert arr2[1].isna().all()
+        result2 = arr2._pad_or_backfill(method=method, limit=None)
+        expected2 = (
+            data_missing[::-1]._pad_or_backfill(method=method).repeat(2).reshape(2, 2)
+        )
+        tm.assert_extension_array_equal(result2, expected2)
+    @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
+    def test_reductions_2d_axis_none(self, data, method):
+        arr2d = data.reshape(1, -1)
+        err_expected = None
+        err_result = None
+        try:
+            expected = getattr(data, method)()
+        except Exception as err:
+            # if the 1D reduction is invalid, the 2D reduction should be as well
+            err_expected = err
+            try:
+                result = getattr(arr2d, method)(axis=None)
+            except Exception as err2:
+                err_result = err2
+        else:
+            result = getattr(arr2d, method)(axis=None)
+        if err_result is not None or err_expected is not None:
+            assert type(err_result) == type(err_expected)
+            return
+        assert is_matching_na(result, expected) or result == expected
+    @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
+    @pytest.mark.parametrize("min_count", [0, 1])
+    def test_reductions_2d_axis0(self, data, method, min_count):
+        """
+        Reduce a (1, N) array along axis=0 and compare against the 1D data.
+
+        The expected result is ``data`` itself for sum/prod/median/mean (after
+        the dtype adjustments made below) and ``data - data`` for std/var,
+        which are computed with ddof=0.
+        """
+        if min_count == 1 and method not in ["sum", "prod"]:
+            pytest.skip(f"min_count not relevant for {method}")
+        arr2d = data.reshape(1, -1)
+        kwargs = {}
+        if method in ["std", "var"]:
+            # pass ddof=0 so we get all-zero std instead of all-NA std
+            kwargs["ddof"] = 0
+        elif method in ["prod", "sum"]:
+            kwargs["min_count"] = min_count
+        try:
+            result = getattr(arr2d, method)(axis=0, **kwargs)
+        except Exception as err:
+            # the 2D reduction raised: the 1D reduction must raise the same type
+            try:
+                getattr(data, method)()
+            except Exception as err2:
+                assert type(err) == type(err2)
+                return
+            else:
+                raise AssertionError("Both reductions should raise or neither")
+        def get_reduction_result_dtype(dtype):
+            # windows and 32bit builds will in some cases have int32/uint32
+            #  where other builds will have int64/uint64.
+            if dtype.itemsize == 8:
+                return dtype
+            elif dtype.kind in "ib":
+                return NUMPY_INT_TO_DTYPE[np.dtype(int)]
+            else:
+                # i.e. dtype.kind == "u"
+                return NUMPY_INT_TO_DTYPE[np.dtype("uint")]
+        if method in ["sum", "prod"]:
+            # expected values are data itself; int/uint/bool data is cast to the
+            #  dtype picked by get_reduction_result_dtype
+            expected = data
+            if data.dtype.kind in "iub":
+                dtype = get_reduction_result_dtype(data.dtype)
+                expected = data.astype(dtype)
+                assert dtype == expected.dtype
+            if min_count == 0:
+                # NAs are expected to come back as 1 for prod and 0 for sum
+                fill_value = 1 if method == "prod" else 0
+                expected = expected.fillna(fill_value)
+            tm.assert_extension_array_equal(result, expected)
+        elif method == "median":
+            # median is expected to give back data unchanged
+            expected = data
+            tm.assert_extension_array_equal(result, expected)
+        elif method in ["mean", "std", "var"]:
+            if is_integer_dtype(data) or is_bool_dtype(data):
+                # integer/bool input is compared as Float64
+                data = data.astype("Float64")
+            if method == "mean":
+                tm.assert_extension_array_equal(result, data)
+            else:
+                tm.assert_extension_array_equal(result, data - data)
+    @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
+    def test_reductions_2d_axis1(self, data, method):
+        arr2d = data.reshape(1, -1)
+        try:
+            result = getattr(arr2d, method)(axis=1)
+        except Exception as err:
+            try:
+                getattr(data, method)()
+            except Exception as err2:
+                assert type(err) == type(err2)
+                return
+            else:
+                raise AssertionError("Both reductions should raise or neither")
+        # not necessarily type/dtype-preserving, so weaker assertions
+        assert result.shape == (1,)
+        expected_scalar = getattr(data, method)()
+        res = result[0]
+        assert is_matching_na(res, expected_scalar) or res == expected_scalar
+class NDArrayBacked2DTests(Dim2CompatTests):
+    # More specific tests for NDArrayBackedExtensionArray subclasses
+    def test_copy_order(self, data):
+        # We should be matching numpy semantics for the "order" keyword in 'copy'
+        arr2d = data.repeat(2).reshape(-1, 2)
+        assert arr2d._ndarray.flags["C_CONTIGUOUS"]
+        res = arr2d.copy()
+        assert res._ndarray.flags["C_CONTIGUOUS"]
+        res = arr2d[::2, ::2].copy()
+        assert res._ndarray.flags["C_CONTIGUOUS"]
+        res = arr2d.copy("F")
+        assert not res._ndarray.flags["C_CONTIGUOUS"]
+        assert res._ndarray.flags["F_CONTIGUOUS"]
+        res = arr2d.copy("K")
+        assert res._ndarray.flags["C_CONTIGUOUS"]
+        res = arr2d.T.copy("K")
+        assert not res._ndarray.flags["C_CONTIGUOUS"]
+        assert res._ndarray.flags["F_CONTIGUOUS"]
+        # order not accepted by numpy
+        msg = r"order must be one of 'C', 'F', 'A', or 'K' \(got 'Q'\)"
+        with pytest.raises(ValueError, match=msg):
+            arr2d.copy("Q")
+        # neither contiguity
+        arr_nc = arr2d[::2]
+        assert not arr_nc._ndarray.flags["C_CONTIGUOUS"]
+        assert not arr_nc._ndarray.flags["F_CONTIGUOUS"]
+        assert arr_nc.copy()._ndarray.flags["C_CONTIGUOUS"]
+        assert not arr_nc.copy()._ndarray.flags["F_CONTIGUOUS"]
+        assert arr_nc.copy("C")._ndarray.flags["C_CONTIGUOUS"]
+        assert not arr_nc.copy("C")._ndarray.flags["F_CONTIGUOUS"]
+        assert not arr_nc.copy("F")._ndarray.flags["C_CONTIGUOUS"]
+        assert arr_nc.copy("F")._ndarray.flags["F_CONTIGUOUS"]
+        assert arr_nc.copy("K")._ndarray.flags["C_CONTIGUOUS"]
+        assert not arr_nc.copy("K")._ndarray.flags["F_CONTIGUOUS"]

py311/lib/python3.11/site-packages/pandas/tests/extension/base/setitem.py ADDED Viewed

	@@ -0,0 +1,451 @@

+import numpy as np
+import pytest
+import pandas as pd
+import pandas._testing as tm
+class BaseSetitemTests:
+    @pytest.fixture(
+        params=[
+            lambda x: x.index,
+            lambda x: list(x.index),
+            lambda x: slice(None),
+            lambda x: slice(0, len(x)),
+            lambda x: range(len(x)),
+            lambda x: list(range(len(x))),
+            lambda x: np.ones(len(x), dtype=bool),
+        ],
+        ids=[
+            "index",
+            "list[index]",
+            "null_slice",
+            "full_slice",
+            "range",
+            "list(range)",
+            "mask",
+        ],
+    )
+    def full_indexer(self, request):
+        """
+        Fixture for an indexer to pass to obj.loc to get/set the full length of the
+        object.
+
+        Each param is a callable that takes the object and returns the indexer
+        (its index, a list, a slice, a range or an all-True boolean mask).
+        In some cases, assumes that obj.index is the default RangeIndex.
+        """
+        return request.param
+    @pytest.fixture(autouse=True)
+    def skip_if_immutable(self, dtype, request):
+        if dtype._is_immutable:
+            node = request.node
+            if node.name.split("[")[0] == "test_is_immutable":
+                # This fixture is auto-used, but we want to not-skip
+                # test_is_immutable.
+                return
+            # When BaseSetitemTests is mixed into ExtensionTests, we only
+            #  want this fixture to operate on the tests defined in this
+            #  class/file.
+            defined_in = node.function.__qualname__.split(".")[0]
+            if defined_in == "BaseSetitemTests":
+                pytest.skip("__setitem__ test not applicable with immutable dtype")
+    def test_is_immutable(self, data):
+        if data.dtype._is_immutable:
+            with pytest.raises(TypeError):
+                data[0] = data[0]
+        else:
+            data[0] = data[1]
+            assert data[0] == data[1]
+    def test_setitem_scalar_series(self, data, box_in_series):
+        if box_in_series:
+            data = pd.Series(data)
+        data[0] = data[1]
+        assert data[0] == data[1]
+    def test_setitem_sequence(self, data, box_in_series):
+        if box_in_series:
+            data = pd.Series(data)
+        original = data.copy()
+        data[[0, 1]] = [data[1], data[0]]
+        assert data[0] == original[1]
+        assert data[1] == original[0]
+    def test_setitem_sequence_mismatched_length_raises(self, data, as_array):
+        ser = pd.Series(data)
+        original = ser.copy()
+        value = [data[0]]
+        if as_array:
+            value = data._from_sequence(value, dtype=data.dtype)
+        xpr = "cannot set using a {} indexer with a different length"
+        with pytest.raises(ValueError, match=xpr.format("list-like")):
+            ser[[0, 1]] = value
+        # Ensure no modifications made before the exception
+        tm.assert_series_equal(ser, original)
+        with pytest.raises(ValueError, match=xpr.format("slice")):
+            ser[slice(3)] = value
+        tm.assert_series_equal(ser, original)
+    def test_setitem_empty_indexer(self, data, box_in_series):
+        if box_in_series:
+            data = pd.Series(data)
+        original = data.copy()
+        data[np.array([], dtype=int)] = []
+        tm.assert_equal(data, original)
+    def test_setitem_sequence_broadcasts(self, data, box_in_series):
+        if box_in_series:
+            data = pd.Series(data)
+        data[[0, 1]] = data[2]
+        assert data[0] == data[2]
+        assert data[1] == data[2]
+    @pytest.mark.parametrize("setter", ["loc", "iloc"])
+    def test_setitem_scalar(self, data, setter):
+        arr = pd.Series(data)
+        setter = getattr(arr, setter)
+        setter[0] = data[1]
+        assert arr[0] == data[1]
+    def test_setitem_loc_scalar_mixed(self, data):
+        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
+        df.loc[0, "B"] = data[1]
+        assert df.loc[0, "B"] == data[1]
+    def test_setitem_loc_scalar_single(self, data):
+        df = pd.DataFrame({"B": data})
+        df.loc[10, "B"] = data[1]
+        assert df.loc[10, "B"] == data[1]
+    def test_setitem_loc_scalar_multiple_homogoneous(self, data):
+        df = pd.DataFrame({"A": data, "B": data})
+        df.loc[10, "B"] = data[1]
+        assert df.loc[10, "B"] == data[1]
+    def test_setitem_iloc_scalar_mixed(self, data):
+        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
+        df.iloc[0, 1] = data[1]
+        assert df.loc[0, "B"] == data[1]
+    def test_setitem_iloc_scalar_single(self, data):
+        df = pd.DataFrame({"B": data})
+        df.iloc[10, 0] = data[1]
+        assert df.loc[10, "B"] == data[1]
+    def test_setitem_iloc_scalar_multiple_homogoneous(self, data):
+        df = pd.DataFrame({"A": data, "B": data})
+        df.iloc[10, 1] = data[1]
+        assert df.loc[10, "B"] == data[1]
+    @pytest.mark.parametrize(
+        "mask",
+        [
+            np.array([True, True, True, False, False]),
+            pd.array([True, True, True, False, False], dtype="boolean"),
+            pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"),
+        ],
+        ids=["numpy-array", "boolean-array", "boolean-array-na"],
+    )
+    def test_setitem_mask(self, data, mask, box_in_series):
+        arr = data[:5].copy()
+        expected = arr.take([0, 0, 0, 3, 4])
+        if box_in_series:
+            arr = pd.Series(arr)
+            expected = pd.Series(expected)
+        arr[mask] = data[0]
+        tm.assert_equal(expected, arr)
+    def test_setitem_mask_raises(self, data, box_in_series):
+        # wrong length
+        mask = np.array([True, False])
+        if box_in_series:
+            data = pd.Series(data)
+        with pytest.raises(IndexError, match="wrong length"):
+            data[mask] = data[0]
+        mask = pd.array(mask, dtype="boolean")
+        with pytest.raises(IndexError, match="wrong length"):
+            data[mask] = data[0]
+    def test_setitem_mask_boolean_array_with_na(self, data, box_in_series):
+        mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean")
+        mask[:3] = True
+        mask[3:5] = pd.NA
+        if box_in_series:
+            data = pd.Series(data)
+        data[mask] = data[0]
+        assert (data[:3] == data[0]).all()
+    @pytest.mark.parametrize(
+        "idx",
+        [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
+        ids=["list", "integer-array", "numpy-array"],
+    )
+    def test_setitem_integer_array(self, data, idx, box_in_series):
+        arr = data[:5].copy()
+        expected = data.take([0, 0, 0, 3, 4])
+        if box_in_series:
+            arr = pd.Series(arr)
+            expected = pd.Series(expected)
+        arr[idx] = arr[0]
+        tm.assert_equal(arr, expected)
+    @pytest.mark.parametrize(
+        "idx, box_in_series",
+        [
+            ([0, 1, 2, pd.NA], False),
+            pytest.param(
+                [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948")
+            ),
+            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
+            # NOTE(review): this case is labelled "integer-array-True" below but
+            #  passes box_in_series=False, so it repeats the previous case.
+            #  TODO confirm whether True was intended.
+            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
+        ],
+        ids=["list-False", "list-True", "integer-array-False", "integer-array-True"],
+    )
+    def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
+        """Setting through an integer indexer that contains NA raises ValueError."""
+        arr = data.copy()
+        # TODO(xfail) this raises KeyError about labels not found (it tries label-based)
+        # for list of labels with Series
+        if box_in_series:
+            # string labels "d", "e", ... so that the integers cannot match a label
+            arr = pd.Series(data, index=[chr(100 + i) for i in range(len(data))])
+        msg = "Cannot index with an integer indexer containing NA values"
+        with pytest.raises(ValueError, match=msg):
+            arr[idx] = arr[0]
+    @pytest.mark.parametrize("as_callable", [True, False])
+    @pytest.mark.parametrize("setter", ["loc", None])
+    def test_setitem_mask_aligned(self, data, as_callable, setter):
+        ser = pd.Series(data)
+        mask = np.zeros(len(data), dtype=bool)
+        mask[:2] = True
+        if as_callable:
+            mask2 = lambda x: mask
+        else:
+            mask2 = mask
+        if setter:
+            # loc
+            target = getattr(ser, setter)
+        else:
+            # Series.__setitem__
+            target = ser
+        target[mask2] = data[5:7]
+        ser[mask2] = data[5:7]
+        assert ser[0] == data[5]
+        assert ser[1] == data[6]
+    @pytest.mark.parametrize("setter", ["loc", None])
+    def test_setitem_mask_broadcast(self, data, setter):
+        ser = pd.Series(data)
+        mask = np.zeros(len(data), dtype=bool)
+        mask[:2] = True
+        if setter:  # loc
+            target = getattr(ser, setter)
+        else:  # __setitem__
+            target = ser
+        target[mask] = data[10]
+        assert ser[0] == data[10]
+        assert ser[1] == data[10]
+    def test_setitem_expand_columns(self, data):
+        df = pd.DataFrame({"A": data})
+        result = df.copy()
+        result["B"] = 1
+        expected = pd.DataFrame({"A": data, "B": [1] * len(data)})
+        tm.assert_frame_equal(result, expected)
+        result = df.copy()
+        result.loc[:, "B"] = 1
+        tm.assert_frame_equal(result, expected)
+        # overwrite with new type
+        result["B"] = data
+        expected = pd.DataFrame({"A": data, "B": data})
+        tm.assert_frame_equal(result, expected)
+    def test_setitem_expand_with_extension(self, data):
+        df = pd.DataFrame({"A": [1] * len(data)})
+        result = df.copy()
+        result["B"] = data
+        expected = pd.DataFrame({"A": [1] * len(data), "B": data})
+        tm.assert_frame_equal(result, expected)
+        result = df.copy()
+        result.loc[:, "B"] = data
+        tm.assert_frame_equal(result, expected)
+    def test_setitem_frame_invalid_length(self, data):
+        """Assigning a 5-element slice of data as a column of a longer frame raises."""
+        df = pd.DataFrame({"A": [1] * len(data)})
+        # regex for the expected message; the parentheses are escaped literals
+        xpr = (
+            rf"Length of values \({len(data[:5])}\) "
+            rf"does not match length of index \({len(df)}\)"
+        )
+        with pytest.raises(ValueError, match=xpr):
+            df["B"] = data[:5]
+    def test_setitem_tuple_index(self, data):
+        ser = pd.Series(data[:2], index=[(0, 0), (0, 1)])
+        expected = pd.Series(data.take([1, 1]), index=ser.index)
+        ser[(0, 0)] = data[1]
+        tm.assert_series_equal(ser, expected)
+    def test_setitem_slice(self, data, box_in_series):
+        arr = data[:5].copy()
+        expected = data.take([0, 0, 0, 3, 4])
+        if box_in_series:
+            arr = pd.Series(arr)
+            expected = pd.Series(expected)
+        arr[:3] = data[0]
+        tm.assert_equal(arr, expected)
+    def test_setitem_loc_iloc_slice(self, data):
+        arr = data[:5].copy()
+        s = pd.Series(arr, index=["a", "b", "c", "d", "e"])
+        expected = pd.Series(data.take([0, 0, 0, 3, 4]), index=s.index)
+        result = s.copy()
+        result.iloc[:3] = data[0]
+        tm.assert_equal(result, expected)
+        result = s.copy()
+        result.loc[:"c"] = data[0]
+        tm.assert_equal(result, expected)
+    def test_setitem_slice_mismatch_length_raises(self, data):
+        arr = data[:5]
+        with pytest.raises(ValueError):
+            arr[:1] = arr[:2]
+    def test_setitem_slice_array(self, data):
+        arr = data[:5].copy()
+        arr[:5] = data[-5:]
+        tm.assert_extension_array_equal(arr, data[-5:])
+    def test_setitem_scalar_key_sequence_raise(self, data):
+        arr = data[:5].copy()
+        with pytest.raises(ValueError):
+            arr[0] = arr[[0, 1]]
+    def test_setitem_preserves_views(self, data):
+        # GH#28150 setitem shouldn't swap the underlying data
+        view1 = data.view()
+        view2 = data[:]
+        data[0] = data[1]
+        assert view1[0] == data[1]
+        assert view2[0] == data[1]
+    def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
+        # https://github.com/pandas-dev/pandas/issues/32395
+        df = expected = pd.DataFrame({0: pd.Series(data)})
+        result = pd.DataFrame(index=df.index)
+        key = full_indexer(df)
+        result.loc[key, 0] = df[0]
+        tm.assert_frame_equal(result, expected)
+    def test_setitem_with_expansion_row(self, data, na_value):
+        df = pd.DataFrame({"data": data[:1]})
+        df.loc[1, "data"] = data[1]
+        expected = pd.DataFrame({"data": data[:2]})
+        tm.assert_frame_equal(df, expected)
+        # https://github.com/pandas-dev/pandas/issues/47284
+        df.loc[2, "data"] = na_value
+        expected = pd.DataFrame(
+            {"data": pd.Series([data[0], data[1], na_value], dtype=data.dtype)}
+        )
+        tm.assert_frame_equal(df, expected)
+    def test_setitem_series(self, data, full_indexer):
+        # https://github.com/pandas-dev/pandas/issues/32395
+        ser = pd.Series(data, name="data")
+        result = pd.Series(index=ser.index, dtype=object, name="data")
+        # because result has object dtype, the attempt to do setting inplace
+        #  is successful, and object dtype is retained
+        key = full_indexer(ser)
+        result.loc[key] = ser
+        expected = pd.Series(
+            data.astype(object), index=ser.index, name="data", dtype=object
+        )
+        tm.assert_series_equal(result, expected)
+    def test_setitem_frame_2d_values(self, data):
+        """
+        Set a frame's own values back into it through ``iloc`` and expect no change.
+
+        Covers the full frame and all-but-the-last row, with a DataFrame and
+        with ``df.values`` as the assigned value.
+        """
+        # GH#44514
+        df = pd.DataFrame({"A": data})
+        # Avoiding using_array_manager fixture
+        #  https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
+        using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
+        using_copy_on_write = pd.options.mode.copy_on_write
+        # keep a reference to the first manager array for the identity check below
+        blk_data = df._mgr.arrays[0]
+        orig = df.copy()
+        df.iloc[:] = df.copy()
+        tm.assert_frame_equal(df, orig)
+        df.iloc[:-1] = df.iloc[:-1].copy()
+        tm.assert_frame_equal(df, orig)
+        df.iloc[:] = df.values
+        tm.assert_frame_equal(df, orig)
+        if not using_array_manager and not using_copy_on_write:
+            # GH#33457 Check that this setting occurred in-place
+            # FIXME(ArrayManager): this should work there too
+            assert df._mgr.arrays[0] is blk_data
+        df.iloc[:-1] = df.values[:-1]
+        tm.assert_frame_equal(df, orig)
+    def test_delitem_series(self, data):
+        # GH#40763
+        ser = pd.Series(data, name="data")
+        taker = np.arange(len(ser))
+        taker = np.delete(taker, 1)
+        expected = ser[taker]
+        del ser[1]
+        tm.assert_series_equal(ser, expected)
+    def test_setitem_invalid(self, data, invalid_scalar):
+        msg = ""  # messages vary by subclass, so we do not test it
+        with pytest.raises((ValueError, TypeError), match=msg):
+            data[0] = invalid_scalar
+        with pytest.raises((ValueError, TypeError), match=msg):
+            data[:] = invalid_scalar
+    def test_setitem_2d_values(self, data):
+        # GH50085
+        original = data.copy()
+        df = pd.DataFrame({"a": data, "b": data})
+        df.loc[[0, 1], :] = df.loc[[1, 0], :].values
+        assert (df.loc[0, :] == original[1]).all()
+        assert (df.loc[1, :] == original[0]).all()

py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/__init__.py ADDED Viewed

	@@ -0,0 +1,8 @@

+from pandas.tests.extension.decimal.array import (
+    DecimalArray,
+    DecimalDtype,
+    make_data,
+    to_decimal,
+)
+__all__ = ["DecimalArray", "DecimalDtype", "to_decimal", "make_data"]

py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/array.py ADDED Viewed

	@@ -0,0 +1,311 @@

+from __future__ import annotations
+import decimal
+import numbers
+import sys
+from typing import TYPE_CHECKING
+import numpy as np
+from pandas.core.dtypes.base import ExtensionDtype
+from pandas.core.dtypes.common import (
+    is_dtype_equal,
+    is_float,
+    is_integer,
+    pandas_dtype,
+)
+import pandas as pd
+from pandas.api.extensions import (
+    no_default,
+    register_extension_dtype,
+)
+from pandas.api.types import (
+    is_list_like,
+    is_scalar,
+)
+from pandas.core import arraylike
+from pandas.core.algorithms import value_counts_internal as value_counts
+from pandas.core.arraylike import OpsMixin
+from pandas.core.arrays import (
+    ExtensionArray,
+    ExtensionScalarOpsMixin,
+)
+from pandas.core.indexers import check_array_indexer
+if TYPE_CHECKING:
+    from pandas._typing import type_t
+@register_extension_dtype
+class DecimalDtype(ExtensionDtype):
+    type = decimal.Decimal
+    name = "decimal"
+    na_value = decimal.Decimal("NaN")
+    _metadata = ("context",)
+    def __init__(self, context=None) -> None:
+        self.context = context or decimal.getcontext()
+    def __repr__(self) -> str:
+        return f"DecimalDtype(context={self.context})"
+    @classmethod
+    def construct_array_type(cls) -> type_t[DecimalArray]:
+        """
+        Return the array type associated with this dtype.
+        Returns
+        -------
+        type
+        """
+        return DecimalArray
+    @property
+    def _is_numeric(self) -> bool:
+        return True
+class DecimalArray(OpsMixin, ExtensionScalarOpsMixin, ExtensionArray):
+    __array_priority__ = 1000
+    def __init__(self, values, dtype=None, copy=False, context=None) -> None:
+        for i, val in enumerate(values):
+            if is_float(val) or is_integer(val):
+                if np.isnan(val):
+                    values[i] = DecimalDtype.na_value
+                else:
+                    # error: Argument 1 has incompatible type "float | int |
+                    # integer[Any]"; expected "Decimal | float | str | tuple[int,
+                    # Sequence[int], int]"
+                    values[i] = DecimalDtype.type(val)  # type: ignore[arg-type]
+            elif not isinstance(val, decimal.Decimal):
+                raise TypeError("All values must be of type " + str(decimal.Decimal))
+        values = np.asarray(values, dtype=object)
+        self._data = values
+        # Some aliases for common attribute names to ensure pandas supports
+        # these
+        self._items = self.data = self._data
+        # those aliases are currently not working due to assumptions
+        # in internal code (GH-20735)
+        # self._values = self.values = self.data
+        self._dtype = DecimalDtype(context)
+    @property
+    def dtype(self):
+        """Return the DecimalDtype instance created in ``__init__``."""
+        return self._dtype
+    @classmethod
+    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
+        """Construct the array from ``scalars``; ``dtype`` and ``copy`` are not used."""
+        return cls(scalars)
+    @classmethod
+    def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
+        return cls._from_sequence(
+            [decimal.Decimal(x) for x in strings], dtype=dtype, copy=copy
+        )
+    @classmethod
+    def _from_factorized(cls, values, original):
+        """Construct the array from ``values``; ``original`` is not used."""
+        return cls(values)
+    _HANDLED_TYPES = (decimal.Decimal, numbers.Number, np.ndarray)
+    def to_numpy(
+        self,
+        dtype=None,
+        copy: bool = False,
+        na_value: object = no_default,
+        decimals=None,
+    ) -> np.ndarray:
+        result = np.asarray(self, dtype=dtype)
+        if decimals is not None:
+            result = np.asarray([round(x, decimals) for x in result])
+        return result
+    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
+        #
+        if not all(
+            isinstance(t, self._HANDLED_TYPES + (DecimalArray,)) for t in inputs
+        ):
+            return NotImplemented
+        result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
+            self, ufunc, method, *inputs, **kwargs
+        )
+        if result is not NotImplemented:
+            # e.g. test_array_ufunc_series_scalar_other
+            return result
+        if "out" in kwargs:
+            return arraylike.dispatch_ufunc_with_out(
+                self, ufunc, method, *inputs, **kwargs
+            )
+        inputs = tuple(x._data if isinstance(x, DecimalArray) else x for x in inputs)
+        result = getattr(ufunc, method)(*inputs, **kwargs)
+        if method == "reduce":
+            result = arraylike.dispatch_reduction_ufunc(
+                self, ufunc, method, *inputs, **kwargs
+            )
+            if result is not NotImplemented:
+                return result
+        def reconstruct(x):
+            if isinstance(x, (decimal.Decimal, numbers.Number)):
+                return x
+            else:
+                return type(self)._from_sequence(x, dtype=self.dtype)
+        if ufunc.nout > 1:
+            return tuple(reconstruct(x) for x in result)
+        else:
+            return reconstruct(result)
+    def __getitem__(self, item):
+        if isinstance(item, numbers.Integral):
+            return self._data[item]
+        else:
+            # array, slice.
+            item = pd.api.indexers.check_array_indexer(self, item)
+            return type(self)(self._data[item])
+    def take(self, indexer, allow_fill=False, fill_value=None):
+        from pandas.api.extensions import take
+        data = self._data
+        if allow_fill and fill_value is None:
+            fill_value = self.dtype.na_value
+        result = take(data, indexer, fill_value=fill_value, allow_fill=allow_fill)
+        return self._from_sequence(result, dtype=self.dtype)
+    def copy(self):
+        return type(self)(self._data.copy(), dtype=self.dtype)
+    def astype(self, dtype, copy=True):
+        if is_dtype_equal(dtype, self._dtype):
+            if not copy:
+                return self
+        dtype = pandas_dtype(dtype)
+        if isinstance(dtype, type(self.dtype)):
+            return type(self)(self._data, copy=copy, context=dtype.context)
+        return super().astype(dtype, copy=copy)
+    def __setitem__(self, key, value) -> None:
+        if is_list_like(value):
+            if is_scalar(key):
+                raise ValueError("setting an array element with a sequence.")
+            value = [decimal.Decimal(v) for v in value]
+        else:
+            value = decimal.Decimal(value)
+        key = check_array_indexer(self, key)
+        self._data[key] = value
+    def __len__(self) -> int:
+        """Return the length of the underlying ndarray."""
+        return len(self._data)
+    def __contains__(self, item) -> bool | np.bool_:
+        if not isinstance(item, decimal.Decimal):
+            return False
+        elif item.is_nan():
+            return self.isna().any()
+        else:
+            return super().__contains__(item)
+    @property
+    def nbytes(self) -> int:
+        n = len(self)
+        if n:
+            return n * sys.getsizeof(self[0])
+        return 0
+    def isna(self):
+        return np.array([x.is_nan() for x in self._data], dtype=bool)
+    @property
+    def _na_value(self):
+        """Return a Decimal NaN, built anew on every access."""
+        return decimal.Decimal("NaN")
+    def _formatter(self, boxed=False):
+        if boxed:
+            return "Decimal: {}".format
+        return repr
+    @classmethod
+    def _concat_same_type(cls, to_concat):
+        return cls(np.concatenate([x._data for x in to_concat]))
+    def _reduce(
+        self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
+    ):
+        if skipna and self.isna().any():
+            # If we don't have any NAs, we can ignore skipna
+            other = self[~self.isna()]
+            result = other._reduce(name, **kwargs)
+        elif name == "sum" and len(self) == 0:
+            # GH#29630 avoid returning int 0 or np.bool_(False) on old numpy
+            result = decimal.Decimal(0)
+        else:
+            try:
+                op = getattr(self.data, name)
+            except AttributeError as err:
+                raise NotImplementedError(
+                    f"decimal does not support the {name} operation"
+                ) from err
+            result = op(axis=0)
+        if keepdims:
+            return type(self)([result])
+        else:
+            return result
+    def _cmp_method(self, other, op):
+        # For use with OpsMixin
+        def convert_values(param):
+            if isinstance(param, ExtensionArray) or is_list_like(param):
+                ovalues = param
+            else:
+                # Assume it's an object
+                ovalues = [param] * len(self)
+            return ovalues
+        lvalues = self
+        rvalues = convert_values(other)
+        # If the operator is not defined for the underlying objects,
+        # a TypeError should be raised
+        res = [op(a, b) for (a, b) in zip(lvalues, rvalues)]
+        return np.asarray(res, dtype=bool)
+    def value_counts(self, dropna: bool = True):
+        return value_counts(self.to_numpy(), dropna=dropna)
+    # fillna is overridden on purpose: this class simulates a 3rd-party EA that
+    #  overrides fillna (which lets us test the deprecation telling authors to
+    #  implement _pad_or_backfill) and that has not yet been updated to accept
+    #  a "copy" keyword in its fillna signature.
+    # error: Signature of "fillna" incompatible with supertype "ExtensionArray"
+    def fillna(  # type: ignore[override]
+        self,
+        value=None,
+        method=None,
+        limit: int | None = None,
+    ):
+        # Forward to the base implementation, always requesting a copy.
+        return super().fillna(value=value, method=method, limit=limit, copy=True)
+def to_decimal(values, context=None):
+    return DecimalArray([decimal.Decimal(x) for x in values], context=context)
+def make_data():
+    return [decimal.Decimal(val) for val in np.random.default_rng(2).random(100)]
+DecimalArray._add_arithmetic_ops()

py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/test_decimal.py ADDED Viewed

	@@ -0,0 +1,587 @@

+from __future__ import annotations
+import decimal
+import operator
+import numpy as np
+import pytest
+from pandas.compat.numpy import np_version_gt2
+import pandas as pd
+import pandas._testing as tm
+from pandas.tests.extension import base
+from pandas.tests.extension.decimal.array import (
+    DecimalArray,
+    DecimalDtype,
+    make_data,
+    to_decimal,
+)
+@pytest.fixture
+def dtype():
+    """Provide a newly constructed DecimalDtype."""
+    return DecimalDtype()
+@pytest.fixture
+def data():
+    return DecimalArray(make_data())
+@pytest.fixture
+def data_for_twos():
+    return DecimalArray([decimal.Decimal(2) for _ in range(100)])
+@pytest.fixture
+def data_missing():
+    return DecimalArray([decimal.Decimal("NaN"), decimal.Decimal(1)])
+@pytest.fixture
+def data_for_sorting():
+    return DecimalArray(
+        [decimal.Decimal("1"), decimal.Decimal("2"), decimal.Decimal("0")]
+    )
+@pytest.fixture
+def data_missing_for_sorting():
+    return DecimalArray(
+        [decimal.Decimal("1"), decimal.Decimal("NaN"), decimal.Decimal("0")]
+    )
+@pytest.fixture
+def na_cmp():
+    return lambda x, y: x.is_nan() and y.is_nan()
+@pytest.fixture
+def data_for_grouping():
+    b = decimal.Decimal("1.0")
+    a = decimal.Decimal("0.0")
+    c = decimal.Decimal("2.0")
+    na = decimal.Decimal("NaN")
+    return DecimalArray([b, b, na, na, a, a, b, c])
+class TestDecimalArray(base.ExtensionTests):
+    def _get_expected_exception(
+        self, op_name: str, obj, other
+    ) -> type[Exception] | tuple[type[Exception], ...] | None:
+        return None
+    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
+        return True
+    def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
+        if op_name == "count":
+            return super().check_reduce(ser, op_name, skipna)
+        else:
+            result = getattr(ser, op_name)(skipna=skipna)
+            expected = getattr(np.asarray(ser), op_name)()
+            tm.assert_almost_equal(result, expected)
+    def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, request):
+        if all_numeric_reductions in ["kurt", "skew", "sem", "median"]:
+            mark = pytest.mark.xfail(raises=NotImplementedError)
+            request.applymarker(mark)
+        super().test_reduce_series_numeric(data, all_numeric_reductions, skipna)
+    def test_reduce_frame(self, data, all_numeric_reductions, skipna, request):
+        op_name = all_numeric_reductions
+        if op_name in ["skew", "median"]:
+            mark = pytest.mark.xfail(raises=NotImplementedError)
+            request.applymarker(mark)
+        return super().test_reduce_frame(data, all_numeric_reductions, skipna)
+    def test_compare_scalar(self, data, comparison_op):
+        ser = pd.Series(data)
+        self._compare_other(ser, data, comparison_op, 0.5)
+    def test_compare_array(self, data, comparison_op):
+        ser = pd.Series(data)
+        alter = np.random.default_rng(2).choice([-1, 0, 1], len(data))
+        # Randomly double, halve or keep same value
+        other = pd.Series(data) * [decimal.Decimal(pow(2.0, i)) for i in alter]
+        self._compare_other(ser, data, comparison_op, other)
+    def test_arith_series_with_array(self, data, all_arithmetic_operators):
+        op_name = all_arithmetic_operators
+        ser = pd.Series(data)
+        context = decimal.getcontext()
+        divbyzerotrap = context.traps[decimal.DivisionByZero]
+        invalidoptrap = context.traps[decimal.InvalidOperation]
+        context.traps[decimal.DivisionByZero] = 0
+        context.traps[decimal.InvalidOperation] = 0
+        # Decimal supports ops with int, but not float
+        other = pd.Series([int(d * 100) for d in data])
+        self.check_opname(ser, op_name, other)
+        if "mod" not in op_name:
+            self.check_opname(ser, op_name, ser * 2)
+        self.check_opname(ser, op_name, 0)
+        self.check_opname(ser, op_name, 5)
+        context.traps[decimal.DivisionByZero] = divbyzerotrap
+        context.traps[decimal.InvalidOperation] = invalidoptrap
+    def test_fillna_frame(self, data_missing):
+        msg = "ExtensionArray.fillna added a 'copy' keyword"
+        with tm.assert_produces_warning(
+            DeprecationWarning, match=msg, check_stacklevel=False
+        ):
+            super().test_fillna_frame(data_missing)
+    def test_fillna_limit_pad(self, data_missing):
+        msg = "ExtensionArray.fillna 'method' keyword is deprecated"
+        with tm.assert_produces_warning(
+            DeprecationWarning,
+            match=msg,
+            check_stacklevel=False,
+            raise_on_extra_warnings=False,
+        ):
+            super().test_fillna_limit_pad(data_missing)
+        msg = "The 'method' keyword in DecimalArray.fillna is deprecated"
+        with tm.assert_produces_warning(
+            FutureWarning,
+            match=msg,
+            check_stacklevel=False,
+            raise_on_extra_warnings=False,
+        ):
+            super().test_fillna_limit_pad(data_missing)
+    @pytest.mark.parametrize(
+        "limit_area, input_ilocs, expected_ilocs",
+        [
+            ("outside", [1, 0, 0, 0, 1], [1, 0, 0, 0, 1]),
+            ("outside", [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]),
+            ("outside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 1]),
+            ("outside", [0, 1, 0, 1, 0], [0, 1, 0, 1, 1]),
+            ("inside", [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]),
+            ("inside", [1, 0, 1, 0, 1], [1, 1, 1, 1, 1]),
+            ("inside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 0]),
+            ("inside", [0, 1, 0, 1, 0], [0, 1, 1, 1, 0]),
+        ],
+    )
+    def test_ffill_limit_area(
+        self, data_missing, limit_area, input_ilocs, expected_ilocs
+    ):
+        # GH#56616
+        msg = "ExtensionArray.fillna 'method' keyword is deprecated"
+        with tm.assert_produces_warning(
+            DeprecationWarning,
+            match=msg,
+            check_stacklevel=False,
+            raise_on_extra_warnings=False,
+        ):
+            msg = "DecimalArray does not implement limit_area"
+            with pytest.raises(NotImplementedError, match=msg):
+                super().test_ffill_limit_area(
+                    data_missing, limit_area, input_ilocs, expected_ilocs
+                )
+    def test_fillna_limit_backfill(self, data_missing):
+        msg = "Series.fillna with 'method' is deprecated"
+        with tm.assert_produces_warning(
+            FutureWarning,
+            match=msg,
+            check_stacklevel=False,
+            raise_on_extra_warnings=False,
+        ):
+            super().test_fillna_limit_backfill(data_missing)
+        msg = "ExtensionArray.fillna 'method' keyword is deprecated"
+        with tm.assert_produces_warning(
+            DeprecationWarning,
+            match=msg,
+            check_stacklevel=False,
+            raise_on_extra_warnings=False,
+        ):
+            super().test_fillna_limit_backfill(data_missing)
+        msg = "The 'method' keyword in DecimalArray.fillna is deprecated"
+        with tm.assert_produces_warning(
+            FutureWarning,
+            match=msg,
+            check_stacklevel=False,
+            raise_on_extra_warnings=False,
+        ):
+            super().test_fillna_limit_backfill(data_missing)
+    def test_fillna_no_op_returns_copy(self, data):
+        msg = "|".join(
+            [
+                "ExtensionArray.fillna 'method' keyword is deprecated",
+                "The 'method' keyword in DecimalArray.fillna is deprecated",
+            ]
+        )
+        with tm.assert_produces_warning(
+            (FutureWarning, DeprecationWarning), match=msg, check_stacklevel=False
+        ):
+            super().test_fillna_no_op_returns_copy(data)
+    def test_fillna_series(self, data_missing):
+        msg = "ExtensionArray.fillna added a 'copy' keyword"
+        with tm.assert_produces_warning(
+            DeprecationWarning, match=msg, check_stacklevel=False
+        ):
+            super().test_fillna_series(data_missing)
+    def test_fillna_series_method(self, data_missing, fillna_method):
+        msg = "|".join(
+            [
+                "ExtensionArray.fillna 'method' keyword is deprecated",
+                "The 'method' keyword in DecimalArray.fillna is deprecated",
+            ]
+        )
+        with tm.assert_produces_warning(
+            (FutureWarning, DeprecationWarning), match=msg, check_stacklevel=False
+        ):
+            super().test_fillna_series_method(data_missing, fillna_method)
+    def test_fillna_copy_frame(self, data_missing, using_copy_on_write):
+        warn = DeprecationWarning if not using_copy_on_write else None
+        msg = "ExtensionArray.fillna added a 'copy' keyword"
+        with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
+            super().test_fillna_copy_frame(data_missing)
+    def test_fillna_copy_series(self, data_missing, using_copy_on_write):
+        warn = DeprecationWarning if not using_copy_on_write else None
+        msg = "ExtensionArray.fillna added a 'copy' keyword"
+        with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
+            super().test_fillna_copy_series(data_missing)
+    @pytest.mark.parametrize("dropna", [True, False])
+    def test_value_counts(self, all_data, dropna, request):
+        all_data = all_data[:10]
+        if dropna:
+            other = np.array(all_data[~all_data.isna()])
+        else:
+            other = all_data
+        vcs = pd.Series(all_data).value_counts(dropna=dropna)
+        vcs_ex = pd.Series(other).value_counts(dropna=dropna)
+        with decimal.localcontext() as ctx:
+            # avoid raising when comparing Decimal("NAN") < Decimal(2)
+            ctx.traps[decimal.InvalidOperation] = False
+            result = vcs.sort_index()
+            expected = vcs_ex.sort_index()
+        tm.assert_series_equal(result, expected)
+    def test_series_repr(self, data):
+        # Overriding this base test to explicitly test that
+        # the custom _formatter is used
+        ser = pd.Series(data)
+        assert data.dtype.name in repr(ser)
+        assert "Decimal: " in repr(ser)
+    @pytest.mark.xfail(reason="Inconsistent array-vs-scalar behavior")
+    @pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs])
+    def test_unary_ufunc_dunder_equivalence(self, data, ufunc):
+        super().test_unary_ufunc_dunder_equivalence(data, ufunc)
+    def test_array_interface_copy(self, data):
+        result_copy1 = np.array(data, copy=True)
+        result_copy2 = np.array(data, copy=True)
+        assert not np.may_share_memory(result_copy1, result_copy2)
+        if not np_version_gt2:
+            # copy=False semantics are only supported in NumPy>=2.
+            return
+        try:
+            result_nocopy1 = np.array(data, copy=False)
+        except ValueError:
+            # An error is always acceptable for `copy=False`
+            return
+        result_nocopy2 = np.array(data, copy=False)
+        # If copy=False was given and did not raise, these must share the same data
+        assert np.may_share_memory(result_nocopy1, result_nocopy2)
+def test_take_na_value_other_decimal():
+    arr = DecimalArray([decimal.Decimal("1.0"), decimal.Decimal("2.0")])
+    result = arr.take([0, -1], allow_fill=True, fill_value=decimal.Decimal("-1.0"))
+    expected = DecimalArray([decimal.Decimal("1.0"), decimal.Decimal("-1.0")])
+    tm.assert_extension_array_equal(result, expected)
+def test_series_constructor_coerce_data_to_extension_dtype():
+    dtype = DecimalDtype()
+    ser = pd.Series([0, 1, 2], dtype=dtype)
+    arr = DecimalArray(
+        [decimal.Decimal(0), decimal.Decimal(1), decimal.Decimal(2)],
+        dtype=dtype,
+    )
+    exp = pd.Series(arr)
+    tm.assert_series_equal(ser, exp)
+def test_series_constructor_with_dtype():
+    arr = DecimalArray([decimal.Decimal("10.0")])
+    result = pd.Series(arr, dtype=DecimalDtype())
+    expected = pd.Series(arr)
+    tm.assert_series_equal(result, expected)
+    result = pd.Series(arr, dtype="int64")
+    expected = pd.Series([10])
+    tm.assert_series_equal(result, expected)
+def test_dataframe_constructor_with_dtype():
+    arr = DecimalArray([decimal.Decimal("10.0")])
+    result = pd.DataFrame({"A": arr}, dtype=DecimalDtype())
+    expected = pd.DataFrame({"A": arr})
+    tm.assert_frame_equal(result, expected)
+    arr = DecimalArray([decimal.Decimal("10.0")])
+    result = pd.DataFrame({"A": arr}, dtype="int64")
+    expected = pd.DataFrame({"A": [10]})
+    tm.assert_frame_equal(result, expected)
+@pytest.mark.parametrize("frame", [True, False])
+def test_astype_dispatches(frame):
+    # This is a dtype-specific test that ensures Series[decimal].astype
+    # gets all the way through to ExtensionArray.astype
+    # Designing a reliable smoke test that works for arbitrary data types
+    # is difficult.
+    data = pd.Series(DecimalArray([decimal.Decimal(2)]), name="a")
+    ctx = decimal.Context()
+    ctx.prec = 5
+    if frame:
+        data = data.to_frame()
+    result = data.astype(DecimalDtype(ctx))
+    if frame:
+        result = result["a"]
+    assert result.dtype.context.prec == ctx.prec
+class DecimalArrayWithoutFromSequence(DecimalArray):
+    """
+    Helper class for testing error handling in _from_sequence.
+    Its _from_sequence always raises KeyError.
+    """
+    @classmethod
+    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
+        # Unconditionally fail so callers' fallback paths can be exercised.
+        raise KeyError("For the test")
+class DecimalArrayWithoutCoercion(DecimalArrayWithoutFromSequence):
+    """Variant whose arithmetic methods are created with coerce_to_dtype=False."""
+    @classmethod
+    def _create_arithmetic_method(cls, op):
+        return cls._create_method(op, coerce_to_dtype=False)
+DecimalArrayWithoutCoercion._add_arithmetic_ops()
+def test_combine_from_sequence_raises(monkeypatch):
+    # https://github.com/pandas-dev/pandas/issues/22850
+    cls = DecimalArrayWithoutFromSequence
+    @classmethod
+    def construct_array_type(cls):
+        return DecimalArrayWithoutFromSequence
+    monkeypatch.setattr(DecimalDtype, "construct_array_type", construct_array_type)
+    arr = cls([decimal.Decimal("1.0"), decimal.Decimal("2.0")])
+    ser = pd.Series(arr)
+    result = ser.combine(ser, operator.add)
+    # note: object dtype
+    expected = pd.Series(
+        [decimal.Decimal("2.0"), decimal.Decimal("4.0")], dtype="object"
+    )
+    tm.assert_series_equal(result, expected)
+@pytest.mark.parametrize(
+    "class_", [DecimalArrayWithoutFromSequence, DecimalArrayWithoutCoercion]
+)
+def test_scalar_ops_from_sequence_raises(class_):
+    # op(EA, EA) should return an EA, or an ndarray if it's not possible
+    # to return an EA with the return values.
+    arr = class_([decimal.Decimal("1.0"), decimal.Decimal("2.0")])
+    result = arr + arr
+    expected = np.array(
+        [decimal.Decimal("2.0"), decimal.Decimal("4.0")], dtype="object"
+    )
+    tm.assert_numpy_array_equal(result, expected)
+@pytest.mark.parametrize(
+    "reverse, expected_div, expected_mod",
+    [(False, [0, 1, 1, 2], [1, 0, 1, 0]), (True, [2, 1, 0, 0], [0, 0, 2, 2])],
+)
+def test_divmod_array(reverse, expected_div, expected_mod):
+    # https://github.com/pandas-dev/pandas/issues/22930
+    arr = to_decimal([1, 2, 3, 4])
+    if reverse:
+        div, mod = divmod(2, arr)
+    else:
+        div, mod = divmod(arr, 2)
+    expected_div = to_decimal(expected_div)
+    expected_mod = to_decimal(expected_mod)
+    tm.assert_extension_array_equal(div, expected_div)
+    tm.assert_extension_array_equal(mod, expected_mod)
+def test_ufunc_fallback(data):
+    a = data[:5]
+    s = pd.Series(a, index=range(3, 8))
+    result = np.abs(s)
+    expected = pd.Series(np.abs(a), index=range(3, 8))
+    tm.assert_series_equal(result, expected)
+def test_array_ufunc():
+    a = to_decimal([1, 2, 3])
+    result = np.exp(a)
+    expected = to_decimal(np.exp(a._data))
+    tm.assert_extension_array_equal(result, expected)
+def test_array_ufunc_series():
+    a = to_decimal([1, 2, 3])
+    s = pd.Series(a)
+    result = np.exp(s)
+    expected = pd.Series(to_decimal(np.exp(a._data)))
+    tm.assert_series_equal(result, expected)
+def test_array_ufunc_series_scalar_other():
+    # check _HANDLED_TYPES
+    a = to_decimal([1, 2, 3])
+    s = pd.Series(a)
+    result = np.add(s, decimal.Decimal(1))
+    expected = pd.Series(np.add(a, decimal.Decimal(1)))
+    tm.assert_series_equal(result, expected)
+def test_array_ufunc_series_defer():
+    a = to_decimal([1, 2, 3])
+    s = pd.Series(a)
+    expected = pd.Series(to_decimal([2, 4, 6]))
+    r1 = np.add(s, a)
+    r2 = np.add(a, s)
+    tm.assert_series_equal(r1, expected)
+    tm.assert_series_equal(r2, expected)
+def test_groupby_agg():
+    # Ensure that the result of agg is inferred to be decimal dtype
+    # https://github.com/pandas-dev/pandas/issues/29141
+    data = make_data()[:5]
+    df = pd.DataFrame(
+        {"id1": [0, 0, 0, 1, 1], "id2": [0, 1, 0, 1, 1], "decimals": DecimalArray(data)}
+    )
+    # single key, selected column
+    expected = pd.Series(to_decimal([data[0], data[3]]))
+    result = df.groupby("id1")["decimals"].agg(lambda x: x.iloc[0])
+    tm.assert_series_equal(result, expected, check_names=False)
+    result = df["decimals"].groupby(df["id1"]).agg(lambda x: x.iloc[0])
+    tm.assert_series_equal(result, expected, check_names=False)
+    # multiple keys, selected column
+    expected = pd.Series(
+        to_decimal([data[0], data[1], data[3]]),
+        index=pd.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 1)]),
+    )
+    result = df.groupby(["id1", "id2"])["decimals"].agg(lambda x: x.iloc[0])
+    tm.assert_series_equal(result, expected, check_names=False)
+    result = df["decimals"].groupby([df["id1"], df["id2"]]).agg(lambda x: x.iloc[0])
+    tm.assert_series_equal(result, expected, check_names=False)
+    # multiple columns
+    expected = pd.DataFrame({"id2": [0, 1], "decimals": to_decimal([data[0], data[3]])})
+    result = df.groupby("id1").agg(lambda x: x.iloc[0])
+    tm.assert_frame_equal(result, expected, check_names=False)
+def test_groupby_agg_ea_method(monkeypatch):
+    # Ensure that the result of agg is inferred to be decimal dtype
+    # https://github.com/pandas-dev/pandas/issues/29141
+    def DecimalArray__my_sum(self):
+        return np.sum(np.array(self))
+    monkeypatch.setattr(DecimalArray, "my_sum", DecimalArray__my_sum, raising=False)
+    data = make_data()[:5]
+    df = pd.DataFrame({"id": [0, 0, 0, 1, 1], "decimals": DecimalArray(data)})
+    expected = pd.Series(to_decimal([data[0] + data[1] + data[2], data[3] + data[4]]))
+    result = df.groupby("id")["decimals"].agg(lambda x: x.values.my_sum())
+    tm.assert_series_equal(result, expected, check_names=False)
+    s = pd.Series(DecimalArray(data))
+    grouper = np.array([0, 0, 0, 1, 1], dtype=np.int64)
+    result = s.groupby(grouper).agg(lambda x: x.values.my_sum())
+    tm.assert_series_equal(result, expected, check_names=False)
+def test_indexing_no_materialize(monkeypatch):
+    # See https://github.com/pandas-dev/pandas/issues/29708
+    # Ensure that indexing operations do not materialize (convert to a numpy
+    # array) the ExtensionArray unnecessary
+    def DecimalArray__array__(self, dtype=None):
+        raise Exception("tried to convert a DecimalArray to a numpy array")
+    monkeypatch.setattr(DecimalArray, "__array__", DecimalArray__array__, raising=False)
+    data = make_data()
+    s = pd.Series(DecimalArray(data))
+    df = pd.DataFrame({"a": s, "b": range(len(s))})
+    # ensure the following operations do not raise an error
+    s[s > 0.5]
+    df[s > 0.5]
+    s.at[0]
+    df.at[0, "a"]
+def test_to_numpy_keyword():
+    # test the extra keyword
+    values = [decimal.Decimal("1.1111"), decimal.Decimal("2.2222")]
+    expected = np.array(
+        [decimal.Decimal("1.11"), decimal.Decimal("2.22")], dtype="object"
+    )
+    a = pd.array(values, dtype="decimal")
+    result = a.to_numpy(decimals=2)
+    tm.assert_numpy_array_equal(result, expected)
+    result = pd.Series(a).to_numpy(decimals=2)
+    tm.assert_numpy_array_equal(result, expected)
+def test_array_copy_on_write(using_copy_on_write):
+    df = pd.DataFrame({"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype="object")
+    df2 = df.astype(DecimalDtype())
+    df.iloc[0, 0] = 0
+    if using_copy_on_write:
+        expected = pd.DataFrame(
+            {"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype=DecimalDtype()
+        )
+        tm.assert_equal(df2.values, expected.values)

py311/lib/python3.11/site-packages/pandas/tests/extension/list/__init__.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from pandas.tests.extension.list.array import (
+    ListArray,
+    ListDtype,
+    make_data,
+)
+# Names re-exported by this package (the three imported from .array above).
+__all__ = ["ListArray", "ListDtype", "make_data"]

py311/lib/python3.11/site-packages/pandas/tests/extension/list/array.py ADDED Viewed

	@@ -0,0 +1,137 @@

+"""
+Test extension array for storing nested data in a pandas container.
+The ListArray stores an ndarray of lists.
+"""
+from __future__ import annotations
+import numbers
+import string
+from typing import TYPE_CHECKING
+import numpy as np
+from pandas.core.dtypes.base import ExtensionDtype
+import pandas as pd
+from pandas.api.types import (
+    is_object_dtype,
+    is_string_dtype,
+)
+from pandas.core.arrays import ExtensionArray
+if TYPE_CHECKING:
+    from pandas._typing import type_t
+class ListDtype(ExtensionDtype):
+    """Extension dtype named "list" whose scalar type is the builtin list."""
+    # Scalar type, registered name and missing-value marker of the dtype.
+    type = list
+    name = "list"
+    na_value = np.nan
+    @classmethod
+    def construct_array_type(cls) -> type_t[ListArray]:
+        """
+        Return the array type associated with this dtype.
+        Returns
+        -------
+        type
+            The ListArray class.
+        """
+        return ListArray
+class ListArray(ExtensionArray):
+    dtype = ListDtype()
+    __array_priority__ = 1000
+    def __init__(self, values, dtype=None, copy=False) -> None:
+        if not isinstance(values, np.ndarray):
+            raise TypeError("Need to pass a numpy array as values")
+        for val in values:
+            if not isinstance(val, self.dtype.type) and not pd.isna(val):
+                raise TypeError("All values must be of type " + str(self.dtype.type))
+        self.data = values
+    @classmethod
+    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
+        data = np.empty(len(scalars), dtype=object)
+        data[:] = scalars
+        return cls(data)
+    def __getitem__(self, item):
+        if isinstance(item, numbers.Integral):
+            return self.data[item]
+        else:
+            # slice, list-like, mask
+            return type(self)(self.data[item])
+    def __len__(self) -> int:
+        return len(self.data)
+    def isna(self):
+        return np.array(
+            [not isinstance(x, list) and np.isnan(x) for x in self.data], dtype=bool
+        )
+    def take(self, indexer, allow_fill=False, fill_value=None):
+        # re-implement here, since NumPy has trouble setting
+        # sized objects like UserDicts into scalar slots of
+        # an ndarary.
+        indexer = np.asarray(indexer)
+        msg = (
+            "Index is out of bounds or cannot do a "
+            "non-empty take from an empty array."
+        )
+        if allow_fill:
+            if fill_value is None:
+                fill_value = self.dtype.na_value
+            # bounds check
+            if (indexer < -1).any():
+                raise ValueError
+            try:
+                output = [
+                    self.data[loc] if loc != -1 else fill_value for loc in indexer
+                ]
+            except IndexError as err:
+                raise IndexError(msg) from err
+        else:
+            try:
+                output = [self.data[loc] for loc in indexer]
+            except IndexError as err:
+                raise IndexError(msg) from err
+        return self._from_sequence(output)
+    def copy(self):
+        return type(self)(self.data[:])
+    def astype(self, dtype, copy=True):
+        if isinstance(dtype, type(self.dtype)) and dtype == self.dtype:
+            if copy:
+                return self.copy()
+            return self
+        elif is_string_dtype(dtype) and not is_object_dtype(dtype):
+            # numpy has problems with astype(str) for nested elements
+            return np.array([str(x) for x in self.data], dtype=dtype)
+        elif not copy:
+            return np.asarray(self.data, dtype=dtype)
+        else:
+            return np.array(self.data, dtype=dtype, copy=copy)
+    @classmethod
+    def _concat_same_type(cls, to_concat):
+        data = np.concatenate([x.data for x in to_concat])
+        return cls(data)
+def make_data():
+    # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer
+    rng = np.random.default_rng(2)
+    data = np.empty(100, dtype=object)
+    data[:] = [
+        [rng.choice(list(string.ascii_letters)) for _ in range(rng.integers(0, 10))]
+        for _ in range(100)
+    ]
+    return data

py311/lib/python3.11/site-packages/pandas/tests/extension/list/test_list.py ADDED Viewed

	@@ -0,0 +1,33 @@

+import pytest
+import pandas as pd
+from pandas.tests.extension.list.array import (
+    ListArray,
+    ListDtype,
+    make_data,
+)
+@pytest.fixture
+def dtype():
+    """Provide a newly constructed ListDtype."""
+    return ListDtype()
+@pytest.fixture
+def data():
+    """Length-100 ListArray for semantics test."""
+    data = make_data()
+    # Regenerate until the first two elements have different lengths.
+    # NOTE(review): if make_data() is deterministic (it appears to use a fixed
+    # seed), this loop can never make progress once entered - confirm.
+    while len(data[0]) == len(data[1]):
+        data = make_data()
+    return ListArray(data)
+def test_to_csv(data):
+    # https://github.com/pandas-dev/pandas/issues/28840
+    # array with list-likes fail when doing astype(str) on the numpy array
+    # which was done in get_values_for_csv
+    df = pd.DataFrame({"a": data})
+    res = df.to_csv()
+    assert str(data[0]) in res

py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/__init__.py ADDED Viewed

File without changes

py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_append.py ADDED Viewed

	@@ -0,0 +1,62 @@

+import pytest
+from pandas import (
+    CategoricalIndex,
+    Index,
+)
+import pandas._testing as tm
+class TestAppend:
+    @pytest.fixture
+    def ci(self):
+        categories = list("cab")
+        return CategoricalIndex(list("aabbca"), categories=categories, ordered=False)
+    def test_append(self, ci):
+        # append cats with the same categories
+        result = ci[:3].append(ci[3:])
+        tm.assert_index_equal(result, ci, exact=True)
+        foos = [ci[:1], ci[1:3], ci[3:]]
+        result = foos[0].append(foos[1:])
+        tm.assert_index_equal(result, ci, exact=True)
+    def test_append_empty(self, ci):
+        # empty
+        result = ci.append([])
+        tm.assert_index_equal(result, ci, exact=True)
+    def test_append_mismatched_categories(self, ci):
+        # appending with different categories or reordered is not ok
+        msg = "all inputs must be Index"
+        with pytest.raises(TypeError, match=msg):
+            ci.append(ci.values.set_categories(list("abcd")))
+        with pytest.raises(TypeError, match=msg):
+            ci.append(ci.values.reorder_categories(list("abc")))
+    def test_append_category_objects(self, ci):
+        # with objects
+        result = ci.append(Index(["c", "a"]))
+        expected = CategoricalIndex(list("aabbcaca"), categories=ci.categories)
+        tm.assert_index_equal(result, expected, exact=True)
+    def test_append_non_categories(self, ci):
+        # invalid objects -> cast to object via concat_compat
+        result = ci.append(Index(["a", "d"]))
+        expected = Index(["a", "a", "b", "b", "c", "a", "a", "d"])
+        tm.assert_index_equal(result, expected, exact=True)
+    def test_append_object(self, ci):
+        # GH#14298 - if base object is not categorical -> coerce to object
+        result = Index(["c", "a"]).append(ci)
+        expected = Index(list("caaabbca"))
+        tm.assert_index_equal(result, expected, exact=True)
+    def test_append_to_another(self):
+        # hits Index._concat
+        fst = Index(["a", "b"])
+        snd = CategoricalIndex(["d", "e"])
+        result = fst.append(snd)
+        expected = Index(["a", "b", "d", "e"])
+        tm.assert_index_equal(result, expected)

py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_category.py ADDED Viewed

	@@ -0,0 +1,391 @@

+import numpy as np
+import pytest
+from pandas._libs import index as libindex
+from pandas._libs.arrays import NDArrayBacked
+import pandas as pd
+from pandas import (
+    Categorical,
+    CategoricalDtype,
+)
+import pandas._testing as tm
+from pandas.core.indexes.api import (
+    CategoricalIndex,
+    Index,
+)
+class TestCategoricalIndex:
+    @pytest.fixture
+    def simple_index(self) -> CategoricalIndex:
+        return CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
+    def test_can_hold_identifiers(self):
+        idx = CategoricalIndex(list("aabbca"), categories=None, ordered=False)
+        key = idx[0]
+        assert idx._can_hold_identifiers_and_holds_name(key) is True
+    def test_insert(self, simple_index):
+        ci = simple_index
+        categories = ci.categories
+        # test 0th element
+        result = ci.insert(0, "a")
+        expected = CategoricalIndex(list("aaabbca"), categories=categories)
+        tm.assert_index_equal(result, expected, exact=True)
+        # test Nth element that follows Python list behavior
+        result = ci.insert(-1, "a")
+        expected = CategoricalIndex(list("aabbcaa"), categories=categories)
+        tm.assert_index_equal(result, expected, exact=True)
+        # test empty
+        result = CategoricalIndex([], categories=categories).insert(0, "a")
+        expected = CategoricalIndex(["a"], categories=categories)
+        tm.assert_index_equal(result, expected, exact=True)
+        # invalid -> cast to object
+        expected = ci.astype(object).insert(0, "d")
+        result = ci.insert(0, "d").astype(object)
+        tm.assert_index_equal(result, expected, exact=True)
+        # GH 18295 (test missing)
+        expected = CategoricalIndex(["a", np.nan, "a", "b", "c", "b"])
+        for na in (np.nan, pd.NaT, None):
+            result = CategoricalIndex(list("aabcb")).insert(1, na)
+            tm.assert_index_equal(result, expected)
+    def test_insert_na_mismatched_dtype(self):
+        ci = CategoricalIndex([0, 1, 1])
+        result = ci.insert(0, pd.NaT)
+        expected = Index([pd.NaT, 0, 1, 1], dtype=object)
+        tm.assert_index_equal(result, expected)
+    def test_delete(self, simple_index):
+        ci = simple_index
+        categories = ci.categories
+        result = ci.delete(0)
+        expected = CategoricalIndex(list("abbca"), categories=categories)
+        tm.assert_index_equal(result, expected, exact=True)
+        result = ci.delete(-1)
+        expected = CategoricalIndex(list("aabbc"), categories=categories)
+        tm.assert_index_equal(result, expected, exact=True)
+        with tm.external_error_raised((IndexError, ValueError)):
+            # Either depending on NumPy version
+            ci.delete(10)
+    @pytest.mark.parametrize(
+        "data, non_lexsorted_data",
+        [[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]],
+    )
+    def test_is_monotonic(self, data, non_lexsorted_data):
+        c = CategoricalIndex(data)
+        assert c.is_monotonic_increasing is True
+        assert c.is_monotonic_decreasing is False
+        c = CategoricalIndex(data, ordered=True)
+        assert c.is_monotonic_increasing is True
+        assert c.is_monotonic_decreasing is False
+        c = CategoricalIndex(data, categories=reversed(data))
+        assert c.is_monotonic_increasing is False
+        assert c.is_monotonic_decreasing is True
+        c = CategoricalIndex(data, categories=reversed(data), ordered=True)
+        assert c.is_monotonic_increasing is False
+        assert c.is_monotonic_decreasing is True
+        # test when data is neither monotonic increasing nor decreasing
+        reordered_data = [data[0], data[2], data[1]]
+        c = CategoricalIndex(reordered_data, categories=reversed(data))
+        assert c.is_monotonic_increasing is False
+        assert c.is_monotonic_decreasing is False
+        # non lexsorted categories
+        categories = non_lexsorted_data
+        c = CategoricalIndex(categories[:2], categories=categories)
+        assert c.is_monotonic_increasing is True
+        assert c.is_monotonic_decreasing is False
+        c = CategoricalIndex(categories[1:3], categories=categories)
+        assert c.is_monotonic_increasing is True
+        assert c.is_monotonic_decreasing is False
+    def test_has_duplicates(self):
+        idx = CategoricalIndex([0, 0, 0], name="foo")
+        assert idx.is_unique is False
+        assert idx.has_duplicates is True
+        idx = CategoricalIndex([0, 1], categories=[2, 3], name="foo")
+        assert idx.is_unique is False
+        assert idx.has_duplicates is True
+        idx = CategoricalIndex([0, 1, 2, 3], categories=[1, 2, 3], name="foo")
+        assert idx.is_unique is True
+        assert idx.has_duplicates is False
+    @pytest.mark.parametrize(
+        "data, categories, expected",
+        [
+            (
+                [1, 1, 1],
+                [1, 2, 3],
+                {
+                    "first": np.array([False, True, True]),
+                    "last": np.array([True, True, False]),
+                    False: np.array([True, True, True]),
+                },
+            ),
+            (
+                [1, 1, 1],
+                list("abc"),
+                {
+                    "first": np.array([False, True, True]),
+                    "last": np.array([True, True, False]),
+                    False: np.array([True, True, True]),
+                },
+            ),
+            (
+                [2, "a", "b"],
+                list("abc"),
+                {
+                    "first": np.zeros(shape=(3), dtype=np.bool_),
+                    "last": np.zeros(shape=(3), dtype=np.bool_),
+                    False: np.zeros(shape=(3), dtype=np.bool_),
+                },
+            ),
+            (
+                list("abb"),
+                list("abc"),
+                {
+                    "first": np.array([False, False, True]),
+                    "last": np.array([False, True, False]),
+                    False: np.array([False, True, True]),
+                },
+            ),
+        ],
+    )
+    def test_drop_duplicates(self, data, categories, expected):
+        idx = CategoricalIndex(data, categories=categories, name="foo")
+        for keep, e in expected.items():
+            tm.assert_numpy_array_equal(idx.duplicated(keep=keep), e)
+            e = idx[~e]
+            result = idx.drop_duplicates(keep=keep)
+            tm.assert_index_equal(result, e)
+    @pytest.mark.parametrize(
+        "data, categories, expected_data",
+        [
+            ([1, 1, 1], [1, 2, 3], [1]),
+            ([1, 1, 1], list("abc"), [np.nan]),
+            ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan]),
+            ([2, "a", "b"], list("abc"), [np.nan, "a", "b"]),
+        ],
+    )
+    def test_unique(self, data, categories, expected_data, ordered):
+        dtype = CategoricalDtype(categories, ordered=ordered)
+        idx = CategoricalIndex(data, dtype=dtype)
+        expected = CategoricalIndex(expected_data, dtype=dtype)
+        tm.assert_index_equal(idx.unique(), expected)
+    def test_repr_roundtrip(self):
+        ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
+        str(ci)
+        tm.assert_index_equal(eval(repr(ci)), ci, exact=True)
+        # formatting
+        str(ci)
+        # long format
+        # this is not reprable
+        ci = CategoricalIndex(np.random.default_rng(2).integers(0, 5, size=100))
+        str(ci)
+    def test_isin(self):
+        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
+        tm.assert_numpy_array_equal(
+            ci.isin(["c"]), np.array([False, False, False, True, False, False])
+        )
+        tm.assert_numpy_array_equal(
+            ci.isin(["c", "a", "b"]), np.array([True] * 5 + [False])
+        )
+        tm.assert_numpy_array_equal(
+            ci.isin(["c", "a", "b", np.nan]), np.array([True] * 6)
+        )
+        # mismatched categorical -> coerced to ndarray so doesn't matter
+        result = ci.isin(ci.set_categories(list("abcdefghi")))
+        expected = np.array([True] * 6)
+        tm.assert_numpy_array_equal(result, expected)
+        result = ci.isin(ci.set_categories(list("defghi")))
+        expected = np.array([False] * 5 + [True])
+        tm.assert_numpy_array_equal(result, expected)
+    def test_isin_overlapping_intervals(self):
+        # GH 34974
+        idx = pd.IntervalIndex([pd.Interval(0, 2), pd.Interval(0, 1)])
+        result = CategoricalIndex(idx).isin(idx)
+        expected = np.array([True, True])
+        tm.assert_numpy_array_equal(result, expected)
+    def test_identical(self):
+        ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
+        ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)
+        assert ci1.identical(ci1)
+        assert ci1.identical(ci1.copy())
+        assert not ci1.identical(ci2)
+    def test_ensure_copied_data(self):
+        # gh-12309: Check the "copy" argument of each
+        # Index.__new__ is honored.
+        #
+        # Must be tested separately from other indexes because
+        # self.values is not an ndarray.
+        index = CategoricalIndex(list("ab") * 5)
+        result = CategoricalIndex(index.values, copy=True)
+        tm.assert_index_equal(index, result)
+        assert not np.shares_memory(result._data._codes, index._data._codes)
+        result = CategoricalIndex(index.values, copy=False)
+        assert result._data._codes is index._data._codes
+class TestCategoricalIndex2:
+    """Tests for views, engine selection, arithmetic and category delegation."""
+    def test_view_i8(self):
+        """Viewing as ``i8`` raises for length 100 and matches the codes once trimmed."""
+        # GH#25464
+        ci = CategoricalIndex(list("ab") * 50)
+        msg = "When changing to a larger dtype, its size must be a divisor"
+        # both the index and its backing Categorical must reject the view
+        with pytest.raises(ValueError, match=msg):
+            ci.view("i8")
+        with pytest.raises(ValueError, match=msg):
+            ci._data.view("i8")
+        ci = ci[:-4]  # length divisible by 8
+        res = ci.view("i8")
+        expected = ci._data.codes.view("i8")
+        tm.assert_numpy_array_equal(res, expected)
+        # the same view taken on the Categorical itself must agree
+        cat = ci._data
+        tm.assert_numpy_array_equal(cat.view("i8"), expected)
+    @pytest.mark.parametrize(
+        "dtype, engine_type",
+        [
+            (np.int8, libindex.Int8Engine),
+            (np.int16, libindex.Int16Engine),
+            (np.int32, libindex.Int32Engine),
+            (np.int64, libindex.Int64Engine),
+        ],
+    )
+    def test_engine_type(self, dtype, engine_type):
+        """The ``_engine`` class is expected to match the dtype of ``codes``."""
+        if dtype != np.int64:
+            # num. of uniques required to push CategoricalIndex.codes to a
+            # dtype (128 categories required for .codes dtype to be int16 etc.)
+            num_uniques = {np.int8: 1, np.int16: 128, np.int32: 32768}[dtype]
+            ci = CategoricalIndex(range(num_uniques))
+        else:
+            # having 2**32 - 2**31 categories would be very memory-intensive,
+            # so we cheat a bit with the dtype
+            ci = CategoricalIndex(range(32768))  # == 2**16 - 2**(16 - 1)
+            # cast the existing codes to int64 and re-run NDArrayBacked.__init__
+            # on the index's own Categorical with that array
+            arr = ci.values._ndarray.astype("int64")
+            NDArrayBacked.__init__(ci._data, arr, ci.dtype)
+        assert np.issubdtype(ci.codes.dtype, dtype)
+        assert isinstance(ci._engine, engine_type)
+    @pytest.mark.parametrize(
+        "func,op_name",
+        [
+            (lambda idx: idx - idx, "__sub__"),
+            (lambda idx: idx + idx, "__add__"),
+            (lambda idx: idx - ["a", "b"], "__sub__"),
+            (lambda idx: idx + ["a", "b"], "__add__"),
+            (lambda idx: ["a", "b"] - idx, "__rsub__"),
+            (lambda idx: ["a", "b"] + idx, "__radd__"),
+        ],
+    )
+    def test_disallow_addsub_ops(self, func, op_name):
+        """``+`` and ``-`` involving a categorical-backed Index raise TypeError."""
+        # GH 10039
+        # set ops (+/-) raise TypeError
+        idx = Index(Categorical(["a", "b"]))
+        cat_or_list = "'(Categorical|list)' and '(Categorical|list)'"
+        # any one of these alternative messages is accepted
+        msg = "|".join(
+            [
+                f"cannot perform {op_name} with this index type: CategoricalIndex",
+                "can only concatenate list",
+                rf"unsupported operand type\(s\) for [\+-]: {cat_or_list}",
+            ]
+        )
+        with pytest.raises(TypeError, match=msg):
+            func(idx)
+    def test_method_delegation(self):
+        """Category-manipulation methods on the index return the expected index."""
+        # set_categories
+        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
+        result = ci.set_categories(list("cab"))
+        tm.assert_index_equal(
+            result, CategoricalIndex(list("aabbca"), categories=list("cab"))
+        )
+        # rename_categories with a list
+        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
+        result = ci.rename_categories(list("efg"))
+        tm.assert_index_equal(
+            result, CategoricalIndex(list("ffggef"), categories=list("efg"))
+        )
+        # GH18862 (let rename_categories take callables)
+        result = ci.rename_categories(lambda x: x.upper())
+        tm.assert_index_equal(
+            result, CategoricalIndex(list("AABBCA"), categories=list("CAB"))
+        )
+        # add_categories
+        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
+        result = ci.add_categories(["d"])
+        tm.assert_index_equal(
+            result, CategoricalIndex(list("aabbca"), categories=list("cabd"))
+        )
+        # remove_categories: values of the removed category become NaN
+        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
+        result = ci.remove_categories(["c"])
+        tm.assert_index_equal(
+            result,
+            CategoricalIndex(list("aabb") + [np.nan] + ["a"], categories=list("ab")),
+        )
+        # as_unordered on an index built without ``ordered`` compares equal to it
+        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
+        result = ci.as_unordered()
+        tm.assert_index_equal(result, ci)
+        # as_ordered
+        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
+        result = ci.as_ordered()
+        tm.assert_index_equal(
+            result,
+            CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=True),
+        )
+        # invalid
+        msg = "cannot use inplace with CategoricalIndex"
+        with pytest.raises(ValueError, match=msg):
+            ci.set_categories(list("cab"), inplace=True)
+    def test_remove_maintains_order(self):
+        """Removing a category after a reorder keeps the reordered sequence."""
+        ci = CategoricalIndex(list("abcdda"), categories=list("abcd"))
+        result = ci.reorder_categories(["d", "c", "b", "a"], ordered=True)
+        tm.assert_index_equal(
+            result,
+            CategoricalIndex(list("abcdda"), categories=list("dcba"), ordered=True),
+        )
+        # dropping "c" leaves d, b, a in that order; the "c" entry becomes NaN
+        result = result.remove_categories(["c"])
+        tm.assert_index_equal(
+            result,
+            CategoricalIndex(
+                ["a", "b", np.nan, "d", "d", "a"], categories=list("dba"), ordered=True
+            ),
+        )

py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_constructors.py ADDED Viewed

	@@ -0,0 +1,142 @@

+import numpy as np
+import pytest
+from pandas import (
+    Categorical,
+    CategoricalDtype,
+    CategoricalIndex,
+    Index,
+)
+import pandas._testing as tm
+class TestCategoricalIndexConstructors:
+    def test_construction_disallows_scalar(self):
+        msg = "must be called with a collection of some kind"
+        with pytest.raises(TypeError, match=msg):
+            CategoricalIndex(data=1, categories=list("abcd"), ordered=False)
+        with pytest.raises(TypeError, match=msg):
+            CategoricalIndex(categories=list("abcd"), ordered=False)
+    def test_construction(self):
+        ci = CategoricalIndex(list("aabbca"), categories=list("abcd"), ordered=False)
+        categories = ci.categories
+        result = Index(ci)
+        tm.assert_index_equal(result, ci, exact=True)
+        assert not result.ordered
+        result = Index(ci.values)
+        tm.assert_index_equal(result, ci, exact=True)
+        assert not result.ordered
+        # empty
+        result = CategoricalIndex([], categories=categories)
+        tm.assert_index_equal(result.categories, Index(categories))
+        tm.assert_numpy_array_equal(result.codes, np.array([], dtype="int8"))
+        assert not result.ordered
+        # passing categories
+        result = CategoricalIndex(list("aabbca"), categories=categories)
+        tm.assert_index_equal(result.categories, Index(categories))
+        tm.assert_numpy_array_equal(
+            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
+        )
+        c = Categorical(list("aabbca"))
+        result = CategoricalIndex(c)
+        tm.assert_index_equal(result.categories, Index(list("abc")))
+        tm.assert_numpy_array_equal(
+            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
+        )
+        assert not result.ordered
+        result = CategoricalIndex(c, categories=categories)
+        tm.assert_index_equal(result.categories, Index(categories))
+        tm.assert_numpy_array_equal(
+            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
+        )
+        assert not result.ordered
+        ci = CategoricalIndex(c, categories=list("abcd"))
+        result = CategoricalIndex(ci)
+        tm.assert_index_equal(result.categories, Index(categories))
+        tm.assert_numpy_array_equal(
+            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
+        )
+        assert not result.ordered
+        result = CategoricalIndex(ci, categories=list("ab"))
+        tm.assert_index_equal(result.categories, Index(list("ab")))
+        tm.assert_numpy_array_equal(
+            result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
+        )
+        assert not result.ordered
+        result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
+        tm.assert_index_equal(result.categories, Index(list("ab")))
+        tm.assert_numpy_array_equal(
+            result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
+        )
+        assert result.ordered
+        result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
+        expected = CategoricalIndex(
+            ci, categories=list("ab"), ordered=True, dtype="category"
+        )
+        tm.assert_index_equal(result, expected, exact=True)
+        # turn me to an Index
+        result = Index(np.array(ci))
+        assert isinstance(result, Index)
+        assert not isinstance(result, CategoricalIndex)
+    def test_construction_with_dtype(self):
+        # specify dtype
+        ci = CategoricalIndex(list("aabbca"), categories=list("abc"), ordered=False)
+        result = Index(np.array(ci), dtype="category")
+        tm.assert_index_equal(result, ci, exact=True)
+        result = Index(np.array(ci).tolist(), dtype="category")
+        tm.assert_index_equal(result, ci, exact=True)
+        # these are generally only equal when the categories are reordered
+        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
+        result = Index(np.array(ci), dtype="category").reorder_categories(ci.categories)
+        tm.assert_index_equal(result, ci, exact=True)
+        # make sure indexes are handled
+        idx = Index(range(3))
+        expected = CategoricalIndex([0, 1, 2], categories=idx, ordered=True)
+        result = CategoricalIndex(idx, categories=idx, ordered=True)
+        tm.assert_index_equal(result, expected, exact=True)
+    def test_construction_empty_with_bool_categories(self):
+        # see GH#22702
+        cat = CategoricalIndex([], categories=[True, False])
+        categories = sorted(cat.categories.tolist())
+        assert categories == [False, True]
+    def test_construction_with_categorical_dtype(self):
+        # construction with CategoricalDtype
+        # GH#18109
+        data, cats, ordered = "a a b b".split(), "c b a".split(), True
+        dtype = CategoricalDtype(categories=cats, ordered=ordered)
+        result = CategoricalIndex(data, dtype=dtype)
+        expected = CategoricalIndex(data, categories=cats, ordered=ordered)
+        tm.assert_index_equal(result, expected, exact=True)
+        # GH#19032
+        result = Index(data, dtype=dtype)
+        tm.assert_index_equal(result, expected, exact=True)
+        # error when combining categories/ordered and dtype kwargs
+        msg = "Cannot specify `categories` or `ordered` together with `dtype`."
+        with pytest.raises(ValueError, match=msg):
+            CategoricalIndex(data, categories=cats, dtype=dtype)
+        with pytest.raises(ValueError, match=msg):
+            CategoricalIndex(data, ordered=ordered, dtype=dtype)

py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_equals.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import numpy as np
+import pytest
+from pandas import (
+    Categorical,
+    CategoricalIndex,
+    Index,
+    MultiIndex,
+)
+class TestEquals:
+    def test_equals_categorical(self):
+        ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
+        ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)
+        assert ci1.equals(ci1)
+        assert not ci1.equals(ci2)
+        assert ci1.equals(ci1.astype(object))
+        assert ci1.astype(object).equals(ci1)
+        assert (ci1 == ci1).all()
+        assert not (ci1 != ci1).all()
+        assert not (ci1 > ci1).all()
+        assert not (ci1 < ci1).all()
+        assert (ci1 <= ci1).all()
+        assert (ci1 >= ci1).all()
+        assert not (ci1 == 1).all()
+        assert (ci1 == Index(["a", "b"])).all()
+        assert (ci1 == ci1.values).all()
+        # invalid comparisons
+        with pytest.raises(ValueError, match="Lengths must match"):
+            ci1 == Index(["a", "b", "c"])
+        msg = "Categoricals can only be compared if 'categories' are the same"
+        with pytest.raises(TypeError, match=msg):
+            ci1 == ci2
+        with pytest.raises(TypeError, match=msg):
+            ci1 == Categorical(ci1.values, ordered=False)
+        with pytest.raises(TypeError, match=msg):
+            ci1 == Categorical(ci1.values, categories=list("abc"))
+        # tests
+        # make sure that we are testing for category inclusion properly
+        ci = CategoricalIndex(list("aabca"), categories=["c", "a", "b"])
+        assert not ci.equals(list("aabca"))
+        # Same categories, but different order
+        # Unordered
+        assert ci.equals(CategoricalIndex(list("aabca")))
+        # Ordered
+        assert not ci.equals(CategoricalIndex(list("aabca"), ordered=True))
+        assert ci.equals(ci.copy())
+        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
+        assert not ci.equals(list("aabca"))
+        assert not ci.equals(CategoricalIndex(list("aabca")))
+        assert ci.equals(ci.copy())
+        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
+        assert not ci.equals(list("aabca") + [np.nan])
+        assert ci.equals(CategoricalIndex(list("aabca") + [np.nan]))
+        assert not ci.equals(CategoricalIndex(list("aabca") + [np.nan], ordered=True))
+        assert ci.equals(ci.copy())
+    def test_equals_categorical_unordered(self):
+        # https://github.com/pandas-dev/pandas/issues/16603
+        a = CategoricalIndex(["A"], categories=["A", "B"])
+        b = CategoricalIndex(["A"], categories=["B", "A"])
+        c = CategoricalIndex(["C"], categories=["B", "A"])
+        assert a.equals(b)
+        assert not a.equals(c)
+        assert not b.equals(c)
+    def test_equals_non_category(self):
+        # GH#37667 Case where other contains a value not among ci's
+        #  categories ("D") and also contains np.nan
+        ci = CategoricalIndex(["A", "B", np.nan, np.nan])
+        other = Index(["A", "B", "D", np.nan])
+        assert not ci.equals(other)
+    def test_equals_multiindex(self):
+        # dont raise NotImplementedError when calling is_dtype_compat
+        mi = MultiIndex.from_arrays([["A", "B", "C", "D"], range(4)])
+        ci = mi.to_flat_index().astype("category")
+        assert not ci.equals(mi)
+    def test_equals_string_dtype(self, any_string_dtype):
+        # GH#55364
+        idx = CategoricalIndex(list("abc"), name="B")
+        other = Index(["a", "b", "c"], name="B", dtype=any_string_dtype)
+        assert idx.equals(other)

py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_fillna.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import numpy as np
+import pytest
+from pandas import CategoricalIndex
+import pandas._testing as tm
+class TestFillNA:
+    def test_fillna_categorical(self):
+        # GH#11343
+        idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x")
+        # fill by value in categories
+        exp = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name="x")
+        tm.assert_index_equal(idx.fillna(1.0), exp)
+        cat = idx._data
+        # fill by value not in categories raises TypeError on EA, casts on CI
+        msg = "Cannot setitem on a Categorical with a new category"
+        with pytest.raises(TypeError, match=msg):
+            cat.fillna(2.0)
+        result = idx.fillna(2.0)
+        expected = idx.astype(object).fillna(2.0)
+        tm.assert_index_equal(result, expected)
+    def test_fillna_copies_with_no_nas(self):
+        # Nothing to fill, should still get a copy for the Categorical method,
+        #  but OK to get a view on CategoricalIndex method
+        ci = CategoricalIndex([0, 1, 1])
+        result = ci.fillna(0)
+        assert result is not ci
+        assert tm.shares_memory(result, ci)
+        # But at the EA level we always get a copy.
+        cat = ci._data
+        result = cat.fillna(0)
+        assert result._ndarray is not cat._ndarray
+        assert result._ndarray.base is None
+        assert not tm.shares_memory(result, cat)
+    def test_fillna_validates_with_no_nas(self):
+        # We validate the fill value even if fillna is a no-op
+        ci = CategoricalIndex([2, 3, 3])
+        cat = ci._data
+        msg = "Cannot setitem on a Categorical with a new category"
+        res = ci.fillna(False)
+        # nothing to fill, so we dont cast
+        tm.assert_index_equal(res, ci)
+        # Same check directly on the Categorical
+        with pytest.raises(TypeError, match=msg):
+            cat.fillna(False)

py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_formats.py ADDED Viewed

	@@ -0,0 +1,120 @@

+"""
+Tests for CategoricalIndex.__repr__ and related methods.
+"""
+import pytest
+from pandas._config import using_string_dtype
+import pandas._config.config as cf
+from pandas import CategoricalIndex
+import pandas._testing as tm
+class TestCategoricalIndexRepr:
+    def test_format_different_scalar_lengths(self):
+        # GH#35439
+        idx = CategoricalIndex(["aaaaaaaaa", "b"])
+        expected = ["aaaaaaaaa", "b"]
+        msg = r"CategoricalIndex\.format is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            assert idx.format() == expected
+    @pytest.mark.xfail(using_string_dtype(), reason="repr different")
+    def test_string_categorical_index_repr(self):
+        # short
+        idx = CategoricalIndex(["a", "bb", "ccc"])
+        expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')"""  # noqa: E501
+        assert repr(idx) == expected
+        # multiple lines
+        idx = CategoricalIndex(["a", "bb", "ccc"] * 10)
+        expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
+                  'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb',
+                  'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
+                 categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')"""  # noqa: E501
+        assert repr(idx) == expected
+        # truncated
+        idx = CategoricalIndex(["a", "bb", "ccc"] * 100)
+        expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
+                  ...
+                  'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
+                 categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)"""  # noqa: E501
+        assert repr(idx) == expected
+        # larger categories
+        idx = CategoricalIndex(list("abcdefghijklmmo"))
+        expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
+                  'm', 'm', 'o'],
+                 categories=['a', 'b', 'c', 'd', ..., 'k', 'l', 'm', 'o'], ordered=False, dtype='category')"""  # noqa: E501
+        assert repr(idx) == expected
+        # short
+        idx = CategoricalIndex(["あ", "いい", "ううう"])
+        expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501
+        assert repr(idx) == expected
+        # multiple lines
+        idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
+        expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
+                  'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
+                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
+                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501
+        assert repr(idx) == expected
+        # truncated
+        idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
+        expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
+                  ...
+                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
+                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)"""  # noqa: E501
+        assert repr(idx) == expected
+        # larger categories
+        idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
+        expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し',
+                  'す', 'せ', 'そ'],
+                 categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')"""  # noqa: E501
+        assert repr(idx) == expected
+        # Enable Unicode option -----------------------------------------
+        with cf.option_context("display.unicode.east_asian_width", True):
+            # short
+            idx = CategoricalIndex(["あ", "いい", "ううう"])
+            expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501
+            assert repr(idx) == expected
+            # multiple lines
+            idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
+            expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
+                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
+                  'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
+                  'う��う', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
+                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501
+            assert repr(idx) == expected
+            # truncated
+            idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
+            expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
+                  'ううう', 'あ',
+                  ...
+                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
+                  'あ', 'いい', 'ううう'],
+                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)"""  # noqa: E501
+            assert repr(idx) == expected
+            # larger categories
+            idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
+            expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ',
+                  'さ', 'し', 'す', 'せ', 'そ'],
+                 categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')"""  # noqa: E501
+            assert repr(idx) == expected

py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_indexing.py ADDED Viewed

	@@ -0,0 +1,420 @@

+import numpy as np
+import pytest
+from pandas.errors import InvalidIndexError
+import pandas as pd
+from pandas import (
+    CategoricalIndex,
+    Index,
+    IntervalIndex,
+    Timestamp,
+)
+import pandas._testing as tm
+class TestTake:
+    def test_take_fill_value(self):
+        # GH 12631
+        # numeric category
+        idx = CategoricalIndex([1, 2, 3], name="xxx")
+        result = idx.take(np.array([1, 0, -1]))
+        expected = CategoricalIndex([2, 1, 3], name="xxx")
+        tm.assert_index_equal(result, expected)
+        tm.assert_categorical_equal(result.values, expected.values)
+        # fill_value
+        result = idx.take(np.array([1, 0, -1]), fill_value=True)
+        expected = CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx")
+        tm.assert_index_equal(result, expected)
+        tm.assert_categorical_equal(result.values, expected.values)
+        # allow_fill=False
+        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
+        expected = CategoricalIndex([2, 1, 3], name="xxx")
+        tm.assert_index_equal(result, expected)
+        tm.assert_categorical_equal(result.values, expected.values)
+        # object category
+        idx = CategoricalIndex(
+            list("CBA"), categories=list("ABC"), ordered=True, name="xxx"
+        )
+        result = idx.take(np.array([1, 0, -1]))
+        expected = CategoricalIndex(
+            list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
+        )
+        tm.assert_index_equal(result, expected)
+        tm.assert_categorical_equal(result.values, expected.values)
+        # fill_value
+        result = idx.take(np.array([1, 0, -1]), fill_value=True)
+        expected = CategoricalIndex(
+            ["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx"
+        )
+        tm.assert_index_equal(result, expected)
+        tm.assert_categorical_equal(result.values, expected.values)
+        # allow_fill=False
+        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
+        expected = CategoricalIndex(
+            list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
+        )
+        tm.assert_index_equal(result, expected)
+        tm.assert_categorical_equal(result.values, expected.values)
+        msg = (
+            "When allow_fill=True and fill_value is not None, "
+            "all indices must be >= -1"
+        )
+        with pytest.raises(ValueError, match=msg):
+            idx.take(np.array([1, 0, -2]), fill_value=True)
+        with pytest.raises(ValueError, match=msg):
+            idx.take(np.array([1, 0, -5]), fill_value=True)
+        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
+        with pytest.raises(IndexError, match=msg):
+            idx.take(np.array([1, -5]))
+    def test_take_fill_value_datetime(self):
+        # datetime category
+        idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
+        idx = CategoricalIndex(idx)
+        result = idx.take(np.array([1, 0, -1]))
+        expected = pd.DatetimeIndex(
+            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
+        )
+        expected = CategoricalIndex(expected)
+        tm.assert_index_equal(result, expected)
+        # fill_value
+        result = idx.take(np.array([1, 0, -1]), fill_value=True)
+        expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
+        exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"])
+        expected = CategoricalIndex(expected, categories=exp_cats)
+        tm.assert_index_equal(result, expected)
+        # allow_fill=False
+        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
+        expected = pd.DatetimeIndex(
+            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
+        )
+        expected = CategoricalIndex(expected)
+        tm.assert_index_equal(result, expected)
+        msg = (
+            "When allow_fill=True and fill_value is not None, "
+            "all indices must be >= -1"
+        )
+        with pytest.raises(ValueError, match=msg):
+            idx.take(np.array([1, 0, -2]), fill_value=True)
+        with pytest.raises(ValueError, match=msg):
+            idx.take(np.array([1, 0, -5]), fill_value=True)
+        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
+        with pytest.raises(IndexError, match=msg):
+            idx.take(np.array([1, -5]))
+    def test_take_invalid_kwargs(self):
+        idx = CategoricalIndex([1, 2, 3], name="foo")
+        indices = [1, 0, -1]
+        msg = r"take\(\) got an unexpected keyword argument 'foo'"
+        with pytest.raises(TypeError, match=msg):
+            idx.take(indices, foo=2)
+        msg = "the 'out' parameter is not supported"
+        with pytest.raises(ValueError, match=msg):
+            idx.take(indices, out=indices)
+        msg = "the 'mode' parameter is not supported"
+        with pytest.raises(ValueError, match=msg):
+            idx.take(indices, mode="clip")
+class TestGetLoc:
+    def test_get_loc(self):
+        # GH 12531
+        cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc"))
+        idx1 = Index(list("abcde"))
+        assert cidx1.get_loc("a") == idx1.get_loc("a")
+        assert cidx1.get_loc("e") == idx1.get_loc("e")
+        for i in [cidx1, idx1]:
+            with pytest.raises(KeyError, match="'NOT-EXIST'"):
+                i.get_loc("NOT-EXIST")
+        # non-unique
+        cidx2 = CategoricalIndex(list("aacded"), categories=list("edabc"))
+        idx2 = Index(list("aacded"))
+        # results in bool array
+        res = cidx2.get_loc("d")
+        tm.assert_numpy_array_equal(res, idx2.get_loc("d"))
+        tm.assert_numpy_array_equal(
+            res, np.array([False, False, False, True, False, True])
+        )
+        # unique element results in scalar
+        res = cidx2.get_loc("e")
+        assert res == idx2.get_loc("e")
+        assert res == 4
+        for i in [cidx2, idx2]:
+            with pytest.raises(KeyError, match="'NOT-EXIST'"):
+                i.get_loc("NOT-EXIST")
+        # non-unique, sliceable
+        cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc"))
+        idx3 = Index(list("aabbb"))
+        # results in slice
+        res = cidx3.get_loc("a")
+        assert res == idx3.get_loc("a")
+        assert res == slice(0, 2, None)
+        res = cidx3.get_loc("b")
+        assert res == idx3.get_loc("b")
+        assert res == slice(2, 5, None)
+        for i in [cidx3, idx3]:
+            with pytest.raises(KeyError, match="'c'"):
+                i.get_loc("c")
+    def test_get_loc_unique(self):
+        cidx = CategoricalIndex(list("abc"))
+        result = cidx.get_loc("b")
+        assert result == 1
+    def test_get_loc_monotonic_nonunique(self):
+        cidx = CategoricalIndex(list("abbc"))
+        result = cidx.get_loc("b")
+        expected = slice(1, 3, None)
+        assert result == expected
+    def test_get_loc_nonmonotonic_nonunique(self):
+        cidx = CategoricalIndex(list("abcb"))
+        result = cidx.get_loc("b")
+        expected = np.array([False, True, False, True], dtype=bool)
+        tm.assert_numpy_array_equal(result, expected)
+    def test_get_loc_nan(self):
+        # GH#41933
+        ci = CategoricalIndex(["A", "B", np.nan])
+        res = ci.get_loc(np.nan)
+        assert res == 2
+class TestGetIndexer:
+    def test_get_indexer_base(self):
+        # Determined by cat ordering.
+        idx = CategoricalIndex(list("cab"), categories=list("cab"))
+        expected = np.arange(len(idx), dtype=np.intp)
+        actual = idx.get_indexer(idx)
+        tm.assert_numpy_array_equal(expected, actual)
+        with pytest.raises(ValueError, match="Invalid fill method"):
+            idx.get_indexer(idx, method="invalid")
+    def test_get_indexer_requires_unique(self):
+        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
+        oidx = Index(np.array(ci))
+        msg = "Reindexing only valid with uniquely valued Index objects"
+        for n in [1, 2, 5, len(ci)]:
+            finder = oidx[np.random.default_rng(2).integers(0, len(ci), size=n)]
+            with pytest.raises(InvalidIndexError, match=msg):
+                ci.get_indexer(finder)
+        # see gh-17323
+        #
+        # Even when indexer is equal to the
+        # members in the index, we should
+        # respect duplicates instead of taking
+        # the fast-track path.
+        for finder in [list("aabbca"), list("aababca")]:
+            with pytest.raises(InvalidIndexError, match=msg):
+                ci.get_indexer(finder)
+    def test_get_indexer_non_unique(self):
+        idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
+        idx2 = CategoricalIndex(list("abf"))
+        for indexer in [idx2, list("abf"), Index(list("abf"))]:
+            msg = "Reindexing only valid with uniquely valued Index objects"
+            with pytest.raises(InvalidIndexError, match=msg):
+                idx1.get_indexer(indexer)
+            r1, _ = idx1.get_indexer_non_unique(indexer)
+            expected = np.array([0, 1, 2, -1], dtype=np.intp)
+            tm.assert_almost_equal(r1, expected)
+    def test_get_indexer_method(self):
+        idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
+        idx2 = CategoricalIndex(list("abf"))
+        msg = "method pad not yet implemented for CategoricalIndex"
+        with pytest.raises(NotImplementedError, match=msg):
+            idx2.get_indexer(idx1, method="pad")
+        msg = "method backfill not yet implemented for CategoricalIndex"
+        with pytest.raises(NotImplementedError, match=msg):
+            idx2.get_indexer(idx1, method="backfill")
+        msg = "method nearest not yet implemented for CategoricalIndex"
+        with pytest.raises(NotImplementedError, match=msg):
+            idx2.get_indexer(idx1, method="nearest")
+    def test_get_indexer_array(self):
+        arr = np.array(
+            [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")],
+            dtype=object,
+        )
+        cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")]
+        ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category")
+        result = ci.get_indexer(arr)
+        expected = np.array([0, 1], dtype="intp")
+        tm.assert_numpy_array_equal(result, expected)
+    def test_get_indexer_same_categories_same_order(self):
+        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])
+        result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["a", "b"]))
+        expected = np.array([1, 1], dtype="intp")
+        tm.assert_numpy_array_equal(result, expected)
+    def test_get_indexer_same_categories_different_order(self):
+        # https://github.com/pandas-dev/pandas/issues/19551
+        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])
+        result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["b", "a"]))
+        expected = np.array([1, 1], dtype="intp")
+        tm.assert_numpy_array_equal(result, expected)
+    def test_get_indexer_nans_in_index_and_target(self):
+        # GH 45361
+        ci = CategoricalIndex([1, 2, np.nan, 3])
+        other1 = [2, 3, 4, np.nan]
+        res1 = ci.get_indexer(other1)
+        expected1 = np.array([1, 3, -1, 2], dtype=np.intp)
+        tm.assert_numpy_array_equal(res1, expected1)
+        other2 = [1, 4, 2, 3]
+        res2 = ci.get_indexer(other2)
+        expected2 = np.array([0, -1, 1, 3], dtype=np.intp)
+        tm.assert_numpy_array_equal(res2, expected2)
+class TestWhere:
+    def test_where(self, listlike_box):
+        klass = listlike_box
+        i = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
+        cond = [True] * len(i)
+        expected = i
+        result = i.where(klass(cond))
+        tm.assert_index_equal(result, expected)
+        cond = [False] + [True] * (len(i) - 1)
+        expected = CategoricalIndex([np.nan] + i[1:].tolist(), categories=i.categories)
+        result = i.where(klass(cond))
+        tm.assert_index_equal(result, expected)
+    def test_where_non_categories(self):
+        ci = CategoricalIndex(["a", "b", "c", "d"])
+        mask = np.array([True, False, True, False])
+        result = ci.where(mask, 2)
+        expected = Index(["a", 2, "c", 2], dtype=object)
+        tm.assert_index_equal(result, expected)
+        msg = "Cannot setitem on a Categorical with a new category"
+        with pytest.raises(TypeError, match=msg):
+            # Test the Categorical method directly
+            ci._data._where(mask, 2)
+class TestContains:
+    def test_contains(self):
+        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=False)
+        assert "a" in ci
+        assert "z" not in ci
+        assert "e" not in ci
+        assert np.nan not in ci
+        # assert codes NOT in index
+        assert 0 not in ci
+        assert 1 not in ci
+    def test_contains_nan(self):
+        ci = CategoricalIndex(list("aabbca") + [np.nan], categories=list("cabdef"))
+        assert np.nan in ci
+    @pytest.mark.parametrize("unwrap", [True, False])
+    def test_contains_na_dtype(self, unwrap):
+        dti = pd.date_range("2016-01-01", periods=100).insert(0, pd.NaT)
+        pi = dti.to_period("D")
+        tdi = dti - dti[-1]
+        ci = CategoricalIndex(dti)
+        obj = ci
+        if unwrap:
+            obj = ci._data
+        assert np.nan in obj
+        assert None in obj
+        assert pd.NaT in obj
+        assert np.datetime64("NaT") in obj
+        assert np.timedelta64("NaT") not in obj
+        obj2 = CategoricalIndex(tdi)
+        if unwrap:
+            obj2 = obj2._data
+        assert np.nan in obj2
+        assert None in obj2
+        assert pd.NaT in obj2
+        assert np.datetime64("NaT") not in obj2
+        assert np.timedelta64("NaT") in obj2
+        obj3 = CategoricalIndex(pi)
+        if unwrap:
+            obj3 = obj3._data
+        assert np.nan in obj3
+        assert None in obj3
+        assert pd.NaT in obj3
+        assert np.datetime64("NaT") not in obj3
+        assert np.timedelta64("NaT") not in obj3
+    @pytest.mark.parametrize(
+        "item, expected",
+        [
+            (pd.Interval(0, 1), True),
+            (1.5, True),
+            (pd.Interval(0.5, 1.5), False),
+            ("a", False),
+            (Timestamp(1), False),
+            (pd.Timedelta(1), False),
+        ],
+        ids=str,
+    )
+    def test_contains_interval(self, item, expected):
+        # GH 23705
+        ci = CategoricalIndex(IntervalIndex.from_breaks(range(3)))
+        result = item in ci
+        assert result is expected
+    def test_contains_list(self):
+        # GH#21729
+        idx = CategoricalIndex([1, 2, 3])
+        assert "a" not in idx
+        with pytest.raises(TypeError, match="unhashable type"):
+            ["a"] in idx
+        with pytest.raises(TypeError, match="unhashable type"):
+            ["a", "b"] in idx

py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_map.py ADDED Viewed

	@@ -0,0 +1,144 @@

+import numpy as np
+import pytest
+import pandas as pd
+from pandas import (
+    CategoricalIndex,
+    Index,
+    Series,
+)
+import pandas._testing as tm
+@pytest.mark.parametrize(
+    "data, categories",
+    [
+        (list("abcbca"), list("cab")),
+        (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)),
+    ],
+    ids=["string", "interval"],
+)
+def test_map_str(data, categories, ordered):
+    # GH 31202 - override base class since we want to maintain categorical/ordered
+    index = CategoricalIndex(data, categories=categories, ordered=ordered)
+    result = index.map(str)
+    expected = CategoricalIndex(
+        map(str, data), categories=map(str, categories), ordered=ordered
+    )
+    tm.assert_index_equal(result, expected)
+def test_map():
+    ci = CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True)
+    result = ci.map(lambda x: x.lower())
+    exp = CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True)
+    tm.assert_index_equal(result, exp)
+    ci = CategoricalIndex(
+        list("ABABC"), categories=list("BAC"), ordered=False, name="XXX"
+    )
+    result = ci.map(lambda x: x.lower())
+    exp = CategoricalIndex(
+        list("ababc"), categories=list("bac"), ordered=False, name="XXX"
+    )
+    tm.assert_index_equal(result, exp)
+    # GH 12766: Return an index not an array
+    tm.assert_index_equal(
+        ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX")
+    )
+    # change categories dtype
+    ci = CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False)
+    def f(x):
+        return {"A": 10, "B": 20, "C": 30}.get(x)
+    result = ci.map(f)
+    exp = CategoricalIndex([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False)
+    tm.assert_index_equal(result, exp)
+    result = ci.map(Series([10, 20, 30], index=["A", "B", "C"]))
+    tm.assert_index_equal(result, exp)
+    result = ci.map({"A": 10, "B": 20, "C": 30})
+    tm.assert_index_equal(result, exp)
+def test_map_with_categorical_series():
+    # GH 12756
+    a = Index([1, 2, 3, 4])
+    b = Series(["even", "odd", "even", "odd"], dtype="category")
+    c = Series(["even", "odd", "even", "odd"])
+    exp = CategoricalIndex(["odd", "even", "odd", np.nan])
+    tm.assert_index_equal(a.map(b), exp)
+    exp = Index(["odd", "even", "odd", np.nan])
+    tm.assert_index_equal(a.map(c), exp)
+@pytest.mark.parametrize(
+    ("data", "f", "expected"),
+    (
+        ([1, 1, np.nan], pd.isna, CategoricalIndex([False, False, np.nan])),
+        ([1, 2, np.nan], pd.isna, Index([False, False, np.nan])),
+        ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
+        ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
+        (
+            [1, 1, np.nan],
+            Series([False, False]),
+            CategoricalIndex([False, False, np.nan]),
+        ),
+        (
+            [1, 2, np.nan],
+            Series([False, False, False]),
+            Index([False, False, np.nan]),
+        ),
+    ),
+)
+def test_map_with_nan_ignore(data, f, expected):  # GH 24241
+    values = CategoricalIndex(data)
+    result = values.map(f, na_action="ignore")
+    tm.assert_index_equal(result, expected)
+@pytest.mark.parametrize(
+    ("data", "f", "expected"),
+    (
+        ([1, 1, np.nan], pd.isna, Index([False, False, True])),
+        ([1, 2, np.nan], pd.isna, Index([False, False, True])),
+        ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
+        ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
+        (
+            [1, 1, np.nan],
+            Series([False, False]),
+            CategoricalIndex([False, False, np.nan]),
+        ),
+        (
+            [1, 2, np.nan],
+            Series([False, False, False]),
+            Index([False, False, np.nan]),
+        ),
+    ),
+)
+def test_map_with_nan_none(data, f, expected):  # GH 24241
+    values = CategoricalIndex(data)
+    result = values.map(f, na_action=None)
+    tm.assert_index_equal(result, expected)
+def test_map_with_dict_or_series():
+    orig_values = ["a", "B", 1, "a"]
+    new_values = ["one", 2, 3.0, "one"]
+    cur_index = CategoricalIndex(orig_values, name="XXX")
+    expected = CategoricalIndex(new_values, name="XXX", categories=[3.0, 2, "one"])
+    mapper = Series(new_values[:-1], index=orig_values[:-1])
+    result = cur_index.map(mapper)
+    # Order of categories in result can be different
+    tm.assert_index_equal(result, expected)
+    mapper = dict(zip(orig_values[:-1], new_values[:-1]))
+    result = cur_index.map(mapper)
+    # Order of categories in result can be different
+    tm.assert_index_equal(result, expected)

py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_reindex.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import numpy as np
+import pytest
+from pandas import (
+    Categorical,
+    CategoricalIndex,
+    Index,
+    Interval,
+)
+import pandas._testing as tm
+class TestReindex:
+    def test_reindex_list_non_unique(self):
+        # GH#11586
+        msg = "cannot reindex on an axis with duplicate labels"
+        ci = CategoricalIndex(["a", "b", "c", "a"])
+        with pytest.raises(ValueError, match=msg):
+            ci.reindex(["a", "c"])
+    def test_reindex_categorical_non_unique(self):
+        msg = "cannot reindex on an axis with duplicate labels"
+        ci = CategoricalIndex(["a", "b", "c", "a"])
+        with pytest.raises(ValueError, match=msg):
+            ci.reindex(Categorical(["a", "c"]))
+    def test_reindex_list_non_unique_unused_category(self):
+        msg = "cannot reindex on an axis with duplicate labels"
+        ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
+        with pytest.raises(ValueError, match=msg):
+            ci.reindex(["a", "c"])
+    def test_reindex_categorical_non_unique_unused_category(self):
+        msg = "cannot reindex on an axis with duplicate labels"
+        ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
+        with pytest.raises(ValueError, match=msg):
+            ci.reindex(Categorical(["a", "c"]))
+    def test_reindex_duplicate_target(self):
+        # See GH25459
+        cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
+        res, indexer = cat.reindex(["a", "c", "c"])
+        exp = Index(["a", "c", "c"])
+        tm.assert_index_equal(res, exp, exact=True)
+        tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))
+        res, indexer = cat.reindex(
+            CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
+        )
+        exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
+        tm.assert_index_equal(res, exp, exact=True)
+        tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))
+    def test_reindex_empty_index(self):
+        # See GH16770
+        c = CategoricalIndex([])
+        res, indexer = c.reindex(["a", "b"])
+        tm.assert_index_equal(res, Index(["a", "b"]), exact=True)
+        tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp))
+    def test_reindex_categorical_added_category(self):
+        # GH 42424
+        ci = CategoricalIndex(
+            [Interval(0, 1, closed="right"), Interval(1, 2, closed="right")],
+            ordered=True,
+        )
+        ci_add = CategoricalIndex(
+            [
+                Interval(0, 1, closed="right"),
+                Interval(1, 2, closed="right"),
+                Interval(2, 3, closed="right"),
+                Interval(3, 4, closed="right"),
+            ],
+            ordered=True,
+        )
+        result, _ = ci.reindex(ci_add)
+        expected = ci_add
+        tm.assert_index_equal(expected, result)

py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_setops.py ADDED Viewed

	@@ -0,0 +1,18 @@

+import numpy as np
+import pytest
+from pandas import (
+    CategoricalIndex,
+    Index,
+)
+import pandas._testing as tm
+@pytest.mark.parametrize("na_value", [None, np.nan])
+def test_difference_with_na(na_value):
+    # GH 57318
+    ci = CategoricalIndex(["a", "b", "c", None])
+    other = Index(["c", na_value])
+    result = ci.difference(other)
+    expected = CategoricalIndex(["a", "b"], categories=["a", "b", "c"])
+    tm.assert_index_equal(result, expected)

py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/__init__.py ADDED Viewed

File without changes

py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_arithmetic.py ADDED Viewed

	@@ -0,0 +1,56 @@

+# Arithmetic tests specific to DatetimeIndex are generally about `freq`
+#  retention or inference.  Other arithmetic tests belong in
+#  tests/arithmetic/test_datetime64.py
+import pytest
+from pandas import (
+    Timedelta,
+    TimedeltaIndex,
+    Timestamp,
+    date_range,
+    timedelta_range,
+)
+import pandas._testing as tm
+class TestDatetimeIndexArithmetic:
+    def test_add_timedelta_preserves_freq(self):
+        # GH#37295 should hold for any DTI with freq=None or Tick freq
+        tz = "Canada/Eastern"
+        dti = date_range(
+            start=Timestamp("2019-03-26 00:00:00-0400", tz=tz),
+            end=Timestamp("2020-10-17 00:00:00-0400", tz=tz),
+            freq="D",
+        )
+        result = dti + Timedelta(days=1)
+        assert result.freq == dti.freq
+    def test_sub_datetime_preserves_freq(self, tz_naive_fixture):
+        # GH#48818
+        dti = date_range("2016-01-01", periods=12, tz=tz_naive_fixture)
+        res = dti - dti[0]
+        expected = timedelta_range("0 Days", "11 Days")
+        tm.assert_index_equal(res, expected)
+        assert res.freq == expected.freq
+    @pytest.mark.xfail(
+        reason="The inherited freq is incorrect bc dti.freq is incorrect "
+        "https://github.com/pandas-dev/pandas/pull/48818/files#r982793461"
+    )
+    def test_sub_datetime_preserves_freq_across_dst(self):
+        # GH#48818
+        ts = Timestamp("2016-03-11", tz="US/Pacific")
+        dti = date_range(ts, periods=4)
+        res = dti - dti[0]
+        expected = TimedeltaIndex(
+            [
+                Timedelta(days=0),
+                Timedelta(days=1),
+                Timedelta(days=2),
+                Timedelta(days=2, hours=23),
+            ]
+        )
+        tm.assert_index_equal(res, expected)
+        assert res.freq == expected.freq

py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_constructors.py ADDED Viewed

	@@ -0,0 +1,1204 @@

+from __future__ import annotations
+from datetime import (
+    datetime,
+    timedelta,
+    timezone,
+)
+from functools import partial
+from operator import attrgetter
+import dateutil
+import dateutil.tz
+from dateutil.tz import gettz
+import numpy as np
+import pytest
+import pytz
+from pandas._libs.tslibs import (
+    OutOfBoundsDatetime,
+    astype_overflowsafe,
+    timezones,
+)
+import pandas as pd
+from pandas import (
+    DatetimeIndex,
+    Index,
+    Timestamp,
+    date_range,
+    offsets,
+    to_datetime,
+)
+import pandas._testing as tm
+from pandas.core.arrays import period_array
+class TestDatetimeIndex:
+    def test_closed_deprecated(self):
+        # GH#52628
+        msg = "The 'closed' keyword"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            DatetimeIndex([], closed=True)
+    def test_normalize_deprecated(self):
+        # GH#52628
+        msg = "The 'normalize' keyword"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            DatetimeIndex([], normalize=True)
+    def test_from_dt64_unsupported_unit(self):
+        # GH#49292
+        val = np.datetime64(1, "D")
+        result = DatetimeIndex([val], tz="US/Pacific")
+        expected = DatetimeIndex([val.astype("M8[s]")], tz="US/Pacific")
+        tm.assert_index_equal(result, expected)
+    def test_explicit_tz_none(self):
+        # GH#48659
+        dti = date_range("2016-01-01", periods=10, tz="UTC")
+        msg = "Passed data is timezone-aware, incompatible with 'tz=None'"
+        with pytest.raises(ValueError, match=msg):
+            DatetimeIndex(dti, tz=None)
+        with pytest.raises(ValueError, match=msg):
+            DatetimeIndex(np.array(dti), tz=None)
+        msg = "Cannot pass both a timezone-aware dtype and tz=None"
+        with pytest.raises(ValueError, match=msg):
+            DatetimeIndex([], dtype="M8[ns, UTC]", tz=None)
+    def test_freq_validation_with_nat(self):
+        # GH#11587 make sure we get a useful error message when generate_range
+        #  raises
+        msg = (
+            "Inferred frequency None from passed values does not conform "
+            "to passed frequency D"
+        )
+        with pytest.raises(ValueError, match=msg):
+            DatetimeIndex([pd.NaT, Timestamp("2011-01-01")], freq="D")
+        with pytest.raises(ValueError, match=msg):
+            DatetimeIndex([pd.NaT, Timestamp("2011-01-01")._value], freq="D")
+    # TODO: better place for tests shared by DTI/TDI?
+    @pytest.mark.parametrize(
+        "index",
+        [
+            date_range("2016-01-01", periods=5, tz="US/Pacific"),
+            pd.timedelta_range("1 Day", periods=5),
+        ],
+    )
+    def test_shallow_copy_inherits_array_freq(self, index):
+        # If we pass a DTA/TDA to shallow_copy and don't specify a freq,
+        #  we should inherit the array's freq, not our own.
+        array = index._data
+        arr = array[[0, 3, 2, 4, 1]]
+        assert arr.freq is None
+        result = index._shallow_copy(arr)
+        assert result.freq is None
+    def test_categorical_preserves_tz(self):
+        # GH#18664 retain tz when going DTI-->Categorical-->DTI
+        dti = DatetimeIndex(
+            [pd.NaT, "2015-01-01", "1999-04-06 15:14:13", "2015-01-01"], tz="US/Eastern"
+        )
+        for dtobj in [dti, dti._data]:
+            # works for DatetimeIndex or DatetimeArray
+            ci = pd.CategoricalIndex(dtobj)
+            carr = pd.Categorical(dtobj)
+            cser = pd.Series(ci)
+            for obj in [ci, carr, cser]:
+                result = DatetimeIndex(obj)
+                tm.assert_index_equal(result, dti)
+    def test_dti_with_period_data_raises(self):
+        # GH#23675
+        data = pd.PeriodIndex(["2016Q1", "2016Q2"], freq="Q")
+        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
+            DatetimeIndex(data)
+        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
+            to_datetime(data)
+        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
+            DatetimeIndex(period_array(data))
+        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
+            to_datetime(period_array(data))
+    def test_dti_with_timedelta64_data_raises(self):
+        # GH#23675 deprecated, enforced in GH#29794
+        data = np.array([0], dtype="m8[ns]")
+        msg = r"timedelta64\[ns\] cannot be converted to datetime64"
+        with pytest.raises(TypeError, match=msg):
+            DatetimeIndex(data)
+        with pytest.raises(TypeError, match=msg):
+            to_datetime(data)
+        with pytest.raises(TypeError, match=msg):
+            DatetimeIndex(pd.TimedeltaIndex(data))
+        with pytest.raises(TypeError, match=msg):
+            to_datetime(pd.TimedeltaIndex(data))
+    def test_constructor_from_sparse_array(self):
+        # https://github.com/pandas-dev/pandas/issues/35843
+        values = [
+            Timestamp("2012-05-01T01:00:00.000000"),
+            Timestamp("2016-05-01T01:00:00.000000"),
+        ]
+        arr = pd.arrays.SparseArray(values)
+        result = Index(arr)
+        assert type(result) is Index
+        assert result.dtype == arr.dtype
+    def test_construction_caching(self):
+        df = pd.DataFrame(
+            {
+                "dt": date_range("20130101", periods=3),
+                "dttz": date_range("20130101", periods=3, tz="US/Eastern"),
+                "dt_with_null": [
+                    Timestamp("20130101"),
+                    pd.NaT,
+                    Timestamp("20130103"),
+                ],
+                "dtns": date_range("20130101", periods=3, freq="ns"),
+            }
+        )
+        assert df.dttz.dtype.tz.zone == "US/Eastern"
+    @pytest.mark.parametrize(
+        "kwargs",
+        [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
+    )
+    def test_construction_with_alt(self, kwargs, tz_aware_fixture):
+        tz = tz_aware_fixture
+        i = date_range("20130101", periods=5, freq="h", tz=tz)
+        kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}
+        result = DatetimeIndex(i, **kwargs)
+        tm.assert_index_equal(i, result)
+    @pytest.mark.parametrize(
+        "kwargs",
+        [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
+    )
+    def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
+        tz = tz_aware_fixture
+        i = date_range("20130101", periods=5, freq="h", tz=tz)
+        i = i._with_freq(None)
+        kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}
+        if "tz" in kwargs:
+            result = DatetimeIndex(i.asi8, tz="UTC").tz_convert(kwargs["tz"])
+            expected = DatetimeIndex(i, **kwargs)
+            tm.assert_index_equal(result, expected)
+        # localize into the provided tz
+        i2 = DatetimeIndex(i.tz_localize(None).asi8, tz="UTC")
+        expected = i.tz_localize(None).tz_localize("UTC")
+        tm.assert_index_equal(i2, expected)
+        # incompat tz/dtype
+        msg = "cannot supply both a tz and a dtype with a tz"
+        with pytest.raises(ValueError, match=msg):
+            DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype, tz="US/Pacific")
+    def test_construction_index_with_mixed_timezones(self):
+        """Check Index inference for Timestamp lists with same or mixed tz.
+
+        Naive-only and single-tz inputs are expected to give a DatetimeIndex;
+        naive/aware mixes and different-tz mixes are expected to give an
+        object-dtype Index.
+        """
+        # gh-11488: no tz results in DatetimeIndex
+        result = Index([Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx")
+        exp = DatetimeIndex(
+            [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
+        )
+        tm.assert_index_equal(result, exp, exact=True)
+        assert isinstance(result, DatetimeIndex)
+        assert result.tz is None
+        # same tz results in DatetimeIndex
+        result = Index(
+            [
+                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
+                Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
+            ],
+            name="idx",
+        )
+        exp = DatetimeIndex(
+            [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")],
+            tz="Asia/Tokyo",
+            name="idx",
+        )
+        tm.assert_index_equal(result, exp, exact=True)
+        assert isinstance(result, DatetimeIndex)
+        assert result.tz is not None
+        assert result.tz == exp.tz
+        # same tz results in DatetimeIndex (DST)
+        result = Index(
+            [
+                Timestamp("2011-01-01 10:00", tz="US/Eastern"),
+                Timestamp("2011-08-01 10:00", tz="US/Eastern"),
+            ],
+            name="idx",
+        )
+        exp = DatetimeIndex(
+            [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")],
+            tz="US/Eastern",
+            name="idx",
+        )
+        tm.assert_index_equal(result, exp, exact=True)
+        assert isinstance(result, DatetimeIndex)
+        assert result.tz is not None
+        assert result.tz == exp.tz
+        # Different tz results in Index(dtype=object)
+        #  (first case: one naive Timestamp and one tz-aware Timestamp)
+        result = Index(
+            [
+                Timestamp("2011-01-01 10:00"),
+                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+            ],
+            name="idx",
+        )
+        exp = Index(
+            [
+                Timestamp("2011-01-01 10:00"),
+                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+            ],
+            dtype="object",
+            name="idx",
+        )
+        tm.assert_index_equal(result, exp, exact=True)
+        assert not isinstance(result, DatetimeIndex)
+        # second case: two tz-aware Timestamps with different tzs
+        result = Index(
+            [
+                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
+                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+            ],
+            name="idx",
+        )
+        exp = Index(
+            [
+                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
+                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+            ],
+            dtype="object",
+            name="idx",
+        )
+        tm.assert_index_equal(result, exp, exact=True)
+        assert not isinstance(result, DatetimeIndex)
+        # DatetimeIndex on strings with different UTC offsets is expected to
+        #  raise TypeError; the FutureWarning check is nested inside the raises
+        msg = "DatetimeIndex has mixed timezones"
+        msg_depr = "parsing datetimes with mixed time zones will raise an error"
+        with pytest.raises(TypeError, match=msg):
+            with tm.assert_produces_warning(FutureWarning, match=msg_depr):
+                DatetimeIndex(["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"])
+        # length = 1
+        result = Index([Timestamp("2011-01-01")], name="idx")
+        exp = DatetimeIndex([Timestamp("2011-01-01")], name="idx")
+        tm.assert_index_equal(result, exp, exact=True)
+        assert isinstance(result, DatetimeIndex)
+        assert result.tz is None
+        # length = 1 with tz
+        result = Index([Timestamp("2011-01-01 10:00", tz="Asia/Tokyo")], name="idx")
+        exp = DatetimeIndex(
+            [Timestamp("2011-01-01 10:00")], tz="Asia/Tokyo", name="idx"
+        )
+        tm.assert_index_equal(result, exp, exact=True)
+        assert isinstance(result, DatetimeIndex)
+        assert result.tz is not None
+        assert result.tz == exp.tz
+    def test_construction_index_with_mixed_timezones_with_NaT(self):
+        """Same inference checks as the mixed-timezone test, with NaT interleaved.
+
+        NaT entries are expected not to change the outcome: naive-only and
+        single-tz inputs give a DatetimeIndex, mixed inputs give object dtype,
+        and an all-NaT list gives a tz-naive DatetimeIndex.
+        """
+        # see gh-11488
+        result = Index(
+            [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
+            name="idx",
+        )
+        exp = DatetimeIndex(
+            [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
+            name="idx",
+        )
+        tm.assert_index_equal(result, exp, exact=True)
+        assert isinstance(result, DatetimeIndex)
+        assert result.tz is None
+        # Same tz results in DatetimeIndex
+        result = Index(
+            [
+                pd.NaT,
+                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
+                pd.NaT,
+                Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
+            ],
+            name="idx",
+        )
+        exp = DatetimeIndex(
+            [
+                pd.NaT,
+                Timestamp("2011-01-01 10:00"),
+                pd.NaT,
+                Timestamp("2011-01-02 10:00"),
+            ],
+            tz="Asia/Tokyo",
+            name="idx",
+        )
+        tm.assert_index_equal(result, exp, exact=True)
+        assert isinstance(result, DatetimeIndex)
+        assert result.tz is not None
+        assert result.tz == exp.tz
+        # same tz results in DatetimeIndex (DST)
+        result = Index(
+            [
+                Timestamp("2011-01-01 10:00", tz="US/Eastern"),
+                pd.NaT,
+                Timestamp("2011-08-01 10:00", tz="US/Eastern"),
+            ],
+            name="idx",
+        )
+        exp = DatetimeIndex(
+            [Timestamp("2011-01-01 10:00"), pd.NaT, Timestamp("2011-08-01 10:00")],
+            tz="US/Eastern",
+            name="idx",
+        )
+        tm.assert_index_equal(result, exp, exact=True)
+        assert isinstance(result, DatetimeIndex)
+        assert result.tz is not None
+        assert result.tz == exp.tz
+        # different tz results in Index(dtype=object)
+        #  (first case: one naive Timestamp and one tz-aware Timestamp)
+        result = Index(
+            [
+                pd.NaT,
+                Timestamp("2011-01-01 10:00"),
+                pd.NaT,
+                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+            ],
+            name="idx",
+        )
+        exp = Index(
+            [
+                pd.NaT,
+                Timestamp("2011-01-01 10:00"),
+                pd.NaT,
+                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+            ],
+            dtype="object",
+            name="idx",
+        )
+        tm.assert_index_equal(result, exp, exact=True)
+        assert not isinstance(result, DatetimeIndex)
+        # second case: two tz-aware Timestamps with different tzs
+        result = Index(
+            [
+                pd.NaT,
+                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
+                pd.NaT,
+                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+            ],
+            name="idx",
+        )
+        exp = Index(
+            [
+                pd.NaT,
+                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
+                pd.NaT,
+                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+            ],
+            dtype="object",
+            name="idx",
+        )
+        tm.assert_index_equal(result, exp, exact=True)
+        assert not isinstance(result, DatetimeIndex)
+        # all NaT
+        result = Index([pd.NaT, pd.NaT], name="idx")
+        exp = DatetimeIndex([pd.NaT, pd.NaT], name="idx")
+        tm.assert_index_equal(result, exp, exact=True)
+        assert isinstance(result, DatetimeIndex)
+        assert result.tz is None
+    def test_construction_dti_with_mixed_timezones(self):
+        """Check the DatetimeIndex constructor on same-tz and mixed-tz Timestamps.
+
+        Covers naive and single-tz inputs, mixed tzs without a target tz
+        (expected to raise), and mixed inputs with an explicit ``tz``/``dtype``
+        (expected to be converted to the requested timezone).
+        """
+        # GH 11488 (not changed, added explicit tests)
+        # no tz results in DatetimeIndex
+        result = DatetimeIndex(
+            [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
+        )
+        exp = DatetimeIndex(
+            [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
+        )
+        tm.assert_index_equal(result, exp, exact=True)
+        assert isinstance(result, DatetimeIndex)
+        # same tz results in DatetimeIndex
+        result = DatetimeIndex(
+            [
+                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
+                Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
+            ],
+            name="idx",
+        )
+        exp = DatetimeIndex(
+            [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")],
+            tz="Asia/Tokyo",
+            name="idx",
+        )
+        tm.assert_index_equal(result, exp, exact=True)
+        assert isinstance(result, DatetimeIndex)
+        # same tz results in DatetimeIndex (DST)
+        result = DatetimeIndex(
+            [
+                Timestamp("2011-01-01 10:00", tz="US/Eastern"),
+                Timestamp("2011-08-01 10:00", tz="US/Eastern"),
+            ],
+            name="idx",
+        )
+        exp = DatetimeIndex(
+            [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")],
+            tz="US/Eastern",
+            name="idx",
+        )
+        tm.assert_index_equal(result, exp, exact=True)
+        assert isinstance(result, DatetimeIndex)
+        # different tzs with no tz/dtype specified raises ValueError
+        msg = "cannot be converted to datetime64"
+        with pytest.raises(ValueError, match=msg):
+            DatetimeIndex(
+                [
+                    Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
+                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+                ],
+                name="idx",
+            )
+        # pre-2.0 this raised bc of awareness mismatch. in 2.0 with a tz
+        #  specified we behave as if this was called pointwise, so
+        #  the naive Timestamp is treated as a wall time.
+        dti = DatetimeIndex(
+            [
+                Timestamp("2011-01-01 10:00"),
+                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+            ],
+            tz="Asia/Tokyo",
+            name="idx",
+        )
+        expected = DatetimeIndex(
+            [
+                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
+                Timestamp("2011-01-02 10:00", tz="US/Eastern").tz_convert("Asia/Tokyo"),
+            ],
+            tz="Asia/Tokyo",
+            name="idx",
+        )
+        tm.assert_index_equal(dti, expected)
+        # pre-2.0 mixed-tz scalars raised even if a tz/dtype was specified.
+        #  as of 2.0 we successfully return the requested tz/dtype
+        dti = DatetimeIndex(
+            [
+                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
+                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+            ],
+            tz="US/Eastern",
+            name="idx",
+        )
+        expected = DatetimeIndex(
+            [
+                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo").tz_convert("US/Eastern"),
+                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+            ],
+            tz="US/Eastern",
+            name="idx",
+        )
+        tm.assert_index_equal(dti, expected)
+        # same thing but pass dtype instead of tz
+        #  (compared against the same ``expected`` as the tz= case above)
+        dti = DatetimeIndex(
+            [
+                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
+                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
+            ],
+            dtype="M8[ns, US/Eastern]",
+            name="idx",
+        )
+        tm.assert_index_equal(dti, expected)
+    def test_construction_base_constructor(self):
+        arr = [Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-03")]
+        tm.assert_index_equal(Index(arr), DatetimeIndex(arr))
+        tm.assert_index_equal(Index(np.array(arr)), DatetimeIndex(np.array(arr)))
+        arr = [np.nan, pd.NaT, Timestamp("2011-01-03")]
+        tm.assert_index_equal(Index(arr), DatetimeIndex(arr))
+        tm.assert_index_equal(Index(np.array(arr)), DatetimeIndex(np.array(arr)))
+    def test_construction_outofbounds(self):
+        """Out-of-bounds dates: Index gives object dtype, DatetimeIndex raises."""
+        # GH 13663
+        dates = [
+            datetime(3000, 1, 1),
+            datetime(4000, 1, 1),
+            datetime(5000, 1, 1),
+            datetime(6000, 1, 1),
+        ]
+        exp = Index(dates, dtype=object)
+        # coerces to object
+        tm.assert_index_equal(Index(dates), exp)
+        # the pattern is anchored with ^...$, so the whole message must match
+        msg = "^Out of bounds nanosecond timestamp: 3000-01-01 00:00:00, at position 0$"
+        with pytest.raises(OutOfBoundsDatetime, match=msg):
+            # can't create DatetimeIndex
+            DatetimeIndex(dates)
+    @pytest.mark.parametrize("data", [["1400-01-01"], [datetime(1400, 1, 1)]])
+    def test_dti_date_out_of_range(self, data):
+        """A year-1400 string or datetime should raise OutOfBoundsDatetime."""
+        # GH#1475
+        # the time part is optional in the pattern: the string input carries
+        #  no time component while the datetime input does
+        msg = (
+            "^Out of bounds nanosecond timestamp: "
+            "1400-01-01( 00:00:00)?, at position 0$"
+        )
+        with pytest.raises(OutOfBoundsDatetime, match=msg):
+            DatetimeIndex(data)
+    def test_construction_with_ndarray(self):
+        # GH 5152
+        dates = [datetime(2013, 10, 7), datetime(2013, 10, 8), datetime(2013, 10, 9)]
+        data = DatetimeIndex(dates, freq=offsets.BDay()).values
+        result = DatetimeIndex(data, freq=offsets.BDay())
+        expected = DatetimeIndex(["2013-10-07", "2013-10-08", "2013-10-09"], freq="B")
+        tm.assert_index_equal(result, expected)
+    def test_integer_values_and_tz_interpreted_as_utc(self):
+        """Build a DatetimeIndex from int64 values, with and without ``tz="UTC"``.
+
+        The integer is the i8 view of 2000-01-01 at nanosecond resolution.
+        """
+        # GH-24559
+        val = np.datetime64("2000-01-01 00:00:00", "ns")
+        values = np.array([val.view("i8")])
+        # naive construction followed by tz_localize keeps the wall time
+        result = DatetimeIndex(values).tz_localize("US/Central")
+        expected = DatetimeIndex(["2000-01-01T00:00:00"], dtype="M8[ns, US/Central]")
+        tm.assert_index_equal(result, expected)
+        # but UTC is *not* deprecated.
+        #  (assert_produces_warning(None) checks that no warning is emitted)
+        with tm.assert_produces_warning(None):
+            result = DatetimeIndex(values, tz="UTC")
+        expected = DatetimeIndex(["2000-01-01T00:00:00"], dtype="M8[ns, UTC]")
+        tm.assert_index_equal(result, expected)
+    def test_constructor_coverage(self):
+        """Exercise assorted DatetimeIndex constructor inputs.
+
+        Covers a scalar string (raises), a generator, NumPy string arrays with
+        and without "NaT", round-tripping through ``asi8``, and a ``freq`` that
+        the values do not conform to (raises).
+        """
+        msg = r"DatetimeIndex\(\.\.\.\) must be called with a collection"
+        with pytest.raises(TypeError, match=msg):
+            DatetimeIndex("1/1/2000")
+        # generator expression
+        gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10))
+        result = DatetimeIndex(gen)
+        expected = DatetimeIndex(
+            [datetime(2000, 1, 1) + timedelta(i) for i in range(10)]
+        )
+        tm.assert_index_equal(result, expected)
+        # NumPy string array
+        strings = np.array(["2000-01-01", "2000-01-02", "2000-01-03"])
+        result = DatetimeIndex(strings)
+        expected = DatetimeIndex(strings.astype("O"))
+        tm.assert_index_equal(result, expected)
+        # the int64 representation should round-trip to the same index
+        from_ints = DatetimeIndex(expected.asi8)
+        tm.assert_index_equal(from_ints, expected)
+        # string with NaT
+        strings = np.array(["2000-01-01", "2000-01-02", "NaT"])
+        result = DatetimeIndex(strings)
+        expected = DatetimeIndex(strings.astype("O"))
+        tm.assert_index_equal(result, expected)
+        from_ints = DatetimeIndex(expected.asi8)
+        tm.assert_index_equal(from_ints, expected)
+        # non-conforming
+        #  (the third date skips a day, so the values cannot have freq="D")
+        msg = (
+            "Inferred frequency None from passed values does not conform "
+            "to passed frequency D"
+        )
+        with pytest.raises(ValueError, match=msg):
+            DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"], freq="D")
+    @pytest.mark.parametrize("freq", ["YS", "W-SUN"])
+    def test_constructor_datetime64_tzformat(self, freq):
+        """date_range on strings carrying a UTC offset yields a fixed-offset tz.
+
+        Each case is compared against a date_range built with an explicit
+        ``datetime.timezone`` and, via ``asi8``, against a named zone with the
+        same offset (America/Lima for -05:00, Asia/Tokyo for +09:00).
+        """
+        # see GH#6572: ISO 8601 format results in stdlib timezone object
+        idx = date_range(
+            "2013-01-01T00:00:00-05:00", "2016-01-01T23:59:59-05:00", freq=freq
+        )
+        expected = date_range(
+            "2013-01-01T00:00:00",
+            "2016-01-01T23:59:59",
+            freq=freq,
+            tz=timezone(timedelta(minutes=-300)),
+        )
+        tm.assert_index_equal(idx, expected)
+        # Unable to use `US/Eastern` because of DST
+        expected_i8 = date_range(
+            "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima"
+        )
+        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
+        idx = date_range(
+            "2013-01-01T00:00:00+09:00", "2016-01-01T23:59:59+09:00", freq=freq
+        )
+        expected = date_range(
+            "2013-01-01T00:00:00",
+            "2016-01-01T23:59:59",
+            freq=freq,
+            tz=timezone(timedelta(minutes=540)),
+        )
+        tm.assert_index_equal(idx, expected)
+        expected_i8 = date_range(
+            "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo"
+        )
+        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
+        # Non ISO 8601 format results in dateutil.tz.tzoffset
+        # NOTE(review): the expected index below is built with
+        #  datetime.timezone, not tzoffset -- confirm whether the comment
+        #  above is still accurate
+        idx = date_range("2013/1/1 0:00:00-5:00", "2016/1/1 23:59:59-5:00", freq=freq)
+        expected = date_range(
+            "2013-01-01T00:00:00",
+            "2016-01-01T23:59:59",
+            freq=freq,
+            tz=timezone(timedelta(minutes=-300)),
+        )
+        tm.assert_index_equal(idx, expected)
+        # Unable to use `US/Eastern` because of DST
+        expected_i8 = date_range(
+            "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima"
+        )
+        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
+        idx = date_range("2013/1/1 0:00:00+9:00", "2016/1/1 23:59:59+09:00", freq=freq)
+        expected = date_range(
+            "2013-01-01T00:00:00",
+            "2016-01-01T23:59:59",
+            freq=freq,
+            tz=timezone(timedelta(minutes=540)),
+        )
+        tm.assert_index_equal(idx, expected)
+        expected_i8 = date_range(
+            "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo"
+        )
+        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
+    def test_constructor_dtype(self):
+        """A tz-aware ``dtype`` and a ``tz`` keyword should both localize."""
+        # passing a dtype with a tz should localize
+        idx = DatetimeIndex(
+            ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]"
+        )
+        expected = (
+            DatetimeIndex(["2013-01-01", "2013-01-02"])
+            .as_unit("ns")
+            .tz_localize("US/Eastern")
+        )
+        tm.assert_index_equal(idx, expected)
+        # passing tz= instead of dtype= should give the same result
+        idx = DatetimeIndex(["2013-01-01", "2013-01-02"], tz="US/Eastern").as_unit("ns")
+        tm.assert_index_equal(idx, expected)
+    def test_constructor_dtype_tz_mismatch_raises(self):
+        """Re-wrapping a tz-aware index with a conflicting dtype/tz should raise.
+
+        Passing the index's own tz-aware dtype is expected to succeed.
+        """
+        # if we already have a tz and its not the same, then raise
+        idx = DatetimeIndex(
+            ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]"
+        )
+        # tz-aware data with a tz-naive dtype
+        msg = (
+            "cannot supply both a tz and a timezone-naive dtype "
+            r"\(i\.e\. datetime64\[ns\]\)"
+        )
+        with pytest.raises(ValueError, match=msg):
+            DatetimeIndex(idx, dtype="datetime64[ns]")
+        # this is effectively trying to convert tz's
+        msg = "data is already tz-aware US/Eastern, unable to set specified tz: CET"
+        with pytest.raises(TypeError, match=msg):
+            DatetimeIndex(idx, dtype="datetime64[ns, CET]")
+        # both tz= and a tz-aware dtype= given
+        msg = "cannot supply both a tz and a dtype with a tz"
+        with pytest.raises(ValueError, match=msg):
+            DatetimeIndex(idx, tz="CET", dtype="datetime64[ns, US/Eastern]")
+        # matching dtype is fine
+        result = DatetimeIndex(idx, dtype="datetime64[ns, US/Eastern]")
+        tm.assert_index_equal(idx, result)
+    @pytest.mark.parametrize("dtype", [object, np.int32, np.int64])
+    def test_constructor_invalid_dtype_raises(self, dtype):
+        """Non-datetime dtypes passed to DatetimeIndex should raise ValueError."""
+        # GH 23986
+        msg = "Unexpected value for 'dtype'"
+        with pytest.raises(ValueError, match=msg):
+            DatetimeIndex([1, 2], dtype=dtype)
+    def test_000constructor_resolution(self):
+        # 2252
+        t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1)
+        idx = DatetimeIndex([t1])
+        assert idx.nanosecond[0] == t1.nanosecond
+    def test_disallow_setting_tz(self):
+        """Assigning to ``.tz`` on a DatetimeIndex should raise AttributeError."""
+        # GH 3746
+        dti = DatetimeIndex(["2010"], tz="UTC")
+        msg = "Cannot directly set timezone"
+        with pytest.raises(AttributeError, match=msg):
+            dti.tz = pytz.timezone("US/Pacific")
+    @pytest.mark.parametrize(
+        "tz",
+        [
+            None,
+            "America/Los_Angeles",
+            pytz.timezone("America/Los_Angeles"),
+            Timestamp("2000", tz="America/Los_Angeles").tz,
+        ],
+    )
+    def test_constructor_start_end_with_tz(self, tz):
+        """date_range with tz-aware start/end and ``tz`` given in several forms.
+
+        ``tz`` is parametrized as None, a string, a pytz timezone object, and
+        the tz attribute taken from a Timestamp.
+        """
+        # GH 18595
+        start = Timestamp("2013-01-01 06:00:00", tz="America/Los_Angeles")
+        end = Timestamp("2013-01-02 06:00:00", tz="America/Los_Angeles")
+        result = date_range(freq="D", start=start, end=end, tz=tz)
+        expected = DatetimeIndex(
+            ["2013-01-01 06:00:00", "2013-01-02 06:00:00"],
+            dtype="M8[ns, America/Los_Angeles]",
+            freq="D",
+        )
+        tm.assert_index_equal(result, expected)
+        # Especially assert that the timezone is consistent for pytz
+        #  (identity check, not just equality)
+        assert pytz.timezone("America/Los_Angeles") is result.tz
+    @pytest.mark.parametrize("tz", ["US/Pacific", "US/Eastern", "Asia/Tokyo"])
+    def test_constructor_with_non_normalized_pytz(self, tz):
+        """A tz taken from a Timestamp should end up as ``pytz.timezone(tz)``."""
+        # GH 18595
+        non_norm_tz = Timestamp("2010", tz=tz).tz
+        result = DatetimeIndex(["2010"], tz=non_norm_tz)
+        # identity check against the object returned by pytz.timezone
+        assert pytz.timezone(tz) is result.tz
+    def test_constructor_timestamp_near_dst(self):
+        # GH 20854
+        ts = [
+            Timestamp("2016-10-30 03:00:00+0300", tz="Europe/Helsinki"),
+            Timestamp("2016-10-30 03:00:00+0200", tz="Europe/Helsinki"),
+        ]
+        result = DatetimeIndex(ts)
+        expected = DatetimeIndex([ts[0].to_pydatetime(), ts[1].to_pydatetime()])
+        tm.assert_index_equal(result, expected)
+    @pytest.mark.parametrize("klass", [Index, DatetimeIndex])
+    @pytest.mark.parametrize("box", [np.array, partial(np.array, dtype=object), list])
+    @pytest.mark.parametrize(
+        "tz, dtype",
+        [("US/Pacific", "datetime64[ns, US/Pacific]"), (None, "datetime64[ns]")],
+    )
+    def test_constructor_with_int_tz(self, klass, box, tz, dtype):
+        # GH 20997, 20964
+        ts = Timestamp("2018-01-01", tz=tz).as_unit("ns")
+        result = klass(box([ts._value]), dtype=dtype)
+        expected = klass([ts])
+        assert result == expected
+    def test_construction_int_rountrip(self, tz_naive_fixture):
+        """An int64 value should come back unchanged via ``asi8``, with or without tz."""
+        # GH 12619, GH#24559
+        tz = tz_naive_fixture
+        # here ``result`` is the input integer and ``expected`` is what the
+        #  DatetimeIndex reports back through asi8
+        result = 1293858000000000000
+        expected = DatetimeIndex([result], tz=tz).asi8[0]
+        assert result == expected
+    def test_construction_from_replaced_timestamps_with_dst(self):
+        # GH 18785
+        index = date_range(
+            Timestamp(2000, 12, 31),
+            Timestamp(2005, 12, 31),
+            freq="YE-DEC",
+            tz="Australia/Melbourne",
+        )
+        result = DatetimeIndex([x.replace(month=6, day=1) for x in index])
+        expected = DatetimeIndex(
+            [
+                "2000-06-01 00:00:00",
+                "2001-06-01 00:00:00",
+                "2002-06-01 00:00:00",
+                "2003-06-01 00:00:00",
+                "2004-06-01 00:00:00",
+                "2005-06-01 00:00:00",
+            ],
+            tz="Australia/Melbourne",
+        )
+        tm.assert_index_equal(result, expected)
+    def test_construction_with_tz_and_tz_aware_dti(self):
+        """Passing a different ``tz`` with an already tz-aware index should raise."""
+        # GH 23579
+        dti = date_range("2016-01-01", periods=3, tz="US/Central")
+        msg = "data is already tz-aware US/Central, unable to set specified tz"
+        with pytest.raises(TypeError, match=msg):
+            DatetimeIndex(dti, tz="Asia/Tokyo")
+    def test_construction_with_nat_and_tzlocal(self):
+        """Strings including "NaT" should construct with a dateutil ``tzlocal`` tz."""
+        tz = dateutil.tz.tzlocal()
+        result = DatetimeIndex(["2018", "NaT"], tz=tz)
+        expected = DatetimeIndex([Timestamp("2018", tz=tz), pd.NaT])
+        tm.assert_index_equal(result, expected)
+    def test_constructor_with_ambiguous_keyword_arg(self):
+        # GH 35297
+        expected = DatetimeIndex(
+            ["2020-11-01 01:00:00", "2020-11-02 01:00:00"],
+            dtype="datetime64[ns, America/New_York]",
+            freq="D",
+            ambiguous=False,
+        )
+        # ambiguous keyword in start
+        timezone = "America/New_York"
+        start = Timestamp(year=2020, month=11, day=1, hour=1).tz_localize(
+            timezone, ambiguous=False
+        )
+        result = date_range(start=start, periods=2, ambiguous=False)
+        tm.assert_index_equal(result, expected)
+        # ambiguous keyword in end
+        timezone = "America/New_York"
+        end = Timestamp(year=2020, month=11, day=2, hour=1).tz_localize(
+            timezone, ambiguous=False
+        )
+        result = date_range(end=end, periods=2, ambiguous=False)
+        tm.assert_index_equal(result, expected)
+    def test_constructor_with_nonexistent_keyword_arg(self, warsaw):
+        """date_range from a start/end localized with ``nonexistent="shift_forward"``.
+
+        The ``warsaw`` fixture supplies the timezone; the same shifted
+        Timestamp is used first as ``start`` and then as ``end``.
+        """
+        # GH 35297
+        timezone = warsaw
+        # nonexistent keyword in start
+        start = Timestamp("2015-03-29 02:30:00").tz_localize(
+            timezone, nonexistent="shift_forward"
+        )
+        result = date_range(start=start, periods=2, freq="h")
+        expected = DatetimeIndex(
+            [
+                Timestamp("2015-03-29 03:00:00+02:00", tz=timezone),
+                Timestamp("2015-03-29 04:00:00+02:00", tz=timezone),
+            ]
+        )
+        tm.assert_index_equal(result, expected)
+        # nonexistent keyword in end
+        end = start
+        result = date_range(end=end, periods=2, freq="h")
+        # the hour before 03:00+02:00 is 01:00+01:00 (the UTC offset changes)
+        expected = DatetimeIndex(
+            [
+                Timestamp("2015-03-29 01:00:00+01:00", tz=timezone),
+                Timestamp("2015-03-29 03:00:00+02:00", tz=timezone),
+            ]
+        )
+        tm.assert_index_equal(result, expected)
+    def test_constructor_no_precision_raises(self):
+        """A unit-less "datetime64" dtype should raise for DatetimeIndex and Index."""
+        # GH-24753, GH-24739
+        msg = "with no precision is not allowed"
+        with pytest.raises(ValueError, match=msg):
+            DatetimeIndex(["2000"], dtype="datetime64")
+        # Index raises the same exception type with a different message
+        msg = "The 'datetime64' dtype has no unit. Please pass in"
+        with pytest.raises(ValueError, match=msg):
+            Index(["2000"], dtype="datetime64")
+    def test_constructor_wrong_precision_raises(self):
+        """A ``datetime64[us]`` dtype is accepted and kept.
+
+        Despite the test name, nothing here is expected to raise.
+        """
+        dti = DatetimeIndex(["2000"], dtype="datetime64[us]")
+        assert dti.dtype == "M8[us]"
+        assert dti[0] == Timestamp(2000, 1, 1)
+    def test_index_constructor_with_numpy_object_array_and_timestamp_tz_with_nan(self):
+        """Index on an object array of a tz-aware Timestamp and NaN.
+
+        Expected to equal a DatetimeIndex holding that Timestamp and NaT.
+        """
+        # GH 27011
+        result = Index(np.array([Timestamp("2019", tz="UTC"), np.nan], dtype=object))
+        expected = DatetimeIndex([Timestamp("2019", tz="UTC"), pd.NaT])
+        tm.assert_index_equal(result, expected)
+    @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")])
+    def test_dti_from_tzaware_datetime(self, tz):
+        """The index tz should compare equal to the tzinfo of the input datetime."""
+        d = [datetime(2012, 8, 19, tzinfo=tz)]
+        index = DatetimeIndex(d)
+        assert timezones.tz_compare(index.tz, tz)
+    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
+    def test_dti_tz_constructors(self, tzstr):
+        """Different ways of constructing a tz-aware DatetimeIndex should agree.
+
+        Follow-up of GH#4229
+        """
+        arr = ["11/10/2005 08:00:00", "11/10/2005 09:00:00"]
+        # idx1 (to_datetime + tz_localize) is the reference for the others
+        idx1 = to_datetime(arr).tz_localize(tzstr)
+        idx2 = date_range(start="2005-11-10 08:00:00", freq="h", periods=2, tz=tzstr)
+        idx2 = idx2._with_freq(None)  # the others all have freq=None
+        idx3 = DatetimeIndex(arr, tz=tzstr)
+        idx4 = DatetimeIndex(np.array(arr), tz=tzstr)
+        for other in [idx2, idx3, idx4]:
+            tm.assert_index_equal(idx1, other)
+    def test_dti_construction_idempotent(self, unit):
+        """Re-wrapping a tz-aware index with the same tz should give an equal index."""
+        rng = date_range(
+            "03/12/2012 00:00", periods=10, freq="W-FRI", tz="US/Eastern", unit=unit
+        )
+        rng2 = DatetimeIndex(data=rng, tz="US/Eastern")
+        tm.assert_index_equal(rng, rng2)
+    @pytest.mark.parametrize("prefix", ["", "dateutil/"])
+    def test_dti_constructor_static_tzinfo(self, prefix):
+        """Smoke test: construct with tz "EST" and access ``.hour`` and ``[0]``."""
+        # it works!
+        index = DatetimeIndex([datetime(2012, 1, 1)], tz=prefix + "EST")
+        # no assertions: the two accesses below only need to not raise
+        index.hour
+        index[0]
+    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
+    def test_dti_convert_datetime_list(self, tzstr):
+        """Rebuilding from ``list(dr)`` with name and freq should equal ``dr``."""
+        dr = date_range("2012-06-02", periods=10, tz=tzstr, name="foo")
+        dr2 = DatetimeIndex(list(dr), name="foo", freq="D")
+        tm.assert_index_equal(dr, dr2)
+    @pytest.mark.parametrize(
+        "tz",
+        [
+            pytz.timezone("US/Eastern"),
+            gettz("US/Eastern"),
+        ],
+    )
+    @pytest.mark.parametrize("use_str", [True, False])
+    @pytest.mark.parametrize("box_cls", [Timestamp, DatetimeIndex])
+    def test_dti_ambiguous_matches_timestamp(self, tz, use_str, box_cls, request):
+        """Timestamp and DatetimeIndex should both raise on the same ambiguous time.
+
+        The input is given either as a string or as a pydatetime, and is
+        wrapped in a list when the constructor under test is DatetimeIndex.
+        """
+        # GH#47471 check that we get the same raising behavior in the DTI
+        # constructor and Timestamp constructor
+        dtstr = "2013-11-03 01:59:59.999999"
+        item = dtstr
+        if not use_str:
+            item = Timestamp(dtstr).to_pydatetime()
+        if box_cls is not Timestamp:
+            item = [item]
+        if not use_str and isinstance(tz, dateutil.tz.tzfile):
+            # FIXME: The Timestamp constructor here behaves differently than all
+            #  the other cases bc with dateutil/zoneinfo tzinfos we implicitly
+            #  get fold=0. Having this raise is not important, but having the
+            #  behavior be consistent across cases is.
+            mark = pytest.mark.xfail(reason="We implicitly get fold=0.")
+            request.applymarker(mark)
+        with pytest.raises(pytz.AmbiguousTimeError, match=dtstr):
+            box_cls(item, tz=tz)
+    @pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
+    def test_dti_constructor_with_non_nano_dtype(self, tz):
+        """Mixed Timestamp/string/int input with a microsecond dtype.
+
+        The expected index is assembled element by element and compared with
+        the constructor result for both list and object-ndarray input.
+        """
+        # GH#55756, GH#54620
+        ts = Timestamp("2999-01-01")
+        dtype = "M8[us]"
+        if tz is not None:
+            dtype = f"M8[us, {tz}]"
+        vals = [ts, "2999-01-02 03:04:05.678910", 2500]
+        result = DatetimeIndex(vals, dtype=dtype)
+        # The 2500 is interpreted as microseconds, consistent with what
+        #  we would get if we created DatetimeIndexes from vals[:2] and vals[2:]
+        #  and concated the results.
+        pointwise = [
+            vals[0].tz_localize(tz),
+            Timestamp(vals[1], tz=tz),
+            to_datetime(vals[2], unit="us", utc=True).tz_convert(tz),
+        ]
+        exp_vals = [x.as_unit("us").asm8 for x in pointwise]
+        exp_arr = np.array(exp_vals, dtype="M8[us]")
+        expected = DatetimeIndex(exp_arr, dtype="M8[us]")
+        if tz is not None:
+            # exp_arr is tz-naive, so attach UTC and convert to the target tz
+            expected = expected.tz_localize("UTC").tz_convert(tz)
+        tm.assert_index_equal(result, expected)
+        # same input as an object-dtype ndarray
+        result2 = DatetimeIndex(np.array(vals, dtype=object), dtype=dtype)
+        tm.assert_index_equal(result2, expected)
+    def test_dti_constructor_with_non_nano_now_today(self):
+        """The strings "now" and "today" should parse with a seconds-resolution dtype."""
+        # GH#55756
+        # reference values are captured before the index is constructed; the
+        #  assertions below require the parsed values to be no earlier than them
+        now = Timestamp.now()
+        today = Timestamp.today()
+        result = DatetimeIndex(["now", "today"], dtype="M8[s]")
+        assert result.dtype == "M8[s]"
+        # result may not exactly match [now, today] so we'll test it up to a tolerance.
+        #  (it *may* match exactly due to rounding)
+        tolerance = pd.Timedelta(microseconds=1)
+        diff0 = result[0] - now.as_unit("s")
+        assert diff0 >= pd.Timedelta(0)
+        assert diff0 < tolerance
+        diff1 = result[1] - today.as_unit("s")
+        assert diff1 >= pd.Timedelta(0)
+        assert diff1 < tolerance
+    def test_dti_constructor_object_float_matches_float_dtype(self):
+        """float64 input and its object-dtype copy should give the same index."""
+        # GH#55780
+        arr = np.array([0, np.nan], dtype=np.float64)
+        arr2 = arr.astype(object)
+        dti1 = DatetimeIndex(arr, tz="CET")
+        dti2 = DatetimeIndex(arr2, tz="CET")
+        tm.assert_index_equal(dti1, dti2)
+    @pytest.mark.parametrize("dtype", ["M8[us]", "M8[us, US/Pacific]"])
+    def test_dti_constructor_with_dtype_object_int_matches_int_dtype(self, dtype):
+        """int64, float64 and object-dtype inputs should all give the same index.
+
+        The first element of each input is a missing value (NaT's integer
+        value, NaN, or NaT itself).
+        """
+        # Going through the object path should match the non-object path
+        vals1 = np.arange(5, dtype="i8") * 1000
+        vals1[0] = pd.NaT.value
+        vals2 = vals1.astype(np.float64)
+        vals2[0] = np.nan
+        vals3 = vals1.astype(object)
+        # change lib.infer_dtype(vals3) from "integer" so we go through
+        #  array_to_datetime in _sequence_to_dt64
+        vals3[0] = pd.NaT
+        vals4 = vals2.astype(object)
+        res1 = DatetimeIndex(vals1, dtype=dtype)
+        res2 = DatetimeIndex(vals2, dtype=dtype)
+        res3 = DatetimeIndex(vals3, dtype=dtype)
+        res4 = DatetimeIndex(vals4, dtype=dtype)
+        # expected: the int64 values reinterpreted as microseconds
+        expected = DatetimeIndex(vals1.view("M8[us]"))
+        if res1.tz is not None:
+            expected = expected.tz_localize("UTC").tz_convert(res1.tz)
+        tm.assert_index_equal(res1, expected)
+        tm.assert_index_equal(res2, expected)
+        tm.assert_index_equal(res3, expected)
+        tm.assert_index_equal(res4, expected)
+class TestTimeSeries:
+    def test_dti_constructor_preserve_dti_freq(self):
+        rng = date_range("1/1/2000", "1/2/2000", freq="5min")
+        rng2 = DatetimeIndex(rng)
+        assert rng.freq == rng2.freq
+    def test_explicit_none_freq(self):
+        # Explicitly passing freq=None is respected
+        rng = date_range("1/1/2000", "1/2/2000", freq="5min")
+        result = DatetimeIndex(rng, freq=None)
+        assert result.freq is None
+        result = DatetimeIndex(rng._data, freq=None)
+        assert result.freq is None
+    def test_dti_constructor_small_int(self, any_int_numpy_dtype):
+        # see gh-13721
+        exp = DatetimeIndex(
+            [
+                "1970-01-01 00:00:00.00000000",
+                "1970-01-01 00:00:00.00000001",
+                "1970-01-01 00:00:00.00000002",
+            ]
+        )
+        arr = np.array([0, 10, 20], dtype=any_int_numpy_dtype)
+        tm.assert_index_equal(DatetimeIndex(arr), exp)
+    def test_ctor_str_intraday(self):
+        rng = DatetimeIndex(["1-1-2000 00:00:01"])
+        assert rng[0].second == 1
+    def test_index_cast_datetime64_other_units(self):
+        arr = np.arange(0, 100, 10, dtype=np.int64).view("M8[D]")
+        idx = Index(arr)
+        assert (idx.values == astype_overflowsafe(arr, dtype=np.dtype("M8[ns]"))).all()
+    def test_constructor_int64_nocopy(self):
+        # GH#1624
+        arr = np.arange(1000, dtype=np.int64)
+        index = DatetimeIndex(arr)
+        arr[50:100] = -1
+        assert (index.asi8[50:100] == -1).all()
+        arr = np.arange(1000, dtype=np.int64)
+        index = DatetimeIndex(arr, copy=True)
+        arr[50:100] = -1
+        assert (index.asi8[50:100] != -1).all()
+    @pytest.mark.parametrize(
+        "freq",
+        ["ME", "QE", "YE", "D", "B", "bh", "min", "s", "ms", "us", "h", "ns", "C"],
+    )
+    def test_from_freq_recreate_from_data(self, freq):
+        org = date_range(start="2001/02/01 09:00", freq=freq, periods=1)
+        idx = DatetimeIndex(org, freq=freq)
+        tm.assert_index_equal(idx, org)
+        org = date_range(
+            start="2001/02/01 09:00", freq=freq, tz="US/Pacific", periods=1
+        )
+        idx = DatetimeIndex(org, freq=freq, tz="US/Pacific")
+        tm.assert_index_equal(idx, org)
+    def test_datetimeindex_constructor_misc(self):
+        arr = ["1/1/2005", "1/2/2005", "Jn 3, 2005", "2005-01-04"]
+        msg = r"(\(')?Unknown datetime string format(:', 'Jn 3, 2005'\))?"
+        with pytest.raises(ValueError, match=msg):
+            DatetimeIndex(arr)
+        arr = ["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"]
+        idx1 = DatetimeIndex(arr)
+        arr = [datetime(2005, 1, 1), "1/2/2005", "1/3/2005", "2005-01-04"]
+        idx2 = DatetimeIndex(arr)
+        arr = [Timestamp(datetime(2005, 1, 1)), "1/2/2005", "1/3/2005", "2005-01-04"]
+        idx3 = DatetimeIndex(arr)
+        arr = np.array(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"], dtype="O")
+        idx4 = DatetimeIndex(arr)
+        idx5 = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True)
+        idx6 = DatetimeIndex(
+            ["2007/05/12", "2008/01/25"], dayfirst=False, yearfirst=True
+        )
+        tm.assert_index_equal(idx5, idx6)
+        for other in [idx2, idx3, idx4]:
+            assert (idx1.values == other.values).all()
+    def test_dti_constructor_object_dtype_dayfirst_yearfirst_with_tz(self):
+        # GH#55813
+        val = "5/10/16"
+        dfirst = Timestamp(2016, 10, 5, tz="US/Pacific")
+        yfirst = Timestamp(2005, 10, 16, tz="US/Pacific")
+        result1 = DatetimeIndex([val], tz="US/Pacific", dayfirst=True)
+        expected1 = DatetimeIndex([dfirst])
+        tm.assert_index_equal(result1, expected1)
+        result2 = DatetimeIndex([val], tz="US/Pacific", yearfirst=True)
+        expected2 = DatetimeIndex([yfirst])
+        tm.assert_index_equal(result2, expected2)

py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_date_range.py ADDED Viewed

	@@ -0,0 +1,1721 @@

+"""
+test date_range, bdate_range construction from the convenience range functions
+"""
+from datetime import (
+    datetime,
+    time,
+    timedelta,
+)
+import re
+import numpy as np
+import pytest
+import pytz
+from pytz import timezone
+from pandas._libs.tslibs import timezones
+from pandas._libs.tslibs.offsets import (
+    BDay,
+    CDay,
+    DateOffset,
+    MonthEnd,
+    prefix_mapping,
+)
+from pandas.errors import OutOfBoundsDatetime
+import pandas.util._test_decorators as td
+import pandas as pd
+from pandas import (
+    DataFrame,
+    DatetimeIndex,
+    Series,
+    Timedelta,
+    Timestamp,
+    bdate_range,
+    date_range,
+    offsets,
+)
+import pandas._testing as tm
+from pandas.core.arrays.datetimes import _generate_range as generate_range
+from pandas.tests.indexes.datetimes.test_timezones import (
+    FixedOffset,
+    fixed_off_no_name,
+)
+from pandas.tseries.holiday import USFederalHolidayCalendar
+# Module-level endpoints spanning 2009-01-01 to 2010-01-01; the code that
+# uses them is not visible in this excerpt.
+START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
+def _get_expected_range(
+    begin_to_match,
+    end_to_match,
+    both_range,
+    inclusive_endpoints,
+):
+    """Helper to get expected range from a both inclusive range"""
+    left_match = begin_to_match == both_range[0]
+    right_match = end_to_match == both_range[-1]
+    if inclusive_endpoints == "left" and right_match:
+        expected_range = both_range[:-1]
+    elif inclusive_endpoints == "right" and left_match:
+        expected_range = both_range[1:]
+    elif inclusive_endpoints == "neither" and left_match and right_match:
+        expected_range = both_range[1:-1]
+    elif inclusive_endpoints == "neither" and right_match:
+        expected_range = both_range[:-1]
+    elif inclusive_endpoints == "neither" and left_match:
+        expected_range = both_range[1:]
+    elif inclusive_endpoints == "both":
+        expected_range = both_range[:]
+    else:
+        expected_range = both_range[:]
+    return expected_range
+class TestTimestampEquivDateRange:
+    # Older tests in TestTimeSeries constructed their `stamp` objects
+    # using `date_range` instead of the `Timestamp` constructor.
+    # TestTimestampEquivDateRange checks that these are equivalent in the
+    # pertinent cases.
+    def test_date_range_timestamp_equiv(self):
+        rng = date_range("20090415", "20090519", tz="US/Eastern")
+        stamp = rng[0]
+        ts = Timestamp("20090415", tz="US/Eastern")
+        assert ts == stamp
+    def test_date_range_timestamp_equiv_dateutil(self):
+        rng = date_range("20090415", "20090519", tz="dateutil/US/Eastern")
+        stamp = rng[0]
+        ts = Timestamp("20090415", tz="dateutil/US/Eastern")
+        assert ts == stamp
+    def test_date_range_timestamp_equiv_explicit_pytz(self):
+        rng = date_range("20090415", "20090519", tz=pytz.timezone("US/Eastern"))
+        stamp = rng[0]
+        ts = Timestamp("20090415", tz=pytz.timezone("US/Eastern"))
+        assert ts == stamp
+    @td.skip_if_windows
+    def test_date_range_timestamp_equiv_explicit_dateutil(self):
+        from pandas._libs.tslibs.timezones import dateutil_gettz as gettz
+        rng = date_range("20090415", "20090519", tz=gettz("US/Eastern"))
+        stamp = rng[0]
+        ts = Timestamp("20090415", tz=gettz("US/Eastern"))
+        assert ts == stamp
+    def test_date_range_timestamp_equiv_from_datetime_instance(self):
+        datetime_instance = datetime(2014, 3, 4)
+        # build a timestamp with a frequency, since then it supports
+        # addition/subtraction of integers
+        timestamp_instance = date_range(datetime_instance, periods=1, freq="D")[0]
+        ts = Timestamp(datetime_instance)
+        assert ts == timestamp_instance
+    def test_date_range_timestamp_equiv_preserve_frequency(self):
+        timestamp_instance = date_range("2014-03-05", periods=1, freq="D")[0]
+        ts = Timestamp("2014-03-05")
+        assert timestamp_instance == ts
+class TestDateRanges:
+    def test_date_range_name(self):
+        idx = date_range(start="2000-01-01", periods=1, freq="YE", name="TEST")
+        assert idx.name == "TEST"
+    def test_date_range_invalid_periods(self):
+        msg = "periods must be a number, got foo"
+        with pytest.raises(TypeError, match=msg):
+            date_range(start="1/1/2000", periods="foo", freq="D")
+    def test_date_range_fractional_period(self):
+        msg = "Non-integer 'periods' in pd.date_range, pd.timedelta_range"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            rng = date_range("1/1/2000", periods=10.5)
+        exp = date_range("1/1/2000", periods=10)
+        tm.assert_index_equal(rng, exp)
+    @pytest.mark.parametrize(
+        "freq,freq_depr",
+        [
+            ("2ME", "2M"),
+            ("2SME", "2SM"),
+            ("2BQE", "2BQ"),
+            ("2BYE", "2BY"),
+        ],
+    )
+    def test_date_range_frequency_M_SM_BQ_BY_deprecated(self, freq, freq_depr):
+        # GH#52064
+        depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed "
+        f"in a future version, please use '{freq[1:]}' instead."
+        expected = date_range("1/1/2000", periods=4, freq=freq)
+        with tm.assert_produces_warning(FutureWarning, match=depr_msg):
+            result = date_range("1/1/2000", periods=4, freq=freq_depr)
+        tm.assert_index_equal(result, expected)
+    def test_date_range_tuple_freq_raises(self):
+        # GH#34703
+        edate = datetime(2000, 1, 1)
+        with pytest.raises(TypeError, match="pass as a string instead"):
+            date_range(end=edate, freq=("D", 5), periods=20)
+    @pytest.mark.parametrize("freq", ["ns", "us", "ms", "min", "s", "h", "D"])
+    def test_date_range_edges(self, freq):
+        # GH#13672
+        td = Timedelta(f"1{freq}")
+        ts = Timestamp("1970-01-01")
+        idx = date_range(
+            start=ts + td,
+            end=ts + 4 * td,
+            freq=freq,
+        )
+        exp = DatetimeIndex(
+            [ts + n * td for n in range(1, 5)],
+            dtype="M8[ns]",
+            freq=freq,
+        )
+        tm.assert_index_equal(idx, exp)
+        # start after end
+        idx = date_range(
+            start=ts + 4 * td,
+            end=ts + td,
+            freq=freq,
+        )
+        exp = DatetimeIndex([], dtype="M8[ns]", freq=freq)
+        tm.assert_index_equal(idx, exp)
+        # start matches end
+        idx = date_range(
+            start=ts + td,
+            end=ts + td,
+            freq=freq,
+        )
+        exp = DatetimeIndex([ts + td], dtype="M8[ns]", freq=freq)
+        tm.assert_index_equal(idx, exp)
+    def test_date_range_near_implementation_bound(self):
+        # GH#???
+        freq = Timedelta(1)
+        with pytest.raises(OutOfBoundsDatetime, match="Cannot generate range with"):
+            date_range(end=Timestamp.min, periods=2, freq=freq)
+    def test_date_range_nat(self):
+        # GH#11587
+        msg = "Neither `start` nor `end` can be NaT"
+        with pytest.raises(ValueError, match=msg):
+            date_range(start="2016-01-01", end=pd.NaT, freq="D")
+        with pytest.raises(ValueError, match=msg):
+            date_range(start=pd.NaT, end="2016-01-01", freq="D")
+    def test_date_range_multiplication_overflow(self):
+        # GH#24255
+        # check that overflows in calculating `addend = periods * stride`
+        #  are caught
+        with tm.assert_produces_warning(None):
+            # we should _not_ be seeing a overflow RuntimeWarning
+            dti = date_range(start="1677-09-22", periods=213503, freq="D")
+        assert dti[0] == Timestamp("1677-09-22")
+        assert len(dti) == 213503
+        msg = "Cannot generate range with"
+        with pytest.raises(OutOfBoundsDatetime, match=msg):
+            date_range("1969-05-04", periods=200000000, freq="30000D")
+    def test_date_range_unsigned_overflow_handling(self):
+        # GH#24255
+        # case where `addend = periods * stride` overflows int64 bounds
+        #  but not uint64 bounds
+        dti = date_range(start="1677-09-22", end="2262-04-11", freq="D")
+        dti2 = date_range(start=dti[0], periods=len(dti), freq="D")
+        assert dti2.equals(dti)
+        dti3 = date_range(end=dti[-1], periods=len(dti), freq="D")
+        assert dti3.equals(dti)
+    def test_date_range_int64_overflow_non_recoverable(self):
+        # GH#24255
+        # case with start later than 1970-01-01, overflow int64 but not uint64
+        msg = "Cannot generate range with"
+        with pytest.raises(OutOfBoundsDatetime, match=msg):
+            date_range(start="1970-02-01", periods=106752 * 24, freq="h")
+        # case with end before 1970-01-01, overflow int64 but not uint64
+        with pytest.raises(OutOfBoundsDatetime, match=msg):
+            date_range(end="1969-11-14", periods=106752 * 24, freq="h")
+    @pytest.mark.slow
+    @pytest.mark.parametrize(
+        "s_ts, e_ts", [("2262-02-23", "1969-11-14"), ("1970-02-01", "1677-10-22")]
+    )
+    def test_date_range_int64_overflow_stride_endpoint_different_signs(
+        self, s_ts, e_ts
+    ):
+        # cases where stride * periods overflow int64 and stride/endpoint
+        #  have different signs
+        start = Timestamp(s_ts)
+        end = Timestamp(e_ts)
+        expected = date_range(start=start, end=end, freq="-1h")
+        assert expected[0] == start
+        assert expected[-1] == end
+        dti = date_range(end=end, periods=len(expected), freq="-1h")
+        tm.assert_index_equal(dti, expected)
+    def test_date_range_out_of_bounds(self):
+        # GH#14187
+        msg = "Cannot generate range"
+        with pytest.raises(OutOfBoundsDatetime, match=msg):
+            date_range("2016-01-01", periods=100000, freq="D")
+        with pytest.raises(OutOfBoundsDatetime, match=msg):
+            date_range(end="1763-10-12", periods=100000, freq="D")
+    def test_date_range_gen_error(self):
+        rng = date_range("1/1/2000 00:00", "1/1/2000 00:18", freq="5min")
+        assert len(rng) == 4
+    def test_date_range_normalize(self):
+        snap = datetime.today()
+        n = 50
+        rng = date_range(snap, periods=n, normalize=False, freq="2D")
+        offset = timedelta(2)
+        expected = DatetimeIndex(
+            [snap + i * offset for i in range(n)], dtype="M8[ns]", freq=offset
+        )
+        tm.assert_index_equal(rng, expected)
+        rng = date_range("1/1/2000 08:15", periods=n, normalize=False, freq="B")
+        the_time = time(8, 15)
+        for val in rng:
+            assert val.time() == the_time
+    def test_date_range_ambiguous_arguments(self):
+        # #2538
+        start = datetime(2011, 1, 1, 5, 3, 40)
+        end = datetime(2011, 1, 1, 8, 9, 40)
+        msg = (
+            "Of the four parameters: start, end, periods, and "
+            "freq, exactly three must be specified"
+        )
+        with pytest.raises(ValueError, match=msg):
+            date_range(start, end, periods=10, freq="s")
+    def test_date_range_convenience_periods(self, unit):
+        # GH 20808
+        result = date_range("2018-04-24", "2018-04-27", periods=3, unit=unit)
+        expected = DatetimeIndex(
+            ["2018-04-24 00:00:00", "2018-04-25 12:00:00", "2018-04-27 00:00:00"],
+            dtype=f"M8[{unit}]",
+            freq=None,
+        )
+        tm.assert_index_equal(result, expected)
+        # Test if spacing remains linear if tz changes to dst in range
+        result = date_range(
+            "2018-04-01 01:00:00",
+            "2018-04-01 04:00:00",
+            tz="Australia/Sydney",
+            periods=3,
+            unit=unit,
+        )
+        expected = DatetimeIndex(
+            [
+                Timestamp("2018-04-01 01:00:00+1100", tz="Australia/Sydney"),
+                Timestamp("2018-04-01 02:00:00+1000", tz="Australia/Sydney"),
+                Timestamp("2018-04-01 04:00:00+1000", tz="Australia/Sydney"),
+            ]
+        ).as_unit(unit)
+        tm.assert_index_equal(result, expected)
+    def test_date_range_index_comparison(self):
+        rng = date_range("2011-01-01", periods=3, tz="US/Eastern")
+        df = Series(rng).to_frame()
+        arr = np.array([rng.to_list()]).T
+        arr2 = np.array([rng]).T
+        with pytest.raises(ValueError, match="Unable to coerce to Series"):
+            rng == df
+        with pytest.raises(ValueError, match="Unable to coerce to Series"):
+            df == rng
+        expected = DataFrame([True, True, True])
+        results = df == arr2
+        tm.assert_frame_equal(results, expected)
+        expected = Series([True, True, True], name=0)
+        results = df[0] == arr2[:, 0]
+        tm.assert_series_equal(results, expected)
+        expected = np.array(
+            [[True, False, False], [False, True, False], [False, False, True]]
+        )
+        results = rng == arr
+        tm.assert_numpy_array_equal(results, expected)
+    @pytest.mark.parametrize(
+        "start,end,result_tz",
+        [
+            ["20180101", "20180103", "US/Eastern"],
+            [datetime(2018, 1, 1), datetime(2018, 1, 3), "US/Eastern"],
+            [Timestamp("20180101"), Timestamp("20180103"), "US/Eastern"],
+            [
+                Timestamp("20180101", tz="US/Eastern"),
+                Timestamp("20180103", tz="US/Eastern"),
+                "US/Eastern",
+            ],
+            [
+                Timestamp("20180101", tz="US/Eastern"),
+                Timestamp("20180103", tz="US/Eastern"),
+                None,
+            ],
+        ],
+    )
+    def test_date_range_linspacing_tz(self, start, end, result_tz):
+        # GH 20983
+        result = date_range(start, end, periods=3, tz=result_tz)
+        expected = date_range("20180101", periods=3, freq="D", tz="US/Eastern")
+        tm.assert_index_equal(result, expected)
+    def test_date_range_timedelta(self):
+        start = "2020-01-01"
+        end = "2020-01-11"
+        rng1 = date_range(start, end, freq="3D")
+        rng2 = date_range(start, end, freq=timedelta(days=3))
+        tm.assert_index_equal(rng1, rng2)
+    def test_range_misspecified(self):
+        # GH #1095
+        msg = (
+            "Of the four parameters: start, end, periods, and "
+            "freq, exactly three must be specified"
+        )
+        with pytest.raises(ValueError, match=msg):
+            date_range(start="1/1/2000")
+        with pytest.raises(ValueError, match=msg):
+            date_range(end="1/1/2000")
+        with pytest.raises(ValueError, match=msg):
+            date_range(periods=10)
+        with pytest.raises(ValueError, match=msg):
+            date_range(start="1/1/2000", freq="h")
+        with pytest.raises(ValueError, match=msg):
+            date_range(end="1/1/2000", freq="h")
+        with pytest.raises(ValueError, match=msg):
+            date_range(periods=10, freq="h")
+        with pytest.raises(ValueError, match=msg):
+            date_range()
+    def test_compat_replace(self):
+        # https://github.com/statsmodels/statsmodels/issues/3349
+        # replace should take ints/longs for compat
+        result = date_range(Timestamp("1960-04-01 00:00:00"), periods=76, freq="QS-JAN")
+        assert len(result) == 76
+    def test_catch_infinite_loop(self):
+        offset = offsets.DateOffset(minute=5)
+        # blow up, don't loop forever
+        msg = "Offset <DateOffset: minute=5> did not increment date"
+        with pytest.raises(ValueError, match=msg):
+            date_range(datetime(2011, 11, 11), datetime(2011, 11, 12), freq=offset)
+    def test_construct_over_dst(self, unit):
+        # GH 20854
+        pre_dst = Timestamp("2010-11-07 01:00:00").tz_localize(
+            "US/Pacific", ambiguous=True
+        )
+        pst_dst = Timestamp("2010-11-07 01:00:00").tz_localize(
+            "US/Pacific", ambiguous=False
+        )
+        expect_data = [
+            Timestamp("2010-11-07 00:00:00", tz="US/Pacific"),
+            pre_dst,
+            pst_dst,
+        ]
+        expected = DatetimeIndex(expect_data, freq="h").as_unit(unit)
+        result = date_range(
+            start="2010-11-7", periods=3, freq="h", tz="US/Pacific", unit=unit
+        )
+        tm.assert_index_equal(result, expected)
+    def test_construct_with_different_start_end_string_format(self, unit):
+        # GH 12064
+        result = date_range(
+            "2013-01-01 00:00:00+09:00",
+            "2013/01/01 02:00:00+09:00",
+            freq="h",
+            unit=unit,
+        )
+        expected = DatetimeIndex(
+            [
+                Timestamp("2013-01-01 00:00:00+09:00"),
+                Timestamp("2013-01-01 01:00:00+09:00"),
+                Timestamp("2013-01-01 02:00:00+09:00"),
+            ],
+            freq="h",
+        ).as_unit(unit)
+        tm.assert_index_equal(result, expected)
+    def test_error_with_zero_monthends(self):
+        msg = r"Offset <0 \* MonthEnds> did not increment date"
+        with pytest.raises(ValueError, match=msg):
+            date_range("1/1/2000", "1/1/2001", freq=MonthEnd(0))
+    def test_range_bug(self, unit):
+        # GH #770
+        offset = DateOffset(months=3)
+        result = date_range("2011-1-1", "2012-1-31", freq=offset, unit=unit)
+        start = datetime(2011, 1, 1)
+        expected = DatetimeIndex(
+            [start + i * offset for i in range(5)], dtype=f"M8[{unit}]", freq=offset
+        )
+        tm.assert_index_equal(result, expected)
+    def test_range_tz_pytz(self):
+        # see gh-2906
+        tz = timezone("US/Eastern")
+        start = tz.localize(datetime(2011, 1, 1))
+        end = tz.localize(datetime(2011, 1, 3))
+        dr = date_range(start=start, periods=3)
+        assert dr.tz.zone == tz.zone
+        assert dr[0] == start
+        assert dr[2] == end
+        dr = date_range(end=end, periods=3)
+        assert dr.tz.zone == tz.zone
+        assert dr[0] == start
+        assert dr[2] == end
+        dr = date_range(start=start, end=end)
+        assert dr.tz.zone == tz.zone
+        assert dr[0] == start
+        assert dr[2] == end
+    @pytest.mark.parametrize(
+        "start, end",
+        [
+            [
+                Timestamp(datetime(2014, 3, 6), tz="US/Eastern"),
+                Timestamp(datetime(2014, 3, 12), tz="US/Eastern"),
+            ],
+            [
+                Timestamp(datetime(2013, 11, 1), tz="US/Eastern"),
+                Timestamp(datetime(2013, 11, 6), tz="US/Eastern"),
+            ],
+        ],
+    )
+    def test_range_tz_dst_straddle_pytz(self, start, end):
+        dr = date_range(start, end, freq="D")
+        assert dr[0] == start
+        assert dr[-1] == end
+        assert np.all(dr.hour == 0)
+        dr = date_range(start, end, freq="D", tz="US/Eastern")
+        assert dr[0] == start
+        assert dr[-1] == end
+        assert np.all(dr.hour == 0)
+        dr = date_range(
+            start.replace(tzinfo=None),
+            end.replace(tzinfo=None),
+            freq="D",
+            tz="US/Eastern",
+        )
+        assert dr[0] == start
+        assert dr[-1] == end
+        assert np.all(dr.hour == 0)
+    def test_range_tz_dateutil(self):
+        # see gh-2906
+        # Use maybe_get_tz to fix filename in tz under dateutil.
+        from pandas._libs.tslibs.timezones import maybe_get_tz
+        tz = lambda x: maybe_get_tz("dateutil/" + x)
+        start = datetime(2011, 1, 1, tzinfo=tz("US/Eastern"))
+        end = datetime(2011, 1, 3, tzinfo=tz("US/Eastern"))
+        dr = date_range(start=start, periods=3)
+        assert dr.tz == tz("US/Eastern")
+        assert dr[0] == start
+        assert dr[2] == end
+        dr = date_range(end=end, periods=3)
+        assert dr.tz == tz("US/Eastern")
+        assert dr[0] == start
+        assert dr[2] == end
+        dr = date_range(start=start, end=end)
+        assert dr.tz == tz("US/Eastern")
+        assert dr[0] == start
+        assert dr[2] == end
+    @pytest.mark.parametrize("freq", ["1D", "3D", "2ME", "7W", "3h", "YE"])
+    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
+    def test_range_closed(self, freq, tz, inclusive_endpoints_fixture):
+        # GH#12409, GH#12684
+        begin = Timestamp("2011/1/1", tz=tz)
+        end = Timestamp("2014/1/1", tz=tz)
+        result_range = date_range(
+            begin, end, inclusive=inclusive_endpoints_fixture, freq=freq
+        )
+        both_range = date_range(begin, end, inclusive="both", freq=freq)
+        expected_range = _get_expected_range(
+            begin, end, both_range, inclusive_endpoints_fixture
+        )
+        tm.assert_index_equal(expected_range, result_range)
+    @pytest.mark.parametrize("freq", ["1D", "3D", "2ME", "7W", "3h", "YE"])
+    def test_range_with_tz_closed_with_tz_aware_start_end(
+        self, freq, inclusive_endpoints_fixture
+    ):
+        begin = Timestamp("2011/1/1")
+        end = Timestamp("2014/1/1")
+        begintz = Timestamp("2011/1/1", tz="US/Eastern")
+        endtz = Timestamp("2014/1/1", tz="US/Eastern")
+        result_range = date_range(
+            begin,
+            end,
+            inclusive=inclusive_endpoints_fixture,
+            freq=freq,
+            tz="US/Eastern",
+        )
+        both_range = date_range(
+            begin, end, inclusive="both", freq=freq, tz="US/Eastern"
+        )
+        expected_range = _get_expected_range(
+            begintz,
+            endtz,
+            both_range,
+            inclusive_endpoints_fixture,
+        )
+        tm.assert_index_equal(expected_range, result_range)
+    def test_range_closed_boundary(self, inclusive_endpoints_fixture):
+        # GH#11804
+        right_boundary = date_range(
+            "2015-09-12",
+            "2015-12-01",
+            freq="QS-MAR",
+            inclusive=inclusive_endpoints_fixture,
+        )
+        left_boundary = date_range(
+            "2015-09-01",
+            "2015-09-12",
+            freq="QS-MAR",
+            inclusive=inclusive_endpoints_fixture,
+        )
+        both_boundary = date_range(
+            "2015-09-01",
+            "2015-12-01",
+            freq="QS-MAR",
+            inclusive=inclusive_endpoints_fixture,
+        )
+        neither_boundary = date_range(
+            "2015-09-11",
+            "2015-09-12",
+            freq="QS-MAR",
+            inclusive=inclusive_endpoints_fixture,
+        )
+        expected_right = both_boundary
+        expected_left = both_boundary
+        expected_both = both_boundary
+        if inclusive_endpoints_fixture == "right":
+            expected_left = both_boundary[1:]
+        elif inclusive_endpoints_fixture == "left":
+            expected_right = both_boundary[:-1]
+        elif inclusive_endpoints_fixture == "both":
+            expected_right = both_boundary[1:]
+            expected_left = both_boundary[:-1]
+        expected_neither = both_boundary[1:-1]
+        tm.assert_index_equal(right_boundary, expected_right)
+        tm.assert_index_equal(left_boundary, expected_left)
+        tm.assert_index_equal(both_boundary, expected_both)
+        tm.assert_index_equal(neither_boundary, expected_neither)
+    def test_date_range_years_only(self, tz_naive_fixture):
+        tz = tz_naive_fixture
+        # GH#6961
+        rng1 = date_range("2014", "2015", freq="ME", tz=tz)
+        expected1 = date_range("2014-01-31", "2014-12-31", freq="ME", tz=tz)
+        tm.assert_index_equal(rng1, expected1)
+        rng2 = date_range("2014", "2015", freq="MS", tz=tz)
+        expected2 = date_range("2014-01-01", "2015-01-01", freq="MS", tz=tz)
+        tm.assert_index_equal(rng2, expected2)
+        rng3 = date_range("2014", "2020", freq="YE", tz=tz)
+        expected3 = date_range("2014-12-31", "2019-12-31", freq="YE", tz=tz)
+        tm.assert_index_equal(rng3, expected3)
+        rng4 = date_range("2014", "2020", freq="YS", tz=tz)
+        expected4 = date_range("2014-01-01", "2020-01-01", freq="YS", tz=tz)
+        tm.assert_index_equal(rng4, expected4)
+    def test_freq_divides_end_in_nanos(self):
+        # GH 10885
+        result_1 = date_range("2005-01-12 10:00", "2005-01-12 16:00", freq="345min")
+        result_2 = date_range("2005-01-13 10:00", "2005-01-13 16:00", freq="345min")
+        expected_1 = DatetimeIndex(
+            ["2005-01-12 10:00:00", "2005-01-12 15:45:00"],
+            dtype="datetime64[ns]",
+            freq="345min",
+            tz=None,
+        )
+        expected_2 = DatetimeIndex(
+            ["2005-01-13 10:00:00", "2005-01-13 15:45:00"],
+            dtype="datetime64[ns]",
+            freq="345min",
+            tz=None,
+        )
+        tm.assert_index_equal(result_1, expected_1)
+        tm.assert_index_equal(result_2, expected_2)
+    def test_cached_range_bug(self):
+        rng = date_range("2010-09-01 05:00:00", periods=50, freq=DateOffset(hours=6))
+        assert len(rng) == 50
+        assert rng[0] == datetime(2010, 9, 1, 5)
+    def test_timezone_comparison_bug(self):
+        # smoke test
+        start = Timestamp("20130220 10:00", tz="US/Eastern")
+        result = date_range(start, periods=2, tz="US/Eastern")
+        assert len(result) == 2
+    def test_timezone_comparison_assert(self):
+        start = Timestamp("20130220 10:00", tz="US/Eastern")
+        msg = "Inferred time zone not equal to passed time zone"
+        with pytest.raises(AssertionError, match=msg):
+            date_range(start, periods=2, tz="Europe/Berlin")
+    def test_negative_non_tick_frequency_descending_dates(self, tz_aware_fixture):
+        # GH 23270
+        tz = tz_aware_fixture
+        result = date_range(start="2011-06-01", end="2011-01-01", freq="-1MS", tz=tz)
+        expected = date_range(end="2011-06-01", start="2011-01-01", freq="1MS", tz=tz)[
+            ::-1
+        ]
+        tm.assert_index_equal(result, expected)
+    def test_range_where_start_equal_end(self, inclusive_endpoints_fixture):
+        # GH 43394
+        start = "2021-09-02"
+        end = "2021-09-02"
+        result = date_range(
+            start=start, end=end, freq="D", inclusive=inclusive_endpoints_fixture
+        )
+        both_range = date_range(start=start, end=end, freq="D", inclusive="both")
+        if inclusive_endpoints_fixture == "neither":
+            expected = both_range[1:-1]
+        elif inclusive_endpoints_fixture in ("left", "right", "both"):
+            expected = both_range[:]
+        tm.assert_index_equal(result, expected)
+    def test_freq_dateoffset_with_relateivedelta_nanos(self):
+        # GH 46877
+        freq = DateOffset(hours=10, days=57, nanoseconds=3)
+        result = date_range(end="1970-01-01 00:00:00", periods=10, freq=freq, name="a")
+        expected = DatetimeIndex(
+            [
+                "1968-08-02T05:59:59.999999973",
+                "1968-09-28T15:59:59.999999976",
+                "1968-11-25T01:59:59.999999979",
+                "1969-01-21T11:59:59.999999982",
+                "1969-03-19T21:59:59.999999985",
+                "1969-05-16T07:59:59.999999988",
+                "1969-07-12T17:59:59.999999991",
+                "1969-09-08T03:59:59.999999994",
+                "1969-11-04T13:59:59.999999997",
+                "1970-01-01T00:00:00.000000000",
+            ],
+            name="a",
+        )
+        tm.assert_index_equal(result, expected)
+    @pytest.mark.parametrize(
+        "freq,freq_depr",
+        [
+            ("h", "H"),
+            ("2min", "2T"),
+            ("1s", "1S"),
+            ("2ms", "2L"),
+            ("1us", "1U"),
+            ("2ns", "2N"),
+        ],
+    )
+    def test_frequencies_H_T_S_L_U_N_deprecated(self, freq, freq_depr):
+        # GH#52536
+        freq_msg = re.split("[0-9]*", freq, maxsplit=1)[1]
+        freq_depr_msg = re.split("[0-9]*", freq_depr, maxsplit=1)[1]
+        msg = (
+            f"'{freq_depr_msg}' is deprecated and will be removed in a future version, "
+        )
+        f"please use '{freq_msg}' instead"
+        expected = date_range("1/1/2000", periods=2, freq=freq)
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = date_range("1/1/2000", periods=2, freq=freq_depr)
+        tm.assert_index_equal(result, expected)
+    @pytest.mark.parametrize(
+        "freq,freq_depr",
+        [
+            ("200YE", "200A"),
+            ("YE", "Y"),
+            ("2YE-MAY", "2A-MAY"),
+            ("YE-MAY", "Y-MAY"),
+        ],
+    )
+    def test_frequencies_A_deprecated_Y_renamed(self, freq, freq_depr):
+        # GH#9586, GH#54275
+        freq_msg = re.split("[0-9]*", freq, maxsplit=1)[1]
+        freq_depr_msg = re.split("[0-9]*", freq_depr, maxsplit=1)[1]
+        msg = f"'{freq_depr_msg}' is deprecated and will be removed "
+        f"in a future version, please use '{freq_msg}' instead."
+        expected = date_range("1/1/2000", periods=2, freq=freq)
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = date_range("1/1/2000", periods=2, freq=freq_depr)
+        tm.assert_index_equal(result, expected)
+    def test_to_offset_with_lowercase_deprecated_freq(self) -> None:
+        # https://github.com/pandas-dev/pandas/issues/56847
+        msg = (
+            "'m' is deprecated and will be removed in a future version, please use "
+            "'ME' instead."
+        )
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = date_range("2010-01-01", periods=2, freq="m")
+        expected = DatetimeIndex(["2010-01-31", "2010-02-28"], freq="ME")
+        tm.assert_index_equal(result, expected)
+    def test_date_range_bday(self):
+        sdate = datetime(1999, 12, 25)
+        idx = date_range(start=sdate, freq="1B", periods=20)
+        assert len(idx) == 20
+        assert idx[0] == sdate + 0 * offsets.BDay()
+        assert idx.freq == "B"
+class TestDateRangeTZ:
+    """Tests for date_range with timezones"""
+    def test_hongkong_tz_convert(self):
+        # GH#1673 smoke test
+        dr = date_range("2012-01-01", "2012-01-10", freq="D", tz="Hongkong")
+        # it works!
+        dr.hour
+    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
+    def test_date_range_span_dst_transition(self, tzstr):
+        # GH#1778
+        # Standard -> Daylight Savings Time
+        dr = date_range("03/06/2012 00:00", periods=200, freq="W-FRI", tz="US/Eastern")
+        assert (dr.hour == 0).all()
+        dr = date_range("2012-11-02", periods=10, tz=tzstr)
+        result = dr.hour
+        expected = pd.Index([0] * 10, dtype="int32")
+        tm.assert_index_equal(result, expected)
+    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
+    def test_date_range_timezone_str_argument(self, tzstr):
+        tz = timezones.maybe_get_tz(tzstr)
+        result = date_range("1/1/2000", periods=10, tz=tzstr)
+        expected = date_range("1/1/2000", periods=10, tz=tz)
+        tm.assert_index_equal(result, expected)
+    def test_date_range_with_fixed_tz(self):
+        off = FixedOffset(420, "+07:00")
+        start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off)
+        end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off)
+        rng = date_range(start=start, end=end)
+        assert off == rng.tz
+        rng2 = date_range(start, periods=len(rng), tz=off)
+        tm.assert_index_equal(rng, rng2)
+        rng3 = date_range("3/11/2012 05:00:00+07:00", "6/11/2012 05:00:00+07:00")
+        assert (rng.values == rng3.values).all()
+    def test_date_range_with_fixedoffset_noname(self):
+        off = fixed_off_no_name
+        start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off)
+        end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off)
+        rng = date_range(start=start, end=end)
+        assert off == rng.tz
+        idx = pd.Index([start, end])
+        assert off == idx.tz
+    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
+    def test_date_range_with_tz(self, tzstr):
+        stamp = Timestamp("3/11/2012 05:00", tz=tzstr)
+        assert stamp.hour == 5
+        rng = date_range("3/11/2012 04:00", periods=10, freq="h", tz=tzstr)
+        assert stamp == rng[1]
+    @pytest.mark.parametrize("tz", ["Europe/London", "dateutil/Europe/London"])
+    def test_date_range_ambiguous_endpoint(self, tz):
+        # construction with an ambiguous end-point
+        # GH#11626
+        with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"):
+            date_range(
+                "2013-10-26 23:00", "2013-10-27 01:00", tz="Europe/London", freq="h"
+            )
+        times = date_range(
+            "2013-10-26 23:00", "2013-10-27 01:00", freq="h", tz=tz, ambiguous="infer"
+        )
+        assert times[0] == Timestamp("2013-10-26 23:00", tz=tz)
+        assert times[-1] == Timestamp("2013-10-27 01:00:00+0000", tz=tz)
+    @pytest.mark.parametrize(
+        "tz, option, expected",
+        [
+            ["US/Pacific", "shift_forward", "2019-03-10 03:00"],
+            ["dateutil/US/Pacific", "shift_forward", "2019-03-10 03:00"],
+            ["US/Pacific", "shift_backward", "2019-03-10 01:00"],
+            ["dateutil/US/Pacific", "shift_backward", "2019-03-10 01:00"],
+            ["US/Pacific", timedelta(hours=1), "2019-03-10 03:00"],
+        ],
+    )
+    def test_date_range_nonexistent_endpoint(self, tz, option, expected):
+        # construction with an nonexistent end-point
+        with pytest.raises(pytz.NonExistentTimeError, match="2019-03-10 02:00:00"):
+            date_range(
+                "2019-03-10 00:00", "2019-03-10 02:00", tz="US/Pacific", freq="h"
+            )
+        times = date_range(
+            "2019-03-10 00:00", "2019-03-10 02:00", freq="h", tz=tz, nonexistent=option
+        )
+        assert times[-1] == Timestamp(expected, tz=tz)
+class TestGenRangeGeneration:
+    @pytest.mark.parametrize(
+        "freqstr,offset",
+        [
+            ("B", BDay()),
+            ("C", CDay()),
+        ],
+    )
+    def test_generate(self, freqstr, offset):
+        rng1 = list(generate_range(START, END, periods=None, offset=offset, unit="ns"))
+        rng2 = list(generate_range(START, END, periods=None, offset=freqstr, unit="ns"))
+        assert rng1 == rng2
+    def test_1(self):
+        rng = list(
+            generate_range(
+                start=datetime(2009, 3, 25),
+                end=None,
+                periods=2,
+                offset=BDay(),
+                unit="ns",
+            )
+        )
+        expected = [datetime(2009, 3, 25), datetime(2009, 3, 26)]
+        assert rng == expected
+    def test_2(self):
+        rng = list(
+            generate_range(
+                start=datetime(2008, 1, 1),
+                end=datetime(2008, 1, 3),
+                periods=None,
+                offset=BDay(),
+                unit="ns",
+            )
+        )
+        expected = [datetime(2008, 1, 1), datetime(2008, 1, 2), datetime(2008, 1, 3)]
+        assert rng == expected
+    def test_3(self):
+        rng = list(
+            generate_range(
+                start=datetime(2008, 1, 5),
+                end=datetime(2008, 1, 6),
+                periods=None,
+                offset=BDay(),
+                unit="ns",
+            )
+        )
+        expected = []
+        assert rng == expected
+    def test_precision_finer_than_offset(self):
+        # GH#9907
+        result1 = date_range(
+            start="2015-04-15 00:00:03", end="2016-04-22 00:00:00", freq="QE"
+        )
+        result2 = date_range(
+            start="2015-04-15 00:00:03", end="2015-06-22 00:00:04", freq="W"
+        )
+        expected1_list = [
+            "2015-06-30 00:00:03",
+            "2015-09-30 00:00:03",
+            "2015-12-31 00:00:03",
+            "2016-03-31 00:00:03",
+        ]
+        expected2_list = [
+            "2015-04-19 00:00:03",
+            "2015-04-26 00:00:03",
+            "2015-05-03 00:00:03",
+            "2015-05-10 00:00:03",
+            "2015-05-17 00:00:03",
+            "2015-05-24 00:00:03",
+            "2015-05-31 00:00:03",
+            "2015-06-07 00:00:03",
+            "2015-06-14 00:00:03",
+            "2015-06-21 00:00:03",
+        ]
+        expected1 = DatetimeIndex(
+            expected1_list, dtype="datetime64[ns]", freq="QE-DEC", tz=None
+        )
+        expected2 = DatetimeIndex(
+            expected2_list, dtype="datetime64[ns]", freq="W-SUN", tz=None
+        )
+        tm.assert_index_equal(result1, expected1)
+        tm.assert_index_equal(result2, expected2)
+    dt1, dt2 = "2017-01-01", "2017-01-01"
+    tz1, tz2 = "US/Eastern", "Europe/London"
+    @pytest.mark.parametrize(
+        "start,end",
+        [
+            (Timestamp(dt1, tz=tz1), Timestamp(dt2)),
+            (Timestamp(dt1), Timestamp(dt2, tz=tz2)),
+            (Timestamp(dt1, tz=tz1), Timestamp(dt2, tz=tz2)),
+            (Timestamp(dt1, tz=tz2), Timestamp(dt2, tz=tz1)),
+        ],
+    )
+    def test_mismatching_tz_raises_err(self, start, end):
+        # issue 18488
+        msg = "Start and end cannot both be tz-aware with different timezones"
+        with pytest.raises(TypeError, match=msg):
+            date_range(start, end)
+        with pytest.raises(TypeError, match=msg):
+            date_range(start, end, freq=BDay())
+class TestBusinessDateRange:
+    def test_constructor(self):
+        bdate_range(START, END, freq=BDay())
+        bdate_range(START, periods=20, freq=BDay())
+        bdate_range(end=START, periods=20, freq=BDay())
+        msg = "periods must be a number, got B"
+        with pytest.raises(TypeError, match=msg):
+            date_range("2011-1-1", "2012-1-1", "B")
+        with pytest.raises(TypeError, match=msg):
+            bdate_range("2011-1-1", "2012-1-1", "B")
+        msg = "freq must be specified for bdate_range; use date_range instead"
+        with pytest.raises(TypeError, match=msg):
+            bdate_range(START, END, periods=10, freq=None)
+    def test_misc(self):
+        end = datetime(2009, 5, 13)
+        dr = bdate_range(end=end, periods=20)
+        firstDate = end - 19 * BDay()
+        assert len(dr) == 20
+        assert dr[0] == firstDate
+        assert dr[-1] == end
+    def test_date_parse_failure(self):
+        badly_formed_date = "2007/100/1"
+        msg = "Unknown datetime string format, unable to parse: 2007/100/1"
+        with pytest.raises(ValueError, match=msg):
+            Timestamp(badly_formed_date)
+        with pytest.raises(ValueError, match=msg):
+            bdate_range(start=badly_formed_date, periods=10)
+        with pytest.raises(ValueError, match=msg):
+            bdate_range(end=badly_formed_date, periods=10)
+        with pytest.raises(ValueError, match=msg):
+            bdate_range(badly_formed_date, badly_formed_date)
+    def test_daterange_bug_456(self):
+        # GH #456
+        rng1 = bdate_range("12/5/2011", "12/5/2011")
+        rng2 = bdate_range("12/2/2011", "12/5/2011")
+        assert rng2._data.freq == BDay()
+        result = rng1.union(rng2)
+        assert isinstance(result, DatetimeIndex)
+    @pytest.mark.parametrize("inclusive", ["left", "right", "neither", "both"])
+    def test_bdays_and_open_boundaries(self, inclusive):
+        # GH 6673
+        start = "2018-07-21"  # Saturday
+        end = "2018-07-29"  # Sunday
+        result = date_range(start, end, freq="B", inclusive=inclusive)
+        bday_start = "2018-07-23"  # Monday
+        bday_end = "2018-07-27"  # Friday
+        expected = date_range(bday_start, bday_end, freq="D")
+        tm.assert_index_equal(result, expected)
+        # Note: we do _not_ expect the freqs to match here
+    def test_bday_near_overflow(self):
+        # GH#24252 avoid doing unnecessary addition that _would_ overflow
+        start = Timestamp.max.floor("D").to_pydatetime()
+        rng = date_range(start, end=None, periods=1, freq="B")
+        expected = DatetimeIndex([start], freq="B").as_unit("ns")
+        tm.assert_index_equal(rng, expected)
+    def test_bday_overflow_error(self):
+        # GH#24252 check that we get OutOfBoundsDatetime and not OverflowError
+        msg = "Out of bounds nanosecond timestamp"
+        start = Timestamp.max.floor("D").to_pydatetime()
+        with pytest.raises(OutOfBoundsDatetime, match=msg):
+            date_range(start, periods=2, freq="B")
+class TestCustomDateRange:
+    def test_constructor(self):
+        bdate_range(START, END, freq=CDay())
+        bdate_range(START, periods=20, freq=CDay())
+        bdate_range(end=START, periods=20, freq=CDay())
+        msg = "periods must be a number, got C"
+        with pytest.raises(TypeError, match=msg):
+            date_range("2011-1-1", "2012-1-1", "C")
+        with pytest.raises(TypeError, match=msg):
+            bdate_range("2011-1-1", "2012-1-1", "C")
+    def test_misc(self):
+        end = datetime(2009, 5, 13)
+        dr = bdate_range(end=end, periods=20, freq="C")
+        firstDate = end - 19 * CDay()
+        assert len(dr) == 20
+        assert dr[0] == firstDate
+        assert dr[-1] == end
+    def test_daterange_bug_456(self):
+        # GH #456
+        rng1 = bdate_range("12/5/2011", "12/5/2011", freq="C")
+        rng2 = bdate_range("12/2/2011", "12/5/2011", freq="C")
+        assert rng2._data.freq == CDay()
+        result = rng1.union(rng2)
+        assert isinstance(result, DatetimeIndex)
+    def test_cdaterange(self, unit):
+        result = bdate_range("2013-05-01", periods=3, freq="C", unit=unit)
+        expected = DatetimeIndex(
+            ["2013-05-01", "2013-05-02", "2013-05-03"], dtype=f"M8[{unit}]", freq="C"
+        )
+        tm.assert_index_equal(result, expected)
+        assert result.freq == expected.freq
+    def test_cdaterange_weekmask(self, unit):
+        result = bdate_range(
+            "2013-05-01", periods=3, freq="C", weekmask="Sun Mon Tue Wed Thu", unit=unit
+        )
+        expected = DatetimeIndex(
+            ["2013-05-01", "2013-05-02", "2013-05-05"],
+            dtype=f"M8[{unit}]",
+            freq=result.freq,
+        )
+        tm.assert_index_equal(result, expected)
+        assert result.freq == expected.freq
+        # raise with non-custom freq
+        msg = (
+            "a custom frequency string is required when holidays or "
+            "weekmask are passed, got frequency B"
+        )
+        with pytest.raises(ValueError, match=msg):
+            bdate_range("2013-05-01", periods=3, weekmask="Sun Mon Tue Wed Thu")
+    def test_cdaterange_holidays(self, unit):
+        result = bdate_range(
+            "2013-05-01", periods=3, freq="C", holidays=["2013-05-01"], unit=unit
+        )
+        expected = DatetimeIndex(
+            ["2013-05-02", "2013-05-03", "2013-05-06"],
+            dtype=f"M8[{unit}]",
+            freq=result.freq,
+        )
+        tm.assert_index_equal(result, expected)
+        assert result.freq == expected.freq
+        # raise with non-custom freq
+        msg = (
+            "a custom frequency string is required when holidays or "
+            "weekmask are passed, got frequency B"
+        )
+        with pytest.raises(ValueError, match=msg):
+            bdate_range("2013-05-01", periods=3, holidays=["2013-05-01"])
+    def test_cdaterange_weekmask_and_holidays(self, unit):
+        result = bdate_range(
+            "2013-05-01",
+            periods=3,
+            freq="C",
+            weekmask="Sun Mon Tue Wed Thu",
+            holidays=["2013-05-01"],
+            unit=unit,
+        )
+        expected = DatetimeIndex(
+            ["2013-05-02", "2013-05-05", "2013-05-06"],
+            dtype=f"M8[{unit}]",
+            freq=result.freq,
+        )
+        tm.assert_index_equal(result, expected)
+        assert result.freq == expected.freq
+    def test_cdaterange_holidays_weekmask_requires_freqstr(self):
+        # raise with non-custom freq
+        msg = (
+            "a custom frequency string is required when holidays or "
+            "weekmask are passed, got frequency B"
+        )
+        with pytest.raises(ValueError, match=msg):
+            bdate_range(
+                "2013-05-01",
+                periods=3,
+                weekmask="Sun Mon Tue Wed Thu",
+                holidays=["2013-05-01"],
+            )
+    @pytest.mark.parametrize(
+        "freq", [freq for freq in prefix_mapping if freq.startswith("C")]
+    )
+    def test_all_custom_freq(self, freq):
+        # should not raise
+        bdate_range(
+            START, END, freq=freq, weekmask="Mon Wed Fri", holidays=["2009-03-14"]
+        )
+        bad_freq = freq + "FOO"
+        msg = f"invalid custom frequency string: {bad_freq}"
+        with pytest.raises(ValueError, match=msg):
+            bdate_range(START, END, freq=bad_freq)
+    @pytest.mark.parametrize(
+        "start_end",
+        [
+            ("2018-01-01T00:00:01.000Z", "2018-01-03T00:00:01.000Z"),
+            ("2018-01-01T00:00:00.010Z", "2018-01-03T00:00:00.010Z"),
+            ("2001-01-01T00:00:00.010Z", "2001-01-03T00:00:00.010Z"),
+        ],
+    )
+    def test_range_with_millisecond_resolution(self, start_end):
+        # https://github.com/pandas-dev/pandas/issues/24110
+        start, end = start_end
+        result = date_range(start=start, end=end, periods=2, inclusive="left")
+        expected = DatetimeIndex([start], dtype="M8[ns, UTC]")
+        tm.assert_index_equal(result, expected)
+    @pytest.mark.parametrize(
+        "start,period,expected",
+        [
+            ("2022-07-23 00:00:00+02:00", 1, ["2022-07-25 00:00:00+02:00"]),
+            ("2022-07-22 00:00:00+02:00", 1, ["2022-07-22 00:00:00+02:00"]),
+            (
+                "2022-07-22 00:00:00+02:00",
+                2,
+                ["2022-07-22 00:00:00+02:00", "2022-07-25 00:00:00+02:00"],
+            ),
+        ],
+    )
+    def test_range_with_timezone_and_custombusinessday(self, start, period, expected):
+        # GH49441
+        result = date_range(start=start, periods=period, freq="C")
+        expected = DatetimeIndex(expected).as_unit("ns")
+        tm.assert_index_equal(result, expected)
+class TestDateRangeNonNano:
+    def test_date_range_reso_validation(self):
+        msg = "'unit' must be one of 's', 'ms', 'us', 'ns'"
+        with pytest.raises(ValueError, match=msg):
+            date_range("2016-01-01", "2016-03-04", periods=3, unit="h")
+    def test_date_range_freq_higher_than_reso(self):
+        # freq being higher-resolution than reso is a problem
+        msg = "Use a lower freq or a higher unit instead"
+        with pytest.raises(ValueError, match=msg):
+            #    # TODO give a more useful or informative message?
+            date_range("2016-01-01", "2016-01-02", freq="ns", unit="ms")
+    def test_date_range_freq_matches_reso(self):
+        # GH#49106 matching reso is OK
+        dti = date_range("2016-01-01", "2016-01-01 00:00:01", freq="ms", unit="ms")
+        rng = np.arange(1_451_606_400_000, 1_451_606_401_001, dtype=np.int64)
+        expected = DatetimeIndex(rng.view("M8[ms]"), freq="ms")
+        tm.assert_index_equal(dti, expected)
+        dti = date_range("2016-01-01", "2016-01-01 00:00:01", freq="us", unit="us")
+        rng = np.arange(1_451_606_400_000_000, 1_451_606_401_000_001, dtype=np.int64)
+        expected = DatetimeIndex(rng.view("M8[us]"), freq="us")
+        tm.assert_index_equal(dti, expected)
+        dti = date_range("2016-01-01", "2016-01-01 00:00:00.001", freq="ns", unit="ns")
+        rng = np.arange(
+            1_451_606_400_000_000_000, 1_451_606_400_001_000_001, dtype=np.int64
+        )
+        expected = DatetimeIndex(rng.view("M8[ns]"), freq="ns")
+        tm.assert_index_equal(dti, expected)
+    def test_date_range_freq_lower_than_endpoints(self):
+        start = Timestamp("2022-10-19 11:50:44.719781")
+        end = Timestamp("2022-10-19 11:50:47.066458")
+        # start and end cannot be cast to "s" unit without lossy rounding,
+        #  so we do not allow this in date_range
+        with pytest.raises(ValueError, match="Cannot losslessly convert units"):
+            date_range(start, end, periods=3, unit="s")
+        # but we can losslessly cast to "us"
+        dti = date_range(start, end, periods=2, unit="us")
+        rng = np.array(
+            [start.as_unit("us")._value, end.as_unit("us")._value], dtype=np.int64
+        )
+        expected = DatetimeIndex(rng.view("M8[us]"))
+        tm.assert_index_equal(dti, expected)
+    def test_date_range_non_nano(self):
+        start = np.datetime64("1066-10-14")  # Battle of Hastings
+        end = np.datetime64("2305-07-13")  # Jean-Luc Picard's birthday
+        dti = date_range(start, end, freq="D", unit="s")
+        assert dti.freq == "D"
+        assert dti.dtype == "M8[s]"
+        exp = np.arange(
+            start.astype("M8[s]").view("i8"),
+            (end + 1).astype("M8[s]").view("i8"),
+            24 * 3600,
+        ).view("M8[s]")
+        tm.assert_numpy_array_equal(dti.to_numpy(), exp)
+class TestDateRangeNonTickFreq:
+    # Tests revolving around less-common (non-Tick) `freq` keywords.
+    def test_date_range_custom_business_month_begin(self, unit):
+        hcal = USFederalHolidayCalendar()
+        freq = offsets.CBMonthBegin(calendar=hcal)
+        dti = date_range(start="20120101", end="20130101", freq=freq, unit=unit)
+        assert all(freq.is_on_offset(x) for x in dti)
+        expected = DatetimeIndex(
+            [
+                "2012-01-03",
+                "2012-02-01",
+                "2012-03-01",
+                "2012-04-02",
+                "2012-05-01",
+                "2012-06-01",
+                "2012-07-02",
+                "2012-08-01",
+                "2012-09-04",
+                "2012-10-01",
+                "2012-11-01",
+                "2012-12-03",
+            ],
+            dtype=f"M8[{unit}]",
+            freq=freq,
+        )
+        tm.assert_index_equal(dti, expected)
+    def test_date_range_custom_business_month_end(self, unit):
+        hcal = USFederalHolidayCalendar()
+        freq = offsets.CBMonthEnd(calendar=hcal)
+        dti = date_range(start="20120101", end="20130101", freq=freq, unit=unit)
+        assert all(freq.is_on_offset(x) for x in dti)
+        expected = DatetimeIndex(
+            [
+                "2012-01-31",
+                "2012-02-29",
+                "2012-03-30",
+                "2012-04-30",
+                "2012-05-31",
+                "2012-06-29",
+                "2012-07-31",
+                "2012-08-31",
+                "2012-09-28",
+                "2012-10-31",
+                "2012-11-30",
+                "2012-12-31",
+            ],
+            dtype=f"M8[{unit}]",
+            freq=freq,
+        )
+        tm.assert_index_equal(dti, expected)
+    def test_date_range_with_custom_holidays(self, unit):
+        # GH#30593
+        freq = offsets.CustomBusinessHour(start="15:00", holidays=["2020-11-26"])
+        result = date_range(start="2020-11-25 15:00", periods=4, freq=freq, unit=unit)
+        expected = DatetimeIndex(
+            [
+                "2020-11-25 15:00:00",
+                "2020-11-25 16:00:00",
+                "2020-11-27 15:00:00",
+                "2020-11-27 16:00:00",
+            ],
+            dtype=f"M8[{unit}]",
+            freq=freq,
+        )
+        tm.assert_index_equal(result, expected)
+    def test_date_range_businesshour(self, unit):
+        idx = DatetimeIndex(
+            [
+                "2014-07-04 09:00",
+                "2014-07-04 10:00",
+                "2014-07-04 11:00",
+                "2014-07-04 12:00",
+                "2014-07-04 13:00",
+                "2014-07-04 14:00",
+                "2014-07-04 15:00",
+                "2014-07-04 16:00",
+            ],
+            dtype=f"M8[{unit}]",
+            freq="bh",
+        )
+        rng = date_range("2014-07-04 09:00", "2014-07-04 16:00", freq="bh", unit=unit)
+        tm.assert_index_equal(idx, rng)
+        idx = DatetimeIndex(
+            ["2014-07-04 16:00", "2014-07-07 09:00"], dtype=f"M8[{unit}]", freq="bh"
+        )
+        rng = date_range("2014-07-04 16:00", "2014-07-07 09:00", freq="bh", unit=unit)
+        tm.assert_index_equal(idx, rng)
+        idx = DatetimeIndex(
+            [
+                "2014-07-04 09:00",
+                "2014-07-04 10:00",
+                "2014-07-04 11:00",
+                "2014-07-04 12:00",
+                "2014-07-04 13:00",
+                "2014-07-04 14:00",
+                "2014-07-04 15:00",
+                "2014-07-04 16:00",
+                "2014-07-07 09:00",
+                "2014-07-07 10:00",
+                "2014-07-07 11:00",
+                "2014-07-07 12:00",
+                "2014-07-07 13:00",
+                "2014-07-07 14:00",
+                "2014-07-07 15:00",
+                "2014-07-07 16:00",
+                "2014-07-08 09:00",
+                "2014-07-08 10:00",
+                "2014-07-08 11:00",
+                "2014-07-08 12:00",
+                "2014-07-08 13:00",
+                "2014-07-08 14:00",
+                "2014-07-08 15:00",
+                "2014-07-08 16:00",
+            ],
+            dtype=f"M8[{unit}]",
+            freq="bh",
+        )
+        rng = date_range("2014-07-04 09:00", "2014-07-08 16:00", freq="bh", unit=unit)
+        tm.assert_index_equal(idx, rng)
+    def test_date_range_business_hour2(self, unit):
+        idx1 = date_range(
+            start="2014-07-04 15:00", end="2014-07-08 10:00", freq="bh", unit=unit
+        )
+        idx2 = date_range(start="2014-07-04 15:00", periods=12, freq="bh", unit=unit)
+        idx3 = date_range(end="2014-07-08 10:00", periods=12, freq="bh", unit=unit)
+        expected = DatetimeIndex(
+            [
+                "2014-07-04 15:00",
+                "2014-07-04 16:00",
+                "2014-07-07 09:00",
+                "2014-07-07 10:00",
+                "2014-07-07 11:00",
+                "2014-07-07 12:00",
+                "2014-07-07 13:00",
+                "2014-07-07 14:00",
+                "2014-07-07 15:00",
+                "2014-07-07 16:00",
+                "2014-07-08 09:00",
+                "2014-07-08 10:00",
+            ],
+            dtype=f"M8[{unit}]",
+            freq="bh",
+        )
+        tm.assert_index_equal(idx1, expected)
+        tm.assert_index_equal(idx2, expected)
+        tm.assert_index_equal(idx3, expected)
+        idx4 = date_range(
+            start="2014-07-04 15:45", end="2014-07-08 10:45", freq="bh", unit=unit
+        )
+        idx5 = date_range(start="2014-07-04 15:45", periods=12, freq="bh", unit=unit)
+        idx6 = date_range(end="2014-07-08 10:45", periods=12, freq="bh", unit=unit)
+        expected2 = expected + Timedelta(minutes=45).as_unit(unit)
+        expected2.freq = "bh"
+        tm.assert_index_equal(idx4, expected2)
+        tm.assert_index_equal(idx5, expected2)
+        tm.assert_index_equal(idx6, expected2)
+    def test_date_range_business_hour_short(self, unit):
+        # GH#49835
+        idx4 = date_range(start="2014-07-01 10:00", freq="bh", periods=1, unit=unit)
+        expected4 = DatetimeIndex(["2014-07-01 10:00"], dtype=f"M8[{unit}]", freq="bh")
+        tm.assert_index_equal(idx4, expected4)
+    def test_date_range_year_start(self, unit):
+        # see GH#9313
+        rng = date_range("1/1/2013", "7/1/2017", freq="YS", unit=unit)
+        exp = DatetimeIndex(
+            ["2013-01-01", "2014-01-01", "2015-01-01", "2016-01-01", "2017-01-01"],
+            dtype=f"M8[{unit}]",
+            freq="YS",
+        )
+        tm.assert_index_equal(rng, exp)
+    def test_date_range_year_end(self, unit):
+        # see GH#9313
+        rng = date_range("1/1/2013", "7/1/2017", freq="YE", unit=unit)
+        exp = DatetimeIndex(
+            ["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-31"],
+            dtype=f"M8[{unit}]",
+            freq="YE",
+        )
+        tm.assert_index_equal(rng, exp)
+    def test_date_range_negative_freq_year_end(self, unit):
+        # GH#11018
+        rng = date_range("2011-12-31", freq="-2YE", periods=3, unit=unit)
+        exp = DatetimeIndex(
+            ["2011-12-31", "2009-12-31", "2007-12-31"], dtype=f"M8[{unit}]", freq="-2YE"
+        )
+        tm.assert_index_equal(rng, exp)
+        assert rng.freq == "-2YE"
+    def test_date_range_business_year_end_year(self, unit):
+        # see GH#9313
+        rng = date_range("1/1/2013", "7/1/2017", freq="BYE", unit=unit)
+        exp = DatetimeIndex(
+            ["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-30"],
+            dtype=f"M8[{unit}]",
+            freq="BYE",
+        )
+        tm.assert_index_equal(rng, exp)
+    def test_date_range_bms(self, unit):
+        # GH#1645
+        result = date_range("1/1/2000", periods=10, freq="BMS", unit=unit)
+        expected = DatetimeIndex(
+            [
+                "2000-01-03",
+                "2000-02-01",
+                "2000-03-01",
+                "2000-04-03",
+                "2000-05-01",
+                "2000-06-01",
+                "2000-07-03",
+                "2000-08-01",
+                "2000-09-01",
+                "2000-10-02",
+            ],
+            dtype=f"M8[{unit}]",
+            freq="BMS",
+        )
+        tm.assert_index_equal(result, expected)
+    def test_date_range_semi_month_begin(self, unit):
+        dates = [
+            datetime(2007, 12, 15),
+            datetime(2008, 1, 1),
+            datetime(2008, 1, 15),
+            datetime(2008, 2, 1),
+            datetime(2008, 2, 15),
+            datetime(2008, 3, 1),
+            datetime(2008, 3, 15),
+            datetime(2008, 4, 1),
+            datetime(2008, 4, 15),
+            datetime(2008, 5, 1),
+            datetime(2008, 5, 15),
+            datetime(2008, 6, 1),
+            datetime(2008, 6, 15),
+            datetime(2008, 7, 1),
+            datetime(2008, 7, 15),
+            datetime(2008, 8, 1),
+            datetime(2008, 8, 15),
+            datetime(2008, 9, 1),
+            datetime(2008, 9, 15),
+            datetime(2008, 10, 1),
+            datetime(2008, 10, 15),
+            datetime(2008, 11, 1),
+            datetime(2008, 11, 15),
+            datetime(2008, 12, 1),
+            datetime(2008, 12, 15),
+        ]
+        # ensure generating a range with DatetimeIndex gives same result
+        result = date_range(start=dates[0], end=dates[-1], freq="SMS", unit=unit)
+        exp = DatetimeIndex(dates, dtype=f"M8[{unit}]", freq="SMS")
+        tm.assert_index_equal(result, exp)
+    def test_date_range_semi_month_end(self, unit):
+        dates = [
+            datetime(2007, 12, 31),
+            datetime(2008, 1, 15),
+            datetime(2008, 1, 31),
+            datetime(2008, 2, 15),
+            datetime(2008, 2, 29),
+            datetime(2008, 3, 15),
+            datetime(2008, 3, 31),
+            datetime(2008, 4, 15),
+            datetime(2008, 4, 30),
+            datetime(2008, 5, 15),
+            datetime(2008, 5, 31),
+            datetime(2008, 6, 15),
+            datetime(2008, 6, 30),
+            datetime(2008, 7, 15),
+            datetime(2008, 7, 31),
+            datetime(2008, 8, 15),
+            datetime(2008, 8, 31),
+            datetime(2008, 9, 15),
+            datetime(2008, 9, 30),
+            datetime(2008, 10, 15),
+            datetime(2008, 10, 31),
+            datetime(2008, 11, 15),
+            datetime(2008, 11, 30),
+            datetime(2008, 12, 15),
+            datetime(2008, 12, 31),
+        ]
+        # ensure generating a range with DatetimeIndex gives same result
+        result = date_range(start=dates[0], end=dates[-1], freq="SME", unit=unit)
+        exp = DatetimeIndex(dates, dtype=f"M8[{unit}]", freq="SME")
+        tm.assert_index_equal(result, exp)
+    def test_date_range_week_of_month(self, unit):
+        # GH#20517
+        # Note the start here is not on_offset for this freq
+        result = date_range(start="20110101", periods=1, freq="WOM-1MON", unit=unit)
+        expected = DatetimeIndex(["2011-01-03"], dtype=f"M8[{unit}]", freq="WOM-1MON")
+        tm.assert_index_equal(result, expected)
+        result2 = date_range(start="20110101", periods=2, freq="WOM-1MON", unit=unit)
+        expected2 = DatetimeIndex(
+            ["2011-01-03", "2011-02-07"], dtype=f"M8[{unit}]", freq="WOM-1MON"
+        )
+        tm.assert_index_equal(result2, expected2)
+    def test_date_range_week_of_month2(self, unit):
+        # GH#5115, GH#5348
+        result = date_range("2013-1-1", periods=4, freq="WOM-1SAT", unit=unit)
+        expected = DatetimeIndex(
+            ["2013-01-05", "2013-02-02", "2013-03-02", "2013-04-06"],
+            dtype=f"M8[{unit}]",
+            freq="WOM-1SAT",
+        )
+        tm.assert_index_equal(result, expected)
+    def test_date_range_negative_freq_month_end(self, unit):
+        # GH#11018
+        rng = date_range("2011-01-31", freq="-2ME", periods=3, unit=unit)
+        exp = DatetimeIndex(
+            ["2011-01-31", "2010-11-30", "2010-09-30"], dtype=f"M8[{unit}]", freq="-2ME"
+        )
+        tm.assert_index_equal(rng, exp)
+        assert rng.freq == "-2ME"
+    def test_date_range_fy5253(self, unit):
+        freq = offsets.FY5253(startingMonth=1, weekday=3, variation="nearest")
+        dti = date_range(
+            start="2013-01-01",
+            periods=2,
+            freq=freq,
+            unit=unit,
+        )
+        expected = DatetimeIndex(
+            ["2013-01-31", "2014-01-30"], dtype=f"M8[{unit}]", freq=freq
+        )
+        tm.assert_index_equal(dti, expected)
+    @pytest.mark.parametrize(
+        "freqstr,offset",
+        [
+            ("QS", offsets.QuarterBegin(startingMonth=1)),
+            ("BQE", offsets.BQuarterEnd(startingMonth=12)),
+            ("W-SUN", offsets.Week(weekday=6)),
+        ],
+    )
+    def test_date_range_freqstr_matches_offset(self, freqstr, offset):
+        sdate = datetime(1999, 12, 25)
+        edate = datetime(2000, 1, 1)
+        idx1 = date_range(start=sdate, end=edate, freq=freqstr)
+        idx2 = date_range(start=sdate, end=edate, freq=offset)
+        assert len(idx1) == len(idx2)
+        assert idx1.freq == idx2.freq

py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_datetime.py ADDED Viewed

	@@ -0,0 +1,216 @@

+import datetime as dt
+from datetime import date
+import re
+import numpy as np
+import pytest
+from pandas.compat.numpy import np_long
+import pandas as pd
+from pandas import (
+    DataFrame,
+    DatetimeIndex,
+    Index,
+    Timestamp,
+    date_range,
+    offsets,
+)
+import pandas._testing as tm
+class TestDatetimeIndex:
+    def test_is_(self):
+        dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME")
+        assert dti.is_(dti)
+        assert dti.is_(dti.view())
+        assert not dti.is_(dti.copy())
+    def test_time_overflow_for_32bit_machines(self):
+        # GH8943.  On some machines NumPy defaults to np.int32 (for example,
+        # 32-bit Linux machines).  In the function _generate_regular_range
+        # found in tseries/index.py, `periods` gets multiplied by `strides`
+        # (which has value 1e9) and since the max value for np.int32 is ~2e9,
+        # and since those machines won't promote np.int32 to np.int64, we get
+        # overflow.
+        periods = np_long(1000)
+        idx1 = date_range(start="2000", periods=periods, freq="s")
+        assert len(idx1) == periods
+        idx2 = date_range(end="2000", periods=periods, freq="s")
+        assert len(idx2) == periods
+    def test_nat(self):
+        assert DatetimeIndex([np.nan])[0] is pd.NaT
+    def test_week_of_month_frequency(self):
+        # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise
+        d1 = date(2002, 9, 1)
+        d2 = date(2013, 10, 27)
+        d3 = date(2012, 9, 30)
+        idx1 = DatetimeIndex([d1, d2])
+        idx2 = DatetimeIndex([d3])
+        result_append = idx1.append(idx2)
+        expected = DatetimeIndex([d1, d2, d3])
+        tm.assert_index_equal(result_append, expected)
+        result_union = idx1.union(idx2)
+        expected = DatetimeIndex([d1, d3, d2])
+        tm.assert_index_equal(result_union, expected)
+    def test_append_nondatetimeindex(self):
+        rng = date_range("1/1/2000", periods=10)
+        idx = Index(["a", "b", "c", "d"])
+        result = rng.append(idx)
+        assert isinstance(result[0], Timestamp)
+    def test_misc_coverage(self):
+        rng = date_range("1/1/2000", periods=5)
+        result = rng.groupby(rng.day)
+        assert isinstance(next(iter(result.values()))[0], Timestamp)
+    # TODO: belongs in frame groupby tests?
+    def test_groupby_function_tuple_1677(self):
+        df = DataFrame(
+            np.random.default_rng(2).random(100),
+            index=date_range("1/1/2000", periods=100),
+        )
+        monthly_group = df.groupby(lambda x: (x.year, x.month))
+        result = monthly_group.mean()
+        assert isinstance(result.index[0], tuple)
+    def assert_index_parameters(self, index):
+        assert index.freq == "40960ns"
+        assert index.inferred_freq == "40960ns"
+    def test_ns_index(self):
+        nsamples = 400
+        ns = int(1e9 / 24414)
+        dtstart = np.datetime64("2012-09-20T00:00:00")
+        dt = dtstart + np.arange(nsamples) * np.timedelta64(ns, "ns")
+        freq = ns * offsets.Nano()
+        index = DatetimeIndex(dt, freq=freq, name="time")
+        self.assert_index_parameters(index)
+        new_index = date_range(start=index[0], end=index[-1], freq=index.freq)
+        self.assert_index_parameters(new_index)
+    def test_asarray_tz_naive(self):
+        # This shouldn't produce a warning.
+        idx = date_range("2000", periods=2)
+        # M8[ns] by default
+        result = np.asarray(idx)
+        expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")
+        tm.assert_numpy_array_equal(result, expected)
+        # optionally, object
+        result = np.asarray(idx, dtype=object)
+        expected = np.array([Timestamp("2000-01-01"), Timestamp("2000-01-02")])
+        tm.assert_numpy_array_equal(result, expected)
+    def test_asarray_tz_aware(self):
+        tz = "US/Central"
+        idx = date_range("2000", periods=2, tz=tz)
+        expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]")
+        result = np.asarray(idx, dtype="datetime64[ns]")
+        tm.assert_numpy_array_equal(result, expected)
+        # Old behavior with no warning
+        result = np.asarray(idx, dtype="M8[ns]")
+        tm.assert_numpy_array_equal(result, expected)
+        # Future behavior with no warning
+        expected = np.array(
+            [Timestamp("2000-01-01", tz=tz), Timestamp("2000-01-02", tz=tz)]
+        )
+        result = np.asarray(idx, dtype=object)
+        tm.assert_numpy_array_equal(result, expected)
+    def test_CBH_deprecated(self):
+        msg = "'CBH' is deprecated and will be removed in a future version."
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = date_range(
+                dt.datetime(2022, 12, 11), dt.datetime(2022, 12, 13), freq="CBH"
+            )
+        result = DatetimeIndex(
+            [
+                "2022-12-12 09:00:00",
+                "2022-12-12 10:00:00",
+                "2022-12-12 11:00:00",
+                "2022-12-12 12:00:00",
+                "2022-12-12 13:00:00",
+                "2022-12-12 14:00:00",
+                "2022-12-12 15:00:00",
+                "2022-12-12 16:00:00",
+            ],
+            dtype="datetime64[ns]",
+            freq="cbh",
+        )
+        tm.assert_index_equal(result, expected)
+    @pytest.mark.parametrize(
+        "freq_depr, expected_values, expected_freq",
+        [
+            (
+                "AS-AUG",
+                ["2021-08-01", "2022-08-01", "2023-08-01"],
+                "YS-AUG",
+            ),
+            (
+                "1BAS-MAY",
+                ["2021-05-03", "2022-05-02", "2023-05-01"],
+                "1BYS-MAY",
+            ),
+        ],
+    )
+    def test_AS_BAS_deprecated(self, freq_depr, expected_values, expected_freq):
+        # GH#55479
+        freq_msg = re.split("[0-9]*", freq_depr, maxsplit=1)[1]
+        msg = f"'{freq_msg}' is deprecated and will be removed in a future version."
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = date_range(
+                dt.datetime(2020, 12, 1), dt.datetime(2023, 12, 1), freq=freq_depr
+            )
+        result = DatetimeIndex(
+            expected_values,
+            dtype="datetime64[ns]",
+            freq=expected_freq,
+        )
+        tm.assert_index_equal(result, expected)
+    @pytest.mark.parametrize(
+        "freq, expected_values, freq_depr",
+        [
+            ("2BYE-MAR", ["2016-03-31"], "2BA-MAR"),
+            ("2BYE-JUN", ["2016-06-30"], "2BY-JUN"),
+            ("2BME", ["2016-02-29", "2016-04-29", "2016-06-30"], "2BM"),
+            ("2BQE", ["2016-03-31"], "2BQ"),
+            ("1BQE-MAR", ["2016-03-31", "2016-06-30"], "1BQ-MAR"),
+        ],
+    )
+    def test_BM_BQ_BY_deprecated(self, freq, expected_values, freq_depr):
+        # GH#52064
+        msg = f"'{freq_depr[1:]}' is deprecated and will be removed "
+        f"in a future version, please use '{freq[1:]}' instead."
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = date_range(start="2016-02-21", end="2016-08-21", freq=freq_depr)
+        result = DatetimeIndex(
+            data=expected_values,
+            dtype="datetime64[ns]",
+            freq=freq,
+        )
+        tm.assert_index_equal(result, expected)

py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_formats.py ADDED Viewed

	@@ -0,0 +1,356 @@

+from datetime import datetime
+import dateutil.tz
+import numpy as np
+import pytest
+import pytz
+import pandas as pd
+from pandas import (
+    DatetimeIndex,
+    NaT,
+    Series,
+)
+import pandas._testing as tm
+@pytest.fixture(params=["s", "ms", "us", "ns"])
+def unit(request):
+    return request.param
+def test_get_values_for_csv():
+    index = pd.date_range(freq="1D", periods=3, start="2017-01-01")
+    # First, with no arguments.
+    expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object)
+    result = index._get_values_for_csv()
+    tm.assert_numpy_array_equal(result, expected)
+    # No NaN values, so na_rep has no effect
+    result = index._get_values_for_csv(na_rep="pandas")
+    tm.assert_numpy_array_equal(result, expected)
+    # Make sure date formatting works
+    expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype=object)
+    result = index._get_values_for_csv(date_format="%m-%Y-%d")
+    tm.assert_numpy_array_equal(result, expected)
+    # NULL object handling should work
+    index = DatetimeIndex(["2017-01-01", NaT, "2017-01-03"])
+    expected = np.array(["2017-01-01", "NaT", "2017-01-03"], dtype=object)
+    result = index._get_values_for_csv(na_rep="NaT")
+    tm.assert_numpy_array_equal(result, expected)
+    expected = np.array(["2017-01-01", "pandas", "2017-01-03"], dtype=object)
+    result = index._get_values_for_csv(na_rep="pandas")
+    tm.assert_numpy_array_equal(result, expected)
+    result = index._get_values_for_csv(na_rep="NaT", date_format="%Y-%m-%d %H:%M:%S.%f")
+    expected = np.array(
+        ["2017-01-01 00:00:00.000000", "NaT", "2017-01-03 00:00:00.000000"],
+        dtype=object,
+    )
+    tm.assert_numpy_array_equal(result, expected)
+    # invalid format
+    result = index._get_values_for_csv(na_rep="NaT", date_format="foo")
+    expected = np.array(["foo", "NaT", "foo"], dtype=object)
+    tm.assert_numpy_array_equal(result, expected)
+class TestDatetimeIndexRendering:
+    """repr/str/_summary rendering checks for DatetimeIndex."""
+    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
+    def test_dti_with_timezone_repr(self, tzstr):
+        """The repr of a tz-localized range still shows the local wall time."""
+        rng = pd.date_range("4/13/2010", "5/6/2010")
+        rng_eastern = rng.tz_localize(tzstr)
+        rng_repr = repr(rng_eastern)
+        assert "2010-04-13 00:00:00" in rng_repr
+    def test_dti_repr_dates(self):
+        """Midnight-only stamps are rendered as bare dates."""
+        text = str(pd.to_datetime([datetime(2013, 1, 1), datetime(2014, 1, 1)]))
+        assert "['2013-01-01'," in text
+        assert ", '2014-01-01']" in text
+    def test_dti_repr_mixed(self):
+        """If any stamp has a time component, every entry shows its time."""
+        text = str(
+            pd.to_datetime(
+                [datetime(2013, 1, 1), datetime(2014, 1, 1, 12), datetime(2014, 1, 1)]
+            )
+        )
+        assert "'2013-01-01 00:00:00'," in text
+        assert "'2014-01-01 00:00:00']" in text
+    def test_dti_repr_short(self):
+        """Smoke test: repr of 1-, 2- and 3-element ranges does not raise."""
+        dr = pd.date_range(start="1/1/2012", periods=1)
+        repr(dr)
+        dr = pd.date_range(start="1/1/2012", periods=2)
+        repr(dr)
+        dr = pd.date_range(start="1/1/2012", periods=3)
+        repr(dr)
+    @pytest.mark.parametrize(
+        "dates, freq, expected_repr",
+        [
+            (
+                ["2012-01-01 00:00:00"],
+                "60min",
+                (
+                    "DatetimeIndex(['2012-01-01 00:00:00'], "
+                    "dtype='datetime64[ns]', freq='60min')"
+                ),
+            ),
+            (
+                ["2012-01-01 00:00:00", "2012-01-01 01:00:00"],
+                "60min",
+                "DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 01:00:00'], "
+                "dtype='datetime64[ns]', freq='60min')",
+            ),
+            (
+                ["2012-01-01"],
+                "24h",
+                "DatetimeIndex(['2012-01-01'], dtype='datetime64[ns]', freq='24h')",
+            ),
+        ],
+    )
+    def test_dti_repr_time_midnight(self, dates, freq, expected_repr, unit):
+        """repr matches the expected string once "[ns]" is swapped for the unit."""
+        # GH53634
+        dti = DatetimeIndex(dates, freq).as_unit(unit)
+        actual_repr = repr(dti)
+        assert actual_repr == expected_repr.replace("[ns]", f"[{unit}]")
+    def test_dti_representation(self, unit):
+        """repr and str agree with the expected text for a series of indexes."""
+        # idxs and exp are parallel lists: exp[k] is the expected text for idxs[k]
+        idxs = []
+        idxs.append(DatetimeIndex([], freq="D"))
+        idxs.append(DatetimeIndex(["2011-01-01"], freq="D"))
+        idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D"))
+        idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D"))
+        idxs.append(
+            DatetimeIndex(
+                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
+                freq="h",
+                tz="Asia/Tokyo",
+            )
+        )
+        idxs.append(
+            DatetimeIndex(
+                ["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
+            )
+        )
+        idxs.append(
+            DatetimeIndex(["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="UTC")
+        )
+        exp = []
+        exp.append("DatetimeIndex([], dtype='datetime64[ns]', freq='D')")
+        exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', freq='D')")
+        exp.append(
+            "DatetimeIndex(['2011-01-01', '2011-01-02'], "
+            "dtype='datetime64[ns]', freq='D')"
+        )
+        exp.append(
+            "DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], "
+            "dtype='datetime64[ns]', freq='D')"
+        )
+        exp.append(
+            "DatetimeIndex(['2011-01-01 09:00:00+09:00', "
+            "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']"
+            ", dtype='datetime64[ns, Asia/Tokyo]', freq='h')"
+        )
+        exp.append(
+            "DatetimeIndex(['2011-01-01 09:00:00-05:00', "
+            "'2011-01-01 10:00:00-05:00', 'NaT'], "
+            "dtype='datetime64[ns, US/Eastern]', freq=None)"
+        )
+        exp.append(
+            "DatetimeIndex(['2011-01-01 09:00:00+00:00', "
+            "'2011-01-01 10:00:00+00:00', 'NaT'], "
+            "dtype='datetime64[ns, UTC]', freq=None)"
+            ""
+        )
+        # display.width is set to 300 while the reprs are compared
+        with pd.option_context("display.width", 300):
+            for index, expected in zip(idxs, exp):
+                index = index.as_unit(unit)
+                # the expected text is written for "ns"; substitute the unit under test
+                expected = expected.replace("[ns", f"[{unit}")
+                result = repr(index)
+                assert result == expected
+                result = str(index)
+                assert result == expected
+    # TODO: this is a Series.__repr__ test
+    def test_dti_representation_to_series(self, unit):
+        """repr of a Series built from each index matches the expected text."""
+        idx1 = DatetimeIndex([], freq="D")
+        idx2 = DatetimeIndex(["2011-01-01"], freq="D")
+        idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D")
+        idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
+        idx5 = DatetimeIndex(
+            ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
+            freq="h",
+            tz="Asia/Tokyo",
+        )
+        idx6 = DatetimeIndex(
+            ["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
+        )
+        idx7 = DatetimeIndex(["2011-01-01 09:00", "2011-01-02 10:15"])
+        exp1 = """Series([], dtype: datetime64[ns])"""
+        exp2 = "0   2011-01-01\ndtype: datetime64[ns]"
+        exp3 = "0   2011-01-01\n1   2011-01-02\ndtype: datetime64[ns]"
+        exp4 = (
+            "0   2011-01-01\n"
+            "1   2011-01-02\n"
+            "2   2011-01-03\n"
+            "dtype: datetime64[ns]"
+        )
+        exp5 = (
+            "0   2011-01-01 09:00:00+09:00\n"
+            "1   2011-01-01 10:00:00+09:00\n"
+            "2   2011-01-01 11:00:00+09:00\n"
+            "dtype: datetime64[ns, Asia/Tokyo]"
+        )
+        exp6 = (
+            "0   2011-01-01 09:00:00-05:00\n"
+            "1   2011-01-01 10:00:00-05:00\n"
+            "2                         NaT\n"
+            "dtype: datetime64[ns, US/Eastern]"
+        )
+        exp7 = (
+            "0   2011-01-01 09:00:00\n"
+            "1   2011-01-02 10:15:00\n"
+            "dtype: datetime64[ns]"
+        )
+        with pd.option_context("display.width", 300):
+            for idx, expected in zip(
+                [idx1, idx2, idx3, idx4, idx5, idx6, idx7],
+                [exp1, exp2, exp3, exp4, exp5, exp6, exp7],
+            ):
+                ser = Series(idx.as_unit(unit))
+                result = repr(ser)
+                # expected strings are written for "ns"; substitute the unit under test
+                assert result == expected.replace("[ns", f"[{unit}")
+    def test_dti_summary(self):
+        """_summary reports the entry count, the first/last stamps and the freq."""
+        # GH#9116
+        idx1 = DatetimeIndex([], freq="D")
+        idx2 = DatetimeIndex(["2011-01-01"], freq="D")
+        idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D")
+        idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
+        idx5 = DatetimeIndex(
+            ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
+            freq="h",
+            tz="Asia/Tokyo",
+        )
+        idx6 = DatetimeIndex(
+            ["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
+        )
+        exp1 = "DatetimeIndex: 0 entries\nFreq: D"
+        exp2 = "DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01\nFreq: D"
+        exp3 = "DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02\nFreq: D"
+        exp4 = "DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03\nFreq: D"
+        exp5 = (
+            "DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 "
+            "to 2011-01-01 11:00:00+09:00\n"
+            "Freq: h"
+        )
+        # idx6 is built without a freq, so its summary has no "Freq:" line
+        exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT"""
+        for idx, expected in zip(
+            [idx1, idx2, idx3, idx4, idx5, idx6], [exp1, exp2, exp3, exp4, exp5, exp6]
+        ):
+            result = idx._summary()
+            assert result == expected
+    @pytest.mark.parametrize("tz", [None, pytz.utc, dateutil.tz.tzutc()])
+    @pytest.mark.parametrize("freq", ["B", "C"])
+    def test_dti_business_repr_etc_smoke(self, tz, freq):
+        """Smoke test: repr and _summary of business-day ranges do not raise."""
+        # only really care that it works
+        dti = pd.bdate_range(
+            datetime(2009, 1, 1), datetime(2010, 1, 1), tz=tz, freq=freq
+        )
+        repr(dti)
+        dti._summary()
+        # empty slice
+        dti[2:2]._summary()
+class TestFormat:
+    def test_format(self):
+        # GH#35439
+        idx = pd.date_range("20130101", periods=5)
+        expected = [f"{x:%Y-%m-%d}" for x in idx]
+        msg = r"DatetimeIndex\.format is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            assert idx.format() == expected
+    def test_format_with_name_time_info(self):
+        # bug I fixed 12/20/2011
+        dates = pd.date_range("2011-01-01 04:00:00", periods=10, name="something")
+        msg = "DatetimeIndex.format is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            formatted = dates.format(name=True)
+        assert formatted[0] == "something"
+    def test_format_datetime_with_time(self):
+        dti = DatetimeIndex([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)])
+        msg = "DatetimeIndex.format is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = dti.format()
+        expected = ["2012-02-07 00:00:00", "2012-02-07 23:00:00"]
+        assert len(result) == 2
+        assert result == expected
+    def test_format_datetime(self):
+        msg = "DatetimeIndex.format is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            formatted = pd.to_datetime([datetime(2003, 1, 1, 12), NaT]).format()
+        assert formatted[0] == "2003-01-01 12:00:00"
+        assert formatted[1] == "NaT"
+    def test_format_date(self):
+        msg = "DatetimeIndex.format is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            formatted = pd.to_datetime([datetime(2003, 1, 1), NaT]).format()
+        assert formatted[0] == "2003-01-01"
+        assert formatted[1] == "NaT"
+    def test_format_date_tz(self):
+        dti = pd.to_datetime([datetime(2013, 1, 1)], utc=True)
+        msg = "DatetimeIndex.format is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            formatted = dti.format()
+        assert formatted[0] == "2013-01-01 00:00:00+00:00"
+        dti = pd.to_datetime([datetime(2013, 1, 1), NaT], utc=True)
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            formatted = dti.format()
+        assert formatted[0] == "2013-01-01 00:00:00+00:00"
+    def test_format_date_explicit_date_format(self):
+        dti = pd.to_datetime([datetime(2003, 2, 1), NaT])
+        msg = "DatetimeIndex.format is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            formatted = dti.format(date_format="%m-%d-%Y", na_rep="UT")
+        assert formatted[0] == "02-01-2003"
+        assert formatted[1] == "UT"

py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_freq_attr.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import pytest
+from pandas import (
+    DatetimeIndex,
+    date_range,
+)
+from pandas.tseries.offsets import (
+    BDay,
+    DateOffset,
+    Day,
+    Hour,
+)
+class TestFreq:
+    def test_freq_setter_errors(self):
+        # GH#20678
+        idx = DatetimeIndex(["20180101", "20180103", "20180105"])
+        # setting with an incompatible freq
+        msg = (
+            "Inferred frequency 2D from passed values does not conform to "
+            "passed frequency 5D"
+        )
+        with pytest.raises(ValueError, match=msg):
+            idx._data.freq = "5D"
+        # setting with non-freq string
+        with pytest.raises(ValueError, match="Invalid frequency"):
+            idx._data.freq = "foo"
+    @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
+    @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48h", Hour(48)])
+    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
+    def test_freq_setter(self, values, freq, tz):
+        # GH#20678
+        idx = DatetimeIndex(values, tz=tz)
+        # can set to an offset, converting from string if necessary
+        idx._data.freq = freq
+        assert idx.freq == freq
+        assert isinstance(idx.freq, DateOffset)
+        # can reset to None
+        idx._data.freq = None
+        assert idx.freq is None
+    def test_freq_view_safe(self):
+        # Setting the freq for one DatetimeIndex shouldn't alter the freq
+        #  for another that views the same data
+        dti = date_range("2016-01-01", periods=5)
+        dta = dti._data
+        dti2 = DatetimeIndex(dta)._with_freq(None)
+        assert dti2.freq is None
+        # Original was not altered
+        assert dti.freq == "D"
+        assert dta.freq == "D"

py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_indexing.py ADDED Viewed

	@@ -0,0 +1,717 @@

+from datetime import (
+    date,
+    datetime,
+    time,
+    timedelta,
+)
+import numpy as np
+import pytest
+from pandas._libs import index as libindex
+from pandas.compat.numpy import np_long
+import pandas as pd
+from pandas import (
+    DatetimeIndex,
+    Index,
+    Timestamp,
+    bdate_range,
+    date_range,
+    notna,
+)
+import pandas._testing as tm
+from pandas.tseries.frequencies import to_offset
+# Shared start/end bounds passed to bdate_range by the business-day getitem tests
+START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
+class TestGetItem:
+    def test_getitem_slice_keeps_name(self):
+        # GH4226
+        st = Timestamp("2013-07-01 00:00:00", tz="America/Los_Angeles")
+        et = Timestamp("2013-07-02 00:00:00", tz="America/Los_Angeles")
+        dr = date_range(st, et, freq="h", name="timebucket")
+        assert dr[1:].name == dr.name
+    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
+    def test_getitem(self, tz):
+        idx = date_range("2011-01-01", "2011-01-31", freq="D", tz=tz, name="idx")
+        result = idx[0]
+        assert result == Timestamp("2011-01-01", tz=idx.tz)
+        result = idx[0:5]
+        expected = date_range(
+            "2011-01-01", "2011-01-05", freq="D", tz=idx.tz, name="idx"
+        )
+        tm.assert_index_equal(result, expected)
+        assert result.freq == expected.freq
+        result = idx[0:10:2]
+        expected = date_range(
+            "2011-01-01", "2011-01-09", freq="2D", tz=idx.tz, name="idx"
+        )
+        tm.assert_index_equal(result, expected)
+        assert result.freq == expected.freq
+        result = idx[-20:-5:3]
+        expected = date_range(
+            "2011-01-12", "2011-01-24", freq="3D", tz=idx.tz, name="idx"
+        )
+        tm.assert_index_equal(result, expected)
+        assert result.freq == expected.freq
+        result = idx[4::-1]
+        expected = DatetimeIndex(
+            ["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"],
+            dtype=idx.dtype,
+            freq="-1D",
+            name="idx",
+        )
+        tm.assert_index_equal(result, expected)
+        assert result.freq == expected.freq
+    @pytest.mark.parametrize("freq", ["B", "C"])
+    def test_dti_business_getitem(self, freq):
+        rng = bdate_range(START, END, freq=freq)
+        smaller = rng[:5]
+        exp = DatetimeIndex(rng.view(np.ndarray)[:5], freq=freq)
+        tm.assert_index_equal(smaller, exp)
+        assert smaller.freq == exp.freq
+        assert smaller.freq == rng.freq
+        sliced = rng[::5]
+        assert sliced.freq == to_offset(freq) * 5
+        fancy_indexed = rng[[4, 3, 2, 1, 0]]
+        assert len(fancy_indexed) == 5
+        assert isinstance(fancy_indexed, DatetimeIndex)
+        assert fancy_indexed.freq is None
+        # 32-bit vs. 64-bit platforms
+        assert rng[4] == rng[np_long(4)]
+    @pytest.mark.parametrize("freq", ["B", "C"])
+    def test_dti_business_getitem_matplotlib_hackaround(self, freq):
+        rng = bdate_range(START, END, freq=freq)
+        with pytest.raises(ValueError, match="Multi-dimensional indexing"):
+            # GH#30588 multi-dimensional indexing deprecated
+            rng[:, None]
+    def test_getitem_int_list(self):
+        dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME")
+        dti2 = dti[[1, 3, 5]]
+        v1 = dti2[0]
+        v2 = dti2[1]
+        v3 = dti2[2]
+        assert v1 == Timestamp("2/28/2005")
+        assert v2 == Timestamp("4/30/2005")
+        assert v3 == Timestamp("6/30/2005")
+        # getitem with non-slice drops freq
+        assert dti2.freq is None
+class TestWhere:
+    def test_where_doesnt_retain_freq(self):
+        dti = date_range("20130101", periods=3, freq="D", name="idx")
+        cond = [True, True, False]
+        expected = DatetimeIndex([dti[0], dti[1], dti[0]], freq=None, name="idx")
+        result = dti.where(cond, dti[::-1])
+        tm.assert_index_equal(result, expected)
+    def test_where_other(self):
+        # other is ndarray or Index
+        i = date_range("20130101", periods=3, tz="US/Eastern")
+        for arr in [np.nan, pd.NaT]:
+            result = i.where(notna(i), other=arr)
+            expected = i
+            tm.assert_index_equal(result, expected)
+        i2 = i.copy()
+        i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
+        result = i.where(notna(i2), i2)
+        tm.assert_index_equal(result, i2)
+        i2 = i.copy()
+        i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
+        result = i.where(notna(i2), i2._values)
+        tm.assert_index_equal(result, i2)
+    def test_where_invalid_dtypes(self):
+        dti = date_range("20130101", periods=3, tz="US/Eastern")
+        tail = dti[2:].tolist()
+        i2 = Index([pd.NaT, pd.NaT] + tail)
+        mask = notna(i2)
+        # passing tz-naive ndarray to tzaware DTI
+        result = dti.where(mask, i2.values)
+        expected = Index([pd.NaT.asm8, pd.NaT.asm8] + tail, dtype=object)
+        tm.assert_index_equal(result, expected)
+        # passing tz-aware DTI to tznaive DTI
+        naive = dti.tz_localize(None)
+        result = naive.where(mask, i2)
+        expected = Index([i2[0], i2[1]] + naive[2:].tolist(), dtype=object)
+        tm.assert_index_equal(result, expected)
+        pi = i2.tz_localize(None).to_period("D")
+        result = dti.where(mask, pi)
+        expected = Index([pi[0], pi[1]] + tail, dtype=object)
+        tm.assert_index_equal(result, expected)
+        tda = i2.asi8.view("timedelta64[ns]")
+        result = dti.where(mask, tda)
+        expected = Index([tda[0], tda[1]] + tail, dtype=object)
+        assert isinstance(expected[0], np.timedelta64)
+        tm.assert_index_equal(result, expected)
+        result = dti.where(mask, i2.asi8)
+        expected = Index([pd.NaT._value, pd.NaT._value] + tail, dtype=object)
+        assert isinstance(expected[0], int)
+        tm.assert_index_equal(result, expected)
+        # non-matching scalar
+        td = pd.Timedelta(days=4)
+        result = dti.where(mask, td)
+        expected = Index([td, td] + tail, dtype=object)
+        assert expected[0] is td
+        tm.assert_index_equal(result, expected)
+    def test_where_mismatched_nat(self, tz_aware_fixture):
+        # where() on a tz-aware DatetimeIndex with a timedelta64 NaT as `other`:
+        # the expected result is an object-dtype Index holding that exact object.
+        tz = tz_aware_fixture
+        dti = date_range("2013-01-01", periods=3, tz=tz)
+        # only the middle position is replaced
+        cond = np.array([True, False, True])
+        tdnat = np.timedelta64("NaT", "ns")
+        expected = Index([dti[0], tdnat, dti[2]], dtype=object)
+        # sanity check on the expected fixture itself: tdnat is stored by identity
+        assert expected[1] is tdnat
+        result = dti.where(cond, tdnat)
+        tm.assert_index_equal(result, expected)
+    def test_where_tz(self):
+        i = date_range("20130101", periods=3, tz="US/Eastern")
+        result = i.where(notna(i))
+        expected = i
+        tm.assert_index_equal(result, expected)
+        i2 = i.copy()
+        i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
+        result = i.where(notna(i2))
+        expected = i2
+        tm.assert_index_equal(result, expected)
+class TestTake:
+    # Tests for DatetimeIndex.take (test_take2 also checks list-based indexing).
+    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
+    def test_dti_take_dont_lose_meta(self, tzstr):
+        # take() on a tz-aware range is expected to keep both tz and freq
+        rng = date_range("1/1/2000", periods=20, tz=tzstr)
+        result = rng.take(range(5))
+        assert result.tz == rng.tz
+        assert result.freq == rng.freq
+    def test_take_nan_first_datetime(self):
+        # index whose first element is NaT; a -1 position is taken together
+        # with positions 0 and 1
+        index = DatetimeIndex([pd.NaT, Timestamp("20130101"), Timestamp("20130102")])
+        result = index.take([-1, 0, 1])
+        expected = DatetimeIndex([index[-1], index[0], index[1]])
+        tm.assert_index_equal(result, expected)
+    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
+    def test_take(self, tz):
+        # GH#10295
+        idx = date_range("2011-01-01", "2011-01-31", freq="D", name="idx", tz=tz)
+        result = idx.take([0])
+        assert result == Timestamp("2011-01-01", tz=idx.tz)
+        # consecutive positions: expected freq matches a daily range
+        result = idx.take([0, 1, 2])
+        expected = date_range(
+            "2011-01-01", "2011-01-03", freq="D", tz=idx.tz, name="idx"
+        )
+        tm.assert_index_equal(result, expected)
+        assert result.freq == expected.freq
+        # every second position: expected freq matches a "2D" range
+        result = idx.take([0, 2, 4])
+        expected = date_range(
+            "2011-01-01", "2011-01-05", freq="2D", tz=idx.tz, name="idx"
+        )
+        tm.assert_index_equal(result, expected)
+        assert result.freq == expected.freq
+        # descending positions with a step of 3: expected freq matches "-3D"
+        result = idx.take([7, 4, 1])
+        expected = date_range(
+            "2011-01-08", "2011-01-02", freq="-3D", tz=idx.tz, name="idx"
+        )
+        tm.assert_index_equal(result, expected)
+        assert result.freq == expected.freq
+        # irregularly spaced positions: freq is expected to be None
+        result = idx.take([3, 2, 5])
+        expected = DatetimeIndex(
+            ["2011-01-04", "2011-01-03", "2011-01-06"],
+            dtype=idx.dtype,
+            freq=None,
+            name="idx",
+        )
+        tm.assert_index_equal(result, expected)
+        assert result.freq is None
+        # a negative position mixed with positive ones: freq is expected to be None
+        result = idx.take([-3, 2, 5])
+        expected = DatetimeIndex(
+            ["2011-01-29", "2011-01-03", "2011-01-06"],
+            dtype=idx.dtype,
+            freq=None,
+            name="idx",
+        )
+        tm.assert_index_equal(result, expected)
+        assert result.freq is None
+    def test_take_invalid_kwargs(self):
+        # unknown keyword -> TypeError; `out` and `mode` keywords -> ValueError
+        idx = date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
+        indices = [1, 6, 5, 9, 10, 13, 15, 3]
+        msg = r"take\(\) got an unexpected keyword argument 'foo'"
+        with pytest.raises(TypeError, match=msg):
+            idx.take(indices, foo=2)
+        msg = "the 'out' parameter is not supported"
+        with pytest.raises(ValueError, match=msg):
+            idx.take(indices, out=indices)
+        msg = "the 'mode' parameter is not supported"
+        with pytest.raises(ValueError, match=msg):
+            idx.take(indices, mode="clip")
+    # TODO: This method came from test_datetime; de-dup with version above
+    @pytest.mark.parametrize("tz", [None, "US/Eastern", "Asia/Tokyo"])
+    def test_take2(self, tz):
+        # NOTE(review): `dates` are naive datetimes while `expected` is built
+        # with dtype=idx.dtype, so for tz-aware params they are presumably
+        # interpreted as wall times in that tz - confirm.
+        dates = [
+            datetime(2010, 1, 1, 14),
+            datetime(2010, 1, 1, 15),
+            datetime(2010, 1, 1, 17),
+            datetime(2010, 1, 1, 21),
+        ]
+        idx = date_range(
+            start="2010-01-01 09:00",
+            end="2010-02-01 09:00",
+            freq="h",
+            tz=tz,
+            name="idx",
+        )
+        expected = DatetimeIndex(dates, freq=None, name="idx", dtype=idx.dtype)
+        # the same positions via take() and via list indexing must agree
+        taken1 = idx.take([5, 6, 8, 12])
+        taken2 = idx[[5, 6, 8, 12]]
+        for taken in [taken1, taken2]:
+            tm.assert_index_equal(taken, expected)
+            assert isinstance(taken, DatetimeIndex)
+            assert taken.freq is None
+            assert taken.tz == expected.tz
+            assert taken.name == expected.name
+    def test_take_fill_value(self):
+        # GH#12631
+        idx = DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
+        # without fill_value, -1 selects the last element
+        result = idx.take(np.array([1, 0, -1]))
+        expected = DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx")
+        tm.assert_index_equal(result, expected)
+        # fill_value
+        result = idx.take(np.array([1, 0, -1]), fill_value=True)
+        expected = DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
+        tm.assert_index_equal(result, expected)
+        # allow_fill=False
+        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
+        expected = DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx")
+        tm.assert_index_equal(result, expected)
+        # with fill_value set, positions below -1 are rejected
+        msg = (
+            "When allow_fill=True and fill_value is not None, "
+            "all indices must be >= -1"
+        )
+        with pytest.raises(ValueError, match=msg):
+            idx.take(np.array([1, 0, -2]), fill_value=True)
+        with pytest.raises(ValueError, match=msg):
+            idx.take(np.array([1, 0, -5]), fill_value=True)
+        # without fill_value, -5 on a length-3 index raises IndexError
+        msg = "out of bounds"
+        with pytest.raises(IndexError, match=msg):
+            idx.take(np.array([1, -5]))
+    def test_take_fill_value_with_timezone(self):
+        # same sequence of checks as test_take_fill_value, on a tz="US/Eastern" index
+        idx = DatetimeIndex(
+            ["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx", tz="US/Eastern"
+        )
+        result = idx.take(np.array([1, 0, -1]))
+        expected = DatetimeIndex(
+            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
+        )
+        tm.assert_index_equal(result, expected)
+        # fill_value
+        result = idx.take(np.array([1, 0, -1]), fill_value=True)
+        expected = DatetimeIndex(
+            ["2011-02-01", "2011-01-01", "NaT"], name="xxx", tz="US/Eastern"
+        )
+        tm.assert_index_equal(result, expected)
+        # allow_fill=False
+        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
+        expected = DatetimeIndex(
+            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
+        )
+        tm.assert_index_equal(result, expected)
+        msg = (
+            "When allow_fill=True and fill_value is not None, "
+            "all indices must be >= -1"
+        )
+        with pytest.raises(ValueError, match=msg):
+            idx.take(np.array([1, 0, -2]), fill_value=True)
+        with pytest.raises(ValueError, match=msg):
+            idx.take(np.array([1, 0, -5]), fill_value=True)
+        msg = "out of bounds"
+        with pytest.raises(IndexError, match=msg):
+            idx.take(np.array([1, -5]))
+class TestGetLoc:
+    # Tests for DatetimeIndex.get_loc (several also check `in` membership).
+    def test_get_loc_key_unit_mismatch(self):
+        # key is an element of the index converted with as_unit("ms"); it is
+        # still expected to be found at its original position
+        idx = date_range("2000-01-01", periods=3)
+        key = idx[1].as_unit("ms")
+        loc = idx.get_loc(key)
+        assert loc == 1
+        assert key in idx
+    def test_get_loc_key_unit_mismatch_not_castable(self):
+        # index data is cast to "M8[s]"; the key is the first element as "ns"
+        # plus pd.Timedelta(1), and the lookup is expected to raise KeyError
+        dta = date_range("2000-01-01", periods=3)._data.astype("M8[s]")
+        dti = DatetimeIndex(dta)
+        key = dta[0].as_unit("ns") + pd.Timedelta(1)
+        with pytest.raises(
+            KeyError, match=r"Timestamp\('2000-01-01 00:00:00.000000001'\)"
+        ):
+            dti.get_loc(key)
+        assert key not in dti
+    def test_get_loc_time_obj(self):
+        # time indexing
+        idx = date_range("2000-01-01", periods=24, freq="h")
+        result = idx.get_loc(time(12))
+        expected = np.array([12])
+        tm.assert_numpy_array_equal(result, expected, check_dtype=False)
+        # a time that no hourly entry matches gives an empty array
+        result = idx.get_loc(time(12, 30))
+        expected = np.array([])
+        tm.assert_numpy_array_equal(result, expected, check_dtype=False)
+    @pytest.mark.parametrize("offset", [-10, 10])
+    def test_get_loc_time_obj2(self, monkeypatch, offset):
+        # GH#8667
+        # libindex._SIZE_CUTOFF is patched to 50 and the index length is 50 +/- 10,
+        # so both sides of the cutoff are exercised
+        size_cutoff = 50
+        n = size_cutoff + offset
+        key = time(15, 11, 30)
+        # seconds since midnight for `key`; the index below is secondly
+        start = key.hour * 3600 + key.minute * 60 + key.second
+        step = 24 * 3600
+        with monkeypatch.context():
+            monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
+            idx = date_range("2014-11-26", periods=n, freq="s")
+            ts = pd.Series(np.random.default_rng(2).standard_normal(n), index=idx)
+            locs = np.arange(start, n, step, dtype=np.intp)
+            result = ts.index.get_loc(key)
+            tm.assert_numpy_array_equal(result, locs)
+            tm.assert_series_equal(ts[key], ts.iloc[locs])
+            # in-place update through the time key must match the positional update
+            left, right = ts.copy(), ts.copy()
+            left[key] *= -10
+            right.iloc[locs] *= -10
+            tm.assert_series_equal(left, right)
+    def test_get_loc_time_nat(self):
+        # GH#35114
+        # Case where key's total microseconds happens to match iNaT % 1e6 // 1000
+        tic = time(minute=12, second=43, microsecond=145224)
+        dti = DatetimeIndex([pd.NaT])
+        loc = dti.get_loc(tic)
+        expected = np.array([], dtype=np.intp)
+        tm.assert_numpy_array_equal(loc, expected)
+    def test_get_loc_nat(self):
+        # GH#20464
+        index = DatetimeIndex(["1/3/2000", "NaT"])
+        # these missing-value keys all resolve to the NaT position
+        assert index.get_loc(pd.NaT) == 1
+        assert index.get_loc(None) == 1
+        assert index.get_loc(np.nan) == 1
+        assert index.get_loc(pd.NA) == 1
+        assert index.get_loc(np.datetime64("NaT")) == 1
+        # a timedelta64 NaT is not accepted as a key
+        with pytest.raises(KeyError, match="NaT"):
+            index.get_loc(np.timedelta64("NaT"))
+    @pytest.mark.parametrize("key", [pd.Timedelta(0), pd.Timedelta(1), timedelta(0)])
+    def test_get_loc_timedelta_invalid_key(self, key):
+        # GH#20464
+        dti = date_range("1970-01-01", periods=10)
+        msg = "Cannot index DatetimeIndex with [Tt]imedelta"
+        with pytest.raises(TypeError, match=msg):
+            dti.get_loc(key)
+    def test_get_loc_reasonable_key_error(self):
+        # GH#1062
+        # the KeyError message is expected to mention "2000"
+        index = DatetimeIndex(["1/3/2000"])
+        with pytest.raises(KeyError, match="2000"):
+            index.get_loc("1/1/2000")
+    def test_get_loc_year_str(self):
+        # a year-only string key is expected to resolve to a slice
+        rng = date_range("1/1/2000", "1/1/2010")
+        result = rng.get_loc("2009")
+        expected = slice(3288, 3653)
+        assert result == expected
+class TestContains:
+    # Tests for `in` membership checks on a DatetimeIndex.
+    def test_dti_contains_with_duplicates(self):
+        # the index holds the same datetime twice
+        d = datetime(2011, 12, 5, 20, 30)
+        ix = DatetimeIndex([d, d])
+        assert d in ix
+    @pytest.mark.parametrize(
+        "vals",
+        [
+            [0, 1, 0],
+            [0, 0, -1],
+            [0, -1, -1],
+            ["2015", "2015", "2016"],
+            ["2015", "2015", "2014"],
+        ],
+    )
+    def test_contains_nonunique(self, vals):
+        # GH#9512
+        # every parametrized input contains a repeated value
+        idx = DatetimeIndex(vals)
+        assert idx[0] in idx
+class TestGetIndexer:
+    # Tests for DatetimeIndex.get_indexer.
+    def test_get_indexer_date_objs(self):
+        # a target made of date objects (via x.date()) must give the same
+        # indexer as the index itself
+        rng = date_range("1/1/2000", periods=20)
+        result = rng.get_indexer(rng.map(lambda x: x.date()))
+        expected = rng.get_indexer(rng)
+        tm.assert_numpy_array_equal(result, expected)
+    def test_get_indexer(self):
+        idx = date_range("2000-01-01", periods=3)
+        exp = np.array([0, 1, 2], dtype=np.intp)
+        tm.assert_numpy_array_equal(idx.get_indexer(idx), exp)
+        # target points are offset from idx[0] by -1 hour, 12 hours and 1 day 1 hour
+        target = idx[0] + pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"])
+        tm.assert_numpy_array_equal(
+            idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp)
+        )
+        tm.assert_numpy_array_equal(
+            idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp)
+        )
+        tm.assert_numpy_array_equal(
+            idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp)
+        )
+        # scalar tolerance: the 12-hour target is expected to give -1
+        tm.assert_numpy_array_equal(
+            idx.get_indexer(target, "nearest", tolerance=pd.Timedelta("1 hour")),
+            np.array([0, -1, 1], dtype=np.intp),
+        )
+        # list-like tolerance, one entry per target element
+        tol_raw = [
+            pd.Timedelta("1 hour"),
+            pd.Timedelta("1 hour"),
+            pd.Timedelta("1 hour").to_timedelta64(),
+        ]
+        tm.assert_numpy_array_equal(
+            idx.get_indexer(
+                target, "nearest", tolerance=[np.timedelta64(x) for x in tol_raw]
+            ),
+            np.array([0, -1, 1], dtype=np.intp),
+        )
+        # a non-timedelta entry in the tolerance list raises ValueError
+        tol_bad = [
+            pd.Timedelta("2 hour").to_timedelta64(),
+            pd.Timedelta("1 hour").to_timedelta64(),
+            "foo",
+        ]
+        msg = "Could not convert 'foo' to NumPy timedelta"
+        with pytest.raises(ValueError, match=msg):
+            idx.get_indexer(target, "nearest", tolerance=tol_bad)
+        # an unparseable scalar string tolerance also raises ValueError
+        with pytest.raises(ValueError, match="abbreviation w/o a number"):
+            idx.get_indexer(idx[[0]], method="nearest", tolerance="foo")
+    @pytest.mark.parametrize(
+        "target",
+        [
+            [date(2020, 1, 1), Timestamp("2020-01-02")],
+            [Timestamp("2020-01-01"), date(2020, 1, 2)],
+        ],
+    )
+    def test_get_indexer_mixed_dtypes(self, target):
+        # https://github.com/pandas-dev/pandas/issues/33741
+        # target lists mix date and Timestamp objects
+        values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")])
+        result = values.get_indexer(target)
+        expected = np.array([0, 1], dtype=np.intp)
+        tm.assert_numpy_array_equal(result, expected)
+    @pytest.mark.parametrize(
+        "target, positions",
+        [
+            ([date(9999, 1, 1), Timestamp("2020-01-01")], [-1, 0]),
+            ([Timestamp("2020-01-01"), date(9999, 1, 1)], [0, -1]),
+            ([date(9999, 1, 1), date(9999, 1, 1)], [-1, -1]),
+        ],
+    )
+    def test_get_indexer_out_of_bounds_date(self, target, positions):
+        # date(9999, 1, 1) entries are expected to map to -1
+        values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")])
+        result = values.get_indexer(target)
+        expected = np.array(positions, dtype=np.intp)
+        tm.assert_numpy_array_equal(result, expected)
+    def test_get_indexer_pad_requires_monotonicity(self):
+        rng = date_range("1/1/2000", "3/1/2000", freq="B")
+        # neither monotonic increasing nor decreasing
+        rng2 = rng[[1, 0, 2]]
+        msg = "index must be monotonic increasing or decreasing"
+        with pytest.raises(ValueError, match=msg):
+            rng2.get_indexer(rng, method="pad")
+class TestMaybeCastSliceBound:
+    # Tests for the private DatetimeIndex._maybe_cast_slice_bound helper.
+    def test_maybe_cast_slice_bounds_empty(self):
+        # GH#14354
+        empty_idx = date_range(freq="1h", periods=0, end="2015")
+        # a day-resolution string casts to the last nanosecond of that day on
+        # the right side ...
+        right = empty_idx._maybe_cast_slice_bound("2015-01-02", "right")
+        exp = Timestamp("2015-01-02 23:59:59.999999999")
+        assert right == exp
+        # ... and to midnight of that day on the left side
+        left = empty_idx._maybe_cast_slice_bound("2015-01-02", "left")
+        exp = Timestamp("2015-01-02 00:00:00")
+        assert left == exp
+    def test_maybe_cast_slice_duplicate_monotonic(self):
+        # https://github.com/pandas-dev/pandas/issues/16515
+        # index consists of the same value twice
+        idx = DatetimeIndex(["2017", "2017"])
+        result = idx._maybe_cast_slice_bound("2017-01-01", "left")
+        expected = Timestamp("2017-01-01")
+        assert result == expected
+class TestGetSliceBounds:
+    # Tests for get_slice_bound / slice_locs with date-like keys. In each test
+    # the key is built without tz information, so the tz-is-not-None branch
+    # expects a TypeError.
+    @pytest.mark.parametrize("box", [date, datetime, Timestamp])
+    @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
+    def test_get_slice_bounds_datetime_within(
+        self, box, side, expected, tz_aware_fixture
+    ):
+        # GH 35690
+        # key (2000-01-07) lies inside the index's date range
+        tz = tz_aware_fixture
+        index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz)
+        key = box(year=2000, month=1, day=7)
+        if tz is not None:
+            with pytest.raises(TypeError, match="Cannot compare tz-naive"):
+                # GH#36148 we require tzawareness-compat as of 2.0
+                index.get_slice_bound(key, side=side)
+        else:
+            result = index.get_slice_bound(key, side=side)
+            assert result == expected
+    @pytest.mark.parametrize("box", [datetime, Timestamp])
+    @pytest.mark.parametrize("side", ["left", "right"])
+    @pytest.mark.parametrize("year, expected", [(1999, 0), (2020, 30)])
+    def test_get_slice_bounds_datetime_outside(
+        self, box, side, year, expected, tz_aware_fixture
+    ):
+        # GH 35690
+        # key years 1999 and 2020 lie before / after the index's date range
+        tz = tz_aware_fixture
+        index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz)
+        key = box(year=year, month=1, day=7)
+        if tz is not None:
+            with pytest.raises(TypeError, match="Cannot compare tz-naive"):
+                # GH#36148 we require tzawareness-compat as of 2.0
+                index.get_slice_bound(key, side=side)
+        else:
+            result = index.get_slice_bound(key, side=side)
+            assert result == expected
+    @pytest.mark.parametrize("box", [datetime, Timestamp])
+    def test_slice_datetime_locs(self, box, tz_aware_fixture):
+        # GH 34077
+        # slice_locs with an end bound (2010-01-02) that is not in the index
+        tz = tz_aware_fixture
+        index = DatetimeIndex(["2010-01-01", "2010-01-03"]).tz_localize(tz)
+        key = box(2010, 1, 1)
+        if tz is not None:
+            with pytest.raises(TypeError, match="Cannot compare tz-naive"):
+                # GH#36148 we require tzawareness-compat as of 2.0
+                index.slice_locs(key, box(2010, 1, 2))
+        else:
+            result = index.slice_locs(key, box(2010, 1, 2))
+            expected = (0, 1)
+            assert result == expected
+class TestIndexerBetweenTime:
+    # Tests for DatetimeIndex.indexer_between_time.
+    def test_indexer_between_time(self):
+        # GH#11818
+        # datetime arguments (rather than time objects) raise ValueError
+        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
+        msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time"
+        with pytest.raises(ValueError, match=msg):
+            rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
+    @pytest.mark.parametrize("unit", ["us", "ms", "s"])
+    def test_indexer_between_time_non_nano(self, unit):
+        # For simple cases like this, the non-nano indexer_between_time
+        #  should match the nano result
+        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
+        # rebuild the same values at the parametrized unit
+        arr_nano = rng._data._ndarray
+        arr = arr_nano.astype(f"M8[{unit}]")
+        dta = type(rng._data)._simple_new(arr, dtype=arr.dtype)
+        dti = DatetimeIndex(dta)
+        assert dti.dtype == arr.dtype
+        tic = time(1, 25)
+        toc = time(2, 29)
+        result = dti.indexer_between_time(tic, toc)
+        expected = rng.indexer_between_time(tic, toc)
+        tm.assert_numpy_array_equal(result, expected)
+        # case with non-zero micros in arguments
+        tic = time(1, 25, 0, 45678)
+        toc = time(2, 29, 0, 1234)
+        result = dti.indexer_between_time(tic, toc)
+        expected = rng.indexer_between_time(tic, toc)
+        tm.assert_numpy_array_equal(result, expected)

py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_iter.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import dateutil.tz
+import numpy as np
+import pytest
+from pandas import (
+    DatetimeIndex,
+    date_range,
+    to_datetime,
+)
+from pandas.core.arrays import datetimes
+class TestDatetimeIndexIteration:
+    # Tests that iterating a DatetimeIndex yields the same elements as
+    # positional indexing.
+    @pytest.mark.parametrize(
+        "tz", [None, "UTC", "US/Central", dateutil.tz.tzoffset(None, -28800)]
+    )
+    def test_iteration_preserves_nanoseconds(self, tz):
+        # GH#19603
+        # the two input strings differ only in their last (nanosecond) digit
+        index = DatetimeIndex(
+            ["2018-02-08 15:00:00.168456358", "2018-02-08 15:00:00.168456359"], tz=tz
+        )
+        for i, ts in enumerate(index):
+            assert ts == index[i]  # pylint: disable=unnecessary-list-index-lookup
+    def test_iter_readonly(self):
+        # GH#28055 ints_to_pydatetime with readonly array
+        arr = np.array([np.datetime64("2012-02-15T12:00:00.000000000")])
+        arr.setflags(write=False)
+        dti = to_datetime(arr)
+        # no assertion: the test passes if iterating does not raise
+        list(dti)
+    def test_iteration_preserves_tz(self):
+        # see GH#8890
+        index = date_range("2012-01-01", periods=3, freq="h", tz="US/Eastern")
+        for i, ts in enumerate(index):
+            result = ts
+            expected = index[i]  # pylint: disable=unnecessary-list-index-lookup
+            assert result == expected
+    def test_iteration_preserves_tz2(self):
+        # same check with a dateutil fixed-offset tz; _repr_base is compared too
+        index = date_range(
+            "2012-01-01", periods=3, freq="h", tz=dateutil.tz.tzoffset(None, -28800)
+        )
+        for i, ts in enumerate(index):
+            result = ts
+            expected = index[i]  # pylint: disable=unnecessary-list-index-lookup
+            assert result._repr_base == expected._repr_base
+            assert result == expected
+    def test_iteration_preserves_tz3(self):
+        # GH#9100
+        # the input strings carry an explicit -08:00 offset
+        index = DatetimeIndex(
+            ["2014-12-01 03:32:39.987000-08:00", "2014-12-01 04:12:34.987000-08:00"]
+        )
+        for i, ts in enumerate(index):
+            result = ts
+            expected = index[i]  # pylint: disable=unnecessary-list-index-lookup
+            assert result._repr_base == expected._repr_base
+            assert result == expected
+    @pytest.mark.parametrize("offset", [-5, -1, 0, 1])
+    def test_iteration_over_chunksize(self, offset, monkeypatch):
+        # GH#21012
+        # datetimes._ITER_CHUNKSIZE is patched to 5; index lengths are
+        # chunksize - offset, i.e. 10, 6, 5 and 4
+        chunksize = 5
+        index = date_range(
+            "2000-01-01 00:00:00", periods=chunksize - offset, freq="min"
+        )
+        num = 0
+        with monkeypatch.context() as m:
+            m.setattr(datetimes, "_ITER_CHUNKSIZE", chunksize)
+            for stamp in index:
+                assert index[num] == stamp
+                num += 1
+        # every element must have been visited exactly once
+        assert num == len(index)

py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_join.py ADDED Viewed

	@@ -0,0 +1,153 @@

+from datetime import (
+    datetime,
+    timezone,
+)
+import numpy as np
+import pytest
+from pandas import (
+    DataFrame,
+    DatetimeIndex,
+    Index,
+    Timestamp,
+    date_range,
+    period_range,
+    to_datetime,
+)
+import pandas._testing as tm
+from pandas.tseries.offsets import (
+    BDay,
+    BMonthEnd,
+)
+class TestJoin:
+    # Tests for DatetimeIndex.join (plus one union check).
+    def test_does_not_convert_mixed_integer(self):
+        # outer-joining the datetime columns with df.index (no index was
+        # passed to DataFrame) is expected to give object dtype, and joining
+        # again with the columns keeps the same values
+        df = DataFrame(np.ones((3, 2)), columns=date_range("2020-01-01", periods=2))
+        cols = df.columns.join(df.index, how="outer")
+        joined = cols.join(df.columns)
+        assert cols.dtype == np.dtype("O")
+        assert cols.dtype == joined.dtype
+        tm.assert_numpy_array_equal(cols.values, joined.values)
+    def test_join_self(self, join_type):
+        # joining an index with itself returns the very same object
+        index = date_range("1/1/2000", periods=10)
+        joined = index.join(index, how=join_type)
+        assert index is joined
+    def test_join_with_period_index(self, join_type):
+        # joining period columns with a datetime index must match joining
+        # the object-cast columns
+        df = DataFrame(
+            np.ones((10, 2)),
+            index=date_range("2020-01-01", periods=10),
+            columns=period_range("2020-01-01", periods=2),
+        )
+        s = df.iloc[:5, 0]
+        expected = df.columns.astype("O").join(s.index, how=join_type)
+        result = df.columns.join(s.index, how=join_type)
+        tm.assert_index_equal(expected, result)
+    def test_join_object_index(self):
+        # outer join with an Index of strings: first element stays a Timestamp
+        rng = date_range("1/1/2000", periods=10)
+        idx = Index(["a", "b", "c", "d"])
+        result = rng.join(idx, how="outer")
+        assert isinstance(result[0], Timestamp)
+    def test_join_utc_convert(self, join_type):
+        rng = date_range("1/1/2011", periods=100, freq="h", tz="utc")
+        left = rng.tz_convert("US/Eastern")
+        right = rng.tz_convert("Europe/Berlin")
+        # same tz on both sides: tz is kept
+        result = left.join(left[:-5], how=join_type)
+        assert isinstance(result, DatetimeIndex)
+        assert result.tz == left.tz
+        # different tz on each side: result tz is expected to be timezone.utc
+        result = left.join(right[:-5], how=join_type)
+        assert isinstance(result, DatetimeIndex)
+        assert result.tz is timezone.utc
+    def test_datetimeindex_union_join_empty(self, sort, using_infer_string):
+        dti = date_range(start="1/1/2001", end="2/1/2001", freq="D")
+        empty = Index([])
+        # union with an empty Index: the expected result depends on the
+        # using_infer_string fixture
+        result = dti.union(empty, sort=sort)
+        if using_infer_string:
+            assert isinstance(result, DatetimeIndex)
+            tm.assert_index_equal(result, dti)
+        else:
+            expected = dti.astype("O")
+            tm.assert_index_equal(result, expected)
+        # join with an empty Index keeps the DatetimeIndex
+        result = dti.join(empty)
+        assert isinstance(result, DatetimeIndex)
+        tm.assert_index_equal(result, dti)
+    def test_join_nonunique(self):
+        # both inputs contain the same timestamp twice
+        idx1 = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"])
+        idx2 = to_datetime(["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"])
+        rs = idx1.join(idx2, how="outer")
+        assert rs.is_monotonic_increasing
+    @pytest.mark.parametrize("freq", ["B", "C"])
+    def test_outer_join(self, freq):
+        # should just behave as union
+        start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
+        rng = date_range(start=start, end=end, freq=freq)
+        # overlapping
+        left = rng[:10]
+        right = rng[5:10]
+        the_join = left.join(right, how="outer")
+        assert isinstance(the_join, DatetimeIndex)
+        # non-overlapping, gap in middle
+        left = rng[:5]
+        right = rng[10:]
+        the_join = left.join(right, how="outer")
+        assert isinstance(the_join, DatetimeIndex)
+        assert the_join.freq is None
+        # non-overlapping, no gap
+        left = rng[:5]
+        right = rng[5:10]
+        the_join = left.join(right, how="outer")
+        assert isinstance(the_join, DatetimeIndex)
+        # overlapping, but different offset
+        other = date_range(start, end, freq=BMonthEnd())
+        the_join = rng.join(other, how="outer")
+        assert isinstance(the_join, DatetimeIndex)
+        assert the_join.freq is None
+    def test_naive_aware_conflicts(self):
+        # joining tz-naive with tz-aware raises TypeError in either direction
+        start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
+        naive = date_range(start, end, freq=BDay(), tz=None)
+        aware = date_range(start, end, freq=BDay(), tz="Asia/Hong_Kong")
+        msg = "tz-naive.*tz-aware"
+        with pytest.raises(TypeError, match=msg):
+            naive.join(aware)
+        with pytest.raises(TypeError, match=msg):
+            aware.join(naive)
+    @pytest.mark.parametrize("tz", [None, "US/Pacific"])
+    def test_join_preserves_freq(self, tz):
+        # GH#32157
+        dti = date_range("2016-01-01", periods=10, tz=tz)
+        # adjacent halves: freq is kept
+        result = dti[:5].join(dti[5:], how="outer")
+        assert result.freq == dti.freq
+        tm.assert_index_equal(result, dti)
+        # one element missing between the halves: freq is None
+        result = dti[:5].join(dti[6:], how="outer")
+        assert result.freq is None
+        expected = dti.delete(5)
+        tm.assert_index_equal(result, expected)

py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_npfuncs.py ADDED Viewed

	@@ -0,0 +1,13 @@

+import numpy as np
+from pandas import date_range
+import pandas._testing as tm
+class TestSplit:
+    # Tests for np.split applied to a DatetimeIndex.
+    def test_split_non_utc(self):
+        # GH#14042
+        # the range start carries a +0200 offset
+        indices = date_range("2016-01-01 00:00:00+0200", freq="s", periods=10)
+        # an empty split-point list yields a single piece
+        result = np.split(indices, indices_or_sections=[])[0]
+        # same values expected, with freq dropped
+        expected = indices._with_freq(None)
+        tm.assert_index_equal(result, expected)

py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_ops.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from datetime import datetime
+import pytest
+from pandas import (
+    DatetimeIndex,
+    Index,
+    bdate_range,
+    date_range,
+)
+import pandas._testing as tm
+class TestDatetimeIndexOps:
+    def test_infer_freq(self, freq_sample):
+        # GH 11018
+        # rebuilding from the asi8 values with freq="infer" must recover both
+        # the index and the original freq
+        idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
+        result = DatetimeIndex(idx.asi8, freq="infer")
+        tm.assert_index_equal(idx, result)
+        assert result.freq == freq_sample
+@pytest.mark.parametrize("freq", ["B", "C"])
+class TestBusinessDatetimeIndex:
+    # Every test runs once per parametrized freq ("B" and "C") through the
+    # `rng` fixture.
+    @pytest.fixture
+    def rng(self, freq):
+        # bdate_range over 2009-01-01 .. 2010-01-01 at the parametrized freq
+        START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
+        return bdate_range(START, END, freq=freq)
+    def test_comparison(self, rng):
+        # elementwise `>` against one of the index's own elements
+        d = rng[10]
+        comp = rng > d
+        assert comp[11]
+        assert not comp[9]
+    def test_copy(self, rng):
+        cp = rng.copy()
+        tm.assert_index_equal(cp, rng)
+    def test_identical(self, rng):
+        t1 = rng.copy()
+        t2 = rng.copy()
+        assert t1.identical(t2)
+        # name
+        t1 = t1.rename("foo")
+        assert t1.equals(t2)
+        assert not t1.identical(t2)
+        t2 = t2.rename("foo")
+        assert t1.identical(t2)
+        # freq
+        t2v = Index(t2.values)
+        assert t1.equals(t2v)
+        assert not t1.identical(t2v)

py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_partial_slicing.py ADDED Viewed

	@@ -0,0 +1,466 @@

+""" test partial slicing on Series/Frame """
+from datetime import datetime
+import numpy as np
+import pytest
+from pandas import (
+    DataFrame,
+    DatetimeIndex,
+    Index,
+    MultiIndex,
+    Series,
+    Timedelta,
+    Timestamp,
+    date_range,
+)
+import pandas._testing as tm
+class TestSlicing:
+    def test_string_index_series_name_converted(self):
+        # GH#1644
+        # selecting with a date string gives a result whose name equals the
+        # matching index element, via .loc on rows and [] on the transpose
+        df = DataFrame(
+            np.random.default_rng(2).standard_normal((10, 4)),
+            index=date_range("1/1/2000", periods=10),
+        )
+        result = df.loc["1/3/2000"]
+        assert result.name == df.index[2]
+        result = df.T["1/3/2000"]
+        assert result.name == df.index[2]
+    def test_stringified_slice_with_tz(self):
+        # GH#2658
+        # no assertion: the test passes if the string slice does not raise
+        start = "2013-01-07"
+        idx = date_range(start=start, freq="1d", periods=10, tz="US/Eastern")
+        df = DataFrame(np.arange(10), index=idx)
+        df["2013-01-14 23:44:34.437768-05:00":]  # no exception here
+    def test_return_type_doesnt_depend_on_monotonicity(self):
+        # GH#24892 we get Series back regardless of whether our DTI is monotonic
+        # the index is 23:59, 00:00, 00:01 around midnight of 2015-05-14
+        dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3)
+        ser = Series(range(3), index=dti)
+        # non-monotonic index
+        ser2 = Series(range(3), index=[dti[1], dti[0], dti[2]])
+        # key with resolution strictly lower than "min"
+        key = "2015-5-14 00"
+        # monotonic increasing index
+        result = ser.loc[key]
+        expected = ser.iloc[1:]
+        tm.assert_series_equal(result, expected)
+        # monotonic decreasing index
+        result = ser.iloc[::-1].loc[key]
+        expected = ser.iloc[::-1][:-1]
+        tm.assert_series_equal(result, expected)
+        # non-monotonic index
+        result2 = ser2.loc[key]
+        expected2 = ser2.iloc[::2]
+        tm.assert_series_equal(result2, expected2)
+    def test_return_type_doesnt_depend_on_monotonicity_higher_reso(self):
+        # GH#24892 we get Series back regardless of whether our DTI is monotonic
+        # NOTE(review): the assertions below compare each result against a
+        # scalar, not a Series - the line above may be a carry-over from the
+        # lower-resolution test; confirm.
+        dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3)
+        ser = Series(range(3), index=dti)
+        # non-monotonic index
+        ser2 = Series(range(3), index=[dti[1], dti[0], dti[2]])
+        # key with resolution strictly *higher* than "min"
+        key = "2015-5-14 00:00:00"
+        # monotonic increasing index
+        result = ser.loc[key]
+        assert result == 1
+        # monotonic decreasing index
+        result = ser.iloc[::-1].loc[key]
+        assert result == 1
+        # non-monotonic index
+        result2 = ser2.loc[key]
+        assert result2 == 0
+    def test_monotone_DTI_indexing_bug(self):
+        # GH 19362
+        # Testing accessing the first element in a monotonic descending
+        # partial string indexing.
+        df = DataFrame(list(range(5)))
+        date_list = [
+            "2018-01-02",
+            "2017-02-10",
+            "2016-03-10",
+            "2015-03-15",
+            "2014-03-16",
+        ]
+        date_index = DatetimeIndex(date_list)
+        df["date"] = date_index
+        expected = DataFrame({0: list(range(5)), "date": date_index})
+        tm.assert_frame_equal(df, expected)
+        # We get a slice because df.index's resolution is hourly and we
+        #  are slicing with a daily-resolution string.  If both were daily,
+        #  we would get a single item back
+        dti = date_range("20170101 01:00:00", periods=3)
+        df = DataFrame({"A": [1, 2, 3]}, index=dti[::-1])
+        expected = DataFrame({"A": 1}, index=dti[-1:][::-1])
+        result = df.loc["2017-01-03"]
+        tm.assert_frame_equal(result, expected)
+        result2 = df.iloc[::-1].loc["2017-01-03"]
+        expected2 = expected.iloc[::-1]
+        tm.assert_frame_equal(result2, expected2)
+    def test_slice_year(self):
+        dti = date_range(freq="B", start=datetime(2005, 1, 1), periods=500)
+        s = Series(np.arange(len(dti)), index=dti)
+        result = s["2005"]
+        expected = s[s.index.year == 2005]
+        tm.assert_series_equal(result, expected)
+        df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
+        result = df.loc["2005"]
+        expected = df[df.index.year == 2005]
+        tm.assert_frame_equal(result, expected)
+    @pytest.mark.parametrize(
+        "partial_dtime",
+        [
+            "2019",
+            "2019Q4",
+            "Dec 2019",
+            "2019-12-31",
+            "2019-12-31 23",
+            "2019-12-31 23:59",
+        ],
+    )
+    def test_slice_end_of_period_resolution(self, partial_dtime):
+        # GH#31064
+        dti = date_range("2019-12-31 23:59:55.999999999", periods=10, freq="s")
+        ser = Series(range(10), index=dti)
+        result = ser[partial_dtime]
+        expected = ser.iloc[:5]
+        tm.assert_series_equal(result, expected)
+    def test_slice_quarter(self):
+        dti = date_range(freq="D", start=datetime(2000, 6, 1), periods=500)
+        s = Series(np.arange(len(dti)), index=dti)
+        assert len(s["2001Q1"]) == 90
+        df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
+        assert len(df.loc["1Q01"]) == 90
+    def test_slice_month(self):
+        dti = date_range(freq="D", start=datetime(2005, 1, 1), periods=500)
+        s = Series(np.arange(len(dti)), index=dti)
+        assert len(s["2005-11"]) == 30
+        df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
+        assert len(df.loc["2005-11"]) == 30
+        tm.assert_series_equal(s["2005-11"], s["11-2005"])
+    def test_partial_slice(self):
+        rng = date_range(freq="D", start=datetime(2005, 1, 1), periods=500)
+        s = Series(np.arange(len(rng)), index=rng)
+        result = s["2005-05":"2006-02"]
+        expected = s["20050501":"20060228"]
+        tm.assert_series_equal(result, expected)
+        result = s["2005-05":]
+        expected = s["20050501":]
+        tm.assert_series_equal(result, expected)
+        result = s[:"2006-02"]
+        expected = s[:"20060228"]
+        tm.assert_series_equal(result, expected)
+        result = s["2005-1-1"]
+        assert result == s.iloc[0]
+        with pytest.raises(KeyError, match=r"^'2004-12-31'$"):
+            s["2004-12-31"]
+    def test_partial_slice_daily(self):
+        rng = date_range(freq="h", start=datetime(2005, 1, 31), periods=500)
+        s = Series(np.arange(len(rng)), index=rng)
+        result = s["2005-1-31"]
+        tm.assert_series_equal(result, s.iloc[:24])
+        with pytest.raises(KeyError, match=r"^'2004-12-31 00'$"):
+            s["2004-12-31 00"]
+    def test_partial_slice_hourly(self):
+        rng = date_range(freq="min", start=datetime(2005, 1, 1, 20, 0, 0), periods=500)
+        s = Series(np.arange(len(rng)), index=rng)
+        result = s["2005-1-1"]
+        tm.assert_series_equal(result, s.iloc[: 60 * 4])
+        result = s["2005-1-1 20"]
+        tm.assert_series_equal(result, s.iloc[:60])
+        assert s["2005-1-1 20:00"] == s.iloc[0]
+        with pytest.raises(KeyError, match=r"^'2004-12-31 00:15'$"):
+            s["2004-12-31 00:15"]
+    def test_partial_slice_minutely(self):
+        rng = date_range(freq="s", start=datetime(2005, 1, 1, 23, 59, 0), periods=500)
+        s = Series(np.arange(len(rng)), index=rng)
+        result = s["2005-1-1 23:59"]
+        tm.assert_series_equal(result, s.iloc[:60])
+        result = s["2005-1-1"]
+        tm.assert_series_equal(result, s.iloc[:60])
+        assert s[Timestamp("2005-1-1 23:59:00")] == s.iloc[0]
+        with pytest.raises(KeyError, match=r"^'2004-12-31 00:00:00'$"):
+            s["2004-12-31 00:00:00"]
+    def test_partial_slice_second_precision(self):
+        rng = date_range(
+            start=datetime(2005, 1, 1, 0, 0, 59, microsecond=999990),
+            periods=20,
+            freq="us",
+        )
+        s = Series(np.arange(20), rng)
+        tm.assert_series_equal(s["2005-1-1 00:00"], s.iloc[:10])
+        tm.assert_series_equal(s["2005-1-1 00:00:59"], s.iloc[:10])
+        tm.assert_series_equal(s["2005-1-1 00:01"], s.iloc[10:])
+        tm.assert_series_equal(s["2005-1-1 00:01:00"], s.iloc[10:])
+        assert s[Timestamp("2005-1-1 00:00:59.999990")] == s.iloc[0]
+        with pytest.raises(KeyError, match="2005-1-1 00:00:00"):
+            s["2005-1-1 00:00:00"]
+    def test_partial_slicing_dataframe(self):
+        """Check string lookups for every string-vs-index resolution pairing."""
+        # GH14856
+        # Test various combinations of string slicing resolution vs.
+        # index resolution
+        # - If string resolution is less precise than index resolution,
+        # string is considered a slice
+        # - If string resolution is equal to or more precise than index
+        # resolution, string is considered an exact match
+        # formats[i] renders a timestamp at the precision named by resolutions[i]
+        formats = [
+            "%Y",
+            "%Y-%m",
+            "%Y-%m-%d",
+            "%Y-%m-%d %H",
+            "%Y-%m-%d %H:%M",
+            "%Y-%m-%d %H:%M:%S",
+        ]
+        resolutions = ["year", "month", "day", "hour", "minute", "second"]
+        for rnum, resolution in enumerate(resolutions[2:], 2):
+            # we check only 'day', 'hour', 'minute' and 'second'
+            unit = Timedelta("1 " + resolution)
+            middate = datetime(2012, 1, 1, 0, 0, 0)
+            # three points one `unit` apart, centred on middate
+            index = DatetimeIndex([middate - unit, middate, middate + unit])
+            values = [1, 2, 3]
+            df = DataFrame({"a": values}, index, dtype=np.int64)
+            assert df.index.resolution == resolution
+            # Timestamp with the same resolution as index
+            # Should be exact match for Series (return scalar)
+            # and raise KeyError for Frame
+            for timestamp, expected in zip(index, values):
+                ts_string = timestamp.strftime(formats[rnum])
+                # make ts_string as precise as index
+                result = df["a"][ts_string]
+                assert isinstance(result, np.int64)
+                assert result == expected
+                msg = rf"^'{ts_string}'$"
+                with pytest.raises(KeyError, match=msg):
+                    df[ts_string]
+            # Timestamp with resolution less precise than index
+            for fmt in formats[:rnum]:
+                # index[0] alone precedes middate; index[1] and index[2] share
+                # every coarser period that starts at middate
+                for element, theslice in [[0, slice(None, 1)], [1, slice(1, None)]]:
+                    ts_string = index[element].strftime(fmt)
+                    # Series should return slice
+                    result = df["a"][ts_string]
+                    expected = df["a"][theslice]
+                    tm.assert_series_equal(result, expected)
+                    # pre-2.0 df[ts_string] was overloaded to interpret this
+                    #  as slicing along index
+                    with pytest.raises(KeyError, match=ts_string):
+                        df[ts_string]
+            # Timestamp with resolution more precise than index
+            # Compatible with existing key
+            # Should return scalar for Series
+            # and raise KeyError for Frame
+            for fmt in formats[rnum + 1 :]:
+                ts_string = index[1].strftime(fmt)
+                result = df["a"][ts_string]
+                assert isinstance(result, np.int64)
+                assert result == 2
+                msg = rf"^'{ts_string}'$"
+                with pytest.raises(KeyError, match=msg):
+                    df[ts_string]
+            # Not compatible with existing key
+            # Should raise KeyError
+            for fmt, res in list(zip(formats, resolutions))[rnum + 1 :]:
+                # shift middate by one finer unit so no index entry matches
+                ts = index[1] + Timedelta("1 " + res)
+                ts_string = ts.strftime(fmt)
+                msg = rf"^'{ts_string}'$"
+                with pytest.raises(KeyError, match=msg):
+                    df["a"][ts_string]
+                with pytest.raises(KeyError, match=msg):
+                    df[ts_string]
+    def test_partial_slicing_with_multiindex(self):
+        # GH 4758
+        # partial string indexing with a multi-index buggy
+        df = DataFrame(
+            {
+                "ACCOUNT": ["ACCT1", "ACCT1", "ACCT1", "ACCT2"],
+                "TICKER": ["ABC", "MNP", "XYZ", "XYZ"],
+                "val": [1, 2, 3, 4],
+            },
+            index=date_range("2013-06-19 09:30:00", periods=4, freq="5min"),
+        )
+        df_multi = df.set_index(["ACCOUNT", "TICKER"], append=True)
+        expected = DataFrame(
+            [[1]], index=Index(["ABC"], name="TICKER"), columns=["val"]
+        )
+        result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1")]
+        tm.assert_frame_equal(result, expected)
+        expected = df_multi.loc[
+            (Timestamp("2013-06-19 09:30:00", tz=None), "ACCT1", "ABC")
+        ]
+        result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1", "ABC")]
+        tm.assert_series_equal(result, expected)
+        # partial string indexing on first level, scalar indexing on the other two
+        result = df_multi.loc[("2013-06-19", "ACCT1", "ABC")]
+        expected = df_multi.iloc[:1].droplevel([1, 2])
+        tm.assert_frame_equal(result, expected)
+    def test_partial_slicing_with_multiindex_series(self):
+        # GH 4294
+        # partial slice on a series mi
+        ser = Series(
+            range(250),
+            index=MultiIndex.from_product(
+                [date_range("2000-1-1", periods=50), range(5)]
+            ),
+        )
+        s2 = ser[:-1].copy()
+        expected = s2["2000-1-4"]
+        result = s2[Timestamp("2000-1-4")]
+        tm.assert_series_equal(result, expected)
+        result = ser[Timestamp("2000-1-4")]
+        expected = ser["2000-1-4"]
+        tm.assert_series_equal(result, expected)
+        df2 = DataFrame(ser)
+        expected = df2.xs("2000-1-4")
+        result = df2.loc[Timestamp("2000-1-4")]
+        tm.assert_frame_equal(result, expected)
+    def test_partial_slice_requires_monotonicity(self):
+        # Disallowed since 2.0 (GH 37819)
+        ser = Series(np.arange(10), date_range("2014-01-01", periods=10))
+        nonmonotonic = ser.iloc[[3, 5, 4]]
+        timestamp = Timestamp("2014-01-10")
+        with pytest.raises(
+            KeyError, match="Value based partial slicing on non-monotonic"
+        ):
+            nonmonotonic["2014-01-10":]
+        with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"):
+            nonmonotonic[timestamp:]
+        with pytest.raises(
+            KeyError, match="Value based partial slicing on non-monotonic"
+        ):
+            nonmonotonic.loc["2014-01-10":]
+        with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"):
+            nonmonotonic.loc[timestamp:]
+    def test_loc_datetime_length_one(self):
+        # GH16071
+        df = DataFrame(
+            columns=["1"],
+            index=date_range("2016-10-01T00:00:00", "2016-10-01T23:59:59"),
+        )
+        result = df.loc[datetime(2016, 10, 1) :]
+        tm.assert_frame_equal(result, df)
+        result = df.loc["2016-10-01T00:00:00":]
+        tm.assert_frame_equal(result, df)
+    @pytest.mark.parametrize(
+        "start",
+        [
+            "2018-12-02 21:50:00+00:00",
+            Timestamp("2018-12-02 21:50:00+00:00"),
+            Timestamp("2018-12-02 21:50:00+00:00").to_pydatetime(),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "end",
+        [
+            "2018-12-02 21:52:00+00:00",
+            Timestamp("2018-12-02 21:52:00+00:00"),
+            Timestamp("2018-12-02 21:52:00+00:00").to_pydatetime(),
+        ],
+    )
+    def test_getitem_with_datestring_with_UTC_offset(self, start, end):
+        # GH 24076
+        idx = date_range(
+            start="2018-12-02 14:50:00-07:00",
+            end="2018-12-02 14:50:00-07:00",
+            freq="1min",
+        )
+        df = DataFrame(1, index=idx, columns=["A"])
+        result = df[start:end]
+        expected = df.iloc[0:3, :]
+        tm.assert_frame_equal(result, expected)
+        # GH 16785
+        start = str(start)
+        end = str(end)
+        with pytest.raises(ValueError, match="Both dates must"):
+            df[start : end[:-4] + "1:00"]
+        with pytest.raises(ValueError, match="The index must be timezone"):
+            df = df.tz_localize(None)
+            df[start:end]
+    def test_slice_reduce_to_series(self):
+        # GH 27516
+        df = DataFrame(
+            {"A": range(24)}, index=date_range("2000", periods=24, freq="ME")
+        )
+        expected = Series(
+            range(12), index=date_range("2000", periods=12, freq="ME"), name="A"
+        )
+        result = df.loc["2000", "A"]
+        tm.assert_series_equal(result, expected)

py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_pickle.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import pytest
+from pandas import (
+    NaT,
+    date_range,
+    to_datetime,
+)
+import pandas._testing as tm
+class TestPickle:
+    def test_pickle(self):
+        # GH#4606
+        idx = to_datetime(["2013-01-01", NaT, "2014-01-06"])
+        idx_p = tm.round_trip_pickle(idx)
+        assert idx_p[0] == idx[0]
+        assert idx_p[1] is NaT
+        assert idx_p[2] == idx[2]
+    def test_pickle_dont_infer_freq(self):
+        # GH#11002
+        # don't infer freq
+        idx = date_range("1750-1-1", "2050-1-1", freq="7D")
+        idx_p = tm.round_trip_pickle(idx)
+        tm.assert_index_equal(idx, idx_p)
+    def test_pickle_after_set_freq(self):
+        dti = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
+        dti = dti._with_freq(None)
+        res = tm.round_trip_pickle(dti)
+        tm.assert_index_equal(res, dti)
+    def test_roundtrip_pickle_with_tz(self):
+        # GH#8367
+        # round-trip of timezone
+        index = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
+        unpickled = tm.round_trip_pickle(index)
+        tm.assert_index_equal(index, unpickled)
+    @pytest.mark.parametrize("freq", ["B", "C"])
+    def test_pickle_unpickle(self, freq):
+        rng = date_range("2009-01-01", "2010-01-01", freq=freq)
+        unpickled = tm.round_trip_pickle(rng)
+        assert unpickled.freq == freq

py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_reindex.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from datetime import timedelta
+import numpy as np
+from pandas import (
+    DatetimeIndex,
+    date_range,
+)
+import pandas._testing as tm
+class TestDatetimeIndexReindex:
+    def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self):
+        # GH#7774
+        index = date_range("2013-01-01", periods=3, tz="US/Eastern")
+        assert str(index.reindex([])[0].tz) == "US/Eastern"
+        assert str(index.reindex(np.array([]))[0].tz) == "US/Eastern"
+    def test_reindex_with_same_tz_nearest(self):
+        # GH#32740
+        rng_a = date_range("2010-01-01", "2010-01-02", periods=24, tz="utc")
+        rng_b = date_range("2010-01-01", "2010-01-02", periods=23, tz="utc")
+        result1, result2 = rng_a.reindex(
+            rng_b, method="nearest", tolerance=timedelta(seconds=20)
+        )
+        expected_list1 = [
+            "2010-01-01 00:00:00",
+            "2010-01-01 01:05:27.272727272",
+            "2010-01-01 02:10:54.545454545",
+            "2010-01-01 03:16:21.818181818",
+            "2010-01-01 04:21:49.090909090",
+            "2010-01-01 05:27:16.363636363",
+            "2010-01-01 06:32:43.636363636",
+            "2010-01-01 07:38:10.909090909",
+            "2010-01-01 08:43:38.181818181",
+            "2010-01-01 09:49:05.454545454",
+            "2010-01-01 10:54:32.727272727",
+            "2010-01-01 12:00:00",
+            "2010-01-01 13:05:27.272727272",
+            "2010-01-01 14:10:54.545454545",
+            "2010-01-01 15:16:21.818181818",
+            "2010-01-01 16:21:49.090909090",
+            "2010-01-01 17:27:16.363636363",
+            "2010-01-01 18:32:43.636363636",
+            "2010-01-01 19:38:10.909090909",
+            "2010-01-01 20:43:38.181818181",
+            "2010-01-01 21:49:05.454545454",
+            "2010-01-01 22:54:32.727272727",
+            "2010-01-02 00:00:00",
+        ]
+        expected1 = DatetimeIndex(
+            expected_list1, dtype="datetime64[ns, UTC]", freq=None
+        )
+        expected2 = np.array([0] + [-1] * 21 + [23], dtype=np.dtype("intp"))
+        tm.assert_index_equal(result1, expected1)
+        tm.assert_numpy_array_equal(result2, expected2)

py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_scalar_compat.py ADDED Viewed

	@@ -0,0 +1,329 @@

+"""
+Tests for DatetimeIndex methods behaving like their Timestamp counterparts
+"""
+import calendar
+from datetime import (
+    date,
+    datetime,
+    time,
+)
+import locale
+import unicodedata
+import numpy as np
+import pytest
+from pandas._libs.tslibs import timezones
+from pandas import (
+    DatetimeIndex,
+    Index,
+    NaT,
+    Timestamp,
+    date_range,
+    offsets,
+)
+import pandas._testing as tm
+from pandas.core.arrays import DatetimeArray
+class TestDatetimeIndexOps:
+    def test_dti_no_millisecond_field(self):
+        msg = "type object 'DatetimeIndex' has no attribute 'millisecond'"
+        with pytest.raises(AttributeError, match=msg):
+            DatetimeIndex.millisecond
+        msg = "'DatetimeIndex' object has no attribute 'millisecond'"
+        with pytest.raises(AttributeError, match=msg):
+            DatetimeIndex([]).millisecond
+    def test_dti_time(self):
+        rng = date_range("1/1/2000", freq="12min", periods=10)
+        result = Index(rng).time
+        expected = [t.time() for t in rng]
+        assert (result == expected).all()
+    def test_dti_date(self):
+        rng = date_range("1/1/2000", freq="12h", periods=10)
+        result = Index(rng).date
+        expected = [t.date() for t in rng]
+        assert (result == expected).all()
+    @pytest.mark.parametrize(
+        "dtype",
+        [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"],
+    )
+    def test_dti_date2(self, dtype):
+        # Regression test for GH#21230
+        expected = np.array([date(2018, 6, 4), NaT])
+        index = DatetimeIndex(["2018-06-04 10:00:00", NaT], dtype=dtype)
+        result = index.date
+        tm.assert_numpy_array_equal(result, expected)
+    @pytest.mark.parametrize(
+        "dtype",
+        [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"],
+    )
+    def test_dti_time2(self, dtype):
+        # Regression test for GH#21267
+        expected = np.array([time(10, 20, 30), NaT])
+        index = DatetimeIndex(["2018-06-04 10:20:30", NaT], dtype=dtype)
+        result = index.time
+        tm.assert_numpy_array_equal(result, expected)
+    def test_dti_timetz(self, tz_naive_fixture):
+        # GH#21358
+        tz = timezones.maybe_get_tz(tz_naive_fixture)
+        expected = np.array([time(10, 20, 30, tzinfo=tz), NaT])
+        index = DatetimeIndex(["2018-06-04 10:20:30", NaT], tz=tz)
+        result = index.timetz
+        tm.assert_numpy_array_equal(result, expected)
+    @pytest.mark.parametrize(
+        "field",
+        [
+            "dayofweek",
+            "day_of_week",
+            "dayofyear",
+            "day_of_year",
+            "quarter",
+            "days_in_month",
+            "is_month_start",
+            "is_month_end",
+            "is_quarter_start",
+            "is_quarter_end",
+            "is_year_start",
+            "is_year_end",
+        ],
+    )
+    def test_dti_timestamp_fields(self, field):
+        # extra fields from DatetimeIndex like quarter and week
+        idx = date_range("2020-01-01", periods=10)
+        expected = getattr(idx, field)[-1]
+        result = getattr(Timestamp(idx[-1]), field)
+        assert result == expected
+    def test_dti_nanosecond(self):
+        dti = DatetimeIndex(np.arange(10))
+        expected = Index(np.arange(10, dtype=np.int32))
+        tm.assert_index_equal(dti.nanosecond, expected)
+    @pytest.mark.parametrize("prefix", ["", "dateutil/"])
+    def test_dti_hour_tzaware(self, prefix):
+        strdates = ["1/1/2012", "3/1/2012", "4/1/2012"]
+        rng = DatetimeIndex(strdates, tz=prefix + "US/Eastern")
+        assert (rng.hour == 0).all()
+        # a more unusual time zone, GH#1946
+        dr = date_range(
+            "2011-10-02 00:00", freq="h", periods=10, tz=prefix + "America/Atikokan"
+        )
+        expected = Index(np.arange(10, dtype=np.int32))
+        tm.assert_index_equal(dr.hour, expected)
+    # GH#12806
+    # error: Unsupported operand types for + ("List[None]" and "List[str]")
+    @pytest.mark.parametrize(
+        "time_locale", [None] + tm.get_locales()  # type: ignore[operator]
+    )
+    def test_day_name_month_name(self, time_locale):
+        # Test Monday -> Sunday and January -> December, in that sequence
+        if time_locale is None:
+            # If the time_locale is None, day-name and month_name should
+            # return the english attributes
+            expected_days = [
+                "Monday",
+                "Tuesday",
+                "Wednesday",
+                "Thursday",
+                "Friday",
+                "Saturday",
+                "Sunday",
+            ]
+            expected_months = [
+                "January",
+                "February",
+                "March",
+                "April",
+                "May",
+                "June",
+                "July",
+                "August",
+                "September",
+                "October",
+                "November",
+                "December",
+            ]
+        else:
+            with tm.set_locale(time_locale, locale.LC_TIME):
+                expected_days = calendar.day_name[:]
+                expected_months = calendar.month_name[1:]
+        # GH#11128
+        dti = date_range(freq="D", start=datetime(1998, 1, 1), periods=365)
+        english_days = [
+            "Monday",
+            "Tuesday",
+            "Wednesday",
+            "Thursday",
+            "Friday",
+            "Saturday",
+            "Sunday",
+        ]
+        for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
+            name = name.capitalize()
+            assert dti.day_name(locale=time_locale)[day] == name
+            assert dti.day_name(locale=None)[day] == eng_name
+            ts = Timestamp(datetime(2016, 4, day))
+            assert ts.day_name(locale=time_locale) == name
+        dti = dti.append(DatetimeIndex([NaT]))
+        assert np.isnan(dti.day_name(locale=time_locale)[-1])
+        ts = Timestamp(NaT)
+        assert np.isnan(ts.day_name(locale=time_locale))
+        # GH#12805
+        dti = date_range(freq="ME", start="2012", end="2013")
+        result = dti.month_name(locale=time_locale)
+        expected = Index([month.capitalize() for month in expected_months])
+        # work around different normalization schemes GH#22342
+        result = result.str.normalize("NFD")
+        expected = expected.str.normalize("NFD")
+        tm.assert_index_equal(result, expected)
+        for item, expected in zip(dti, expected_months):
+            result = item.month_name(locale=time_locale)
+            expected = expected.capitalize()
+            result = unicodedata.normalize("NFD", result)
+            expected = unicodedata.normalize("NFD", result)
+            assert result == expected
+        dti = dti.append(DatetimeIndex([NaT]))
+        assert np.isnan(dti.month_name(locale=time_locale)[-1])
+    def test_dti_week(self):
+        # GH#6538: Check that DatetimeIndex and its TimeStamp elements
+        # return the same weekofyear accessor close to new year w/ tz
+        dates = ["2013/12/29", "2013/12/30", "2013/12/31"]
+        dates = DatetimeIndex(dates, tz="Europe/Brussels")
+        expected = [52, 1, 1]
+        assert dates.isocalendar().week.tolist() == expected
+        assert [d.weekofyear for d in dates] == expected
+    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
+    def test_dti_fields(self, tz):
+        # GH#13303
+        dti = date_range(freq="D", start=datetime(1998, 1, 1), periods=365, tz=tz)
+        assert dti.year[0] == 1998
+        assert dti.month[0] == 1
+        assert dti.day[0] == 1
+        assert dti.hour[0] == 0
+        assert dti.minute[0] == 0
+        assert dti.second[0] == 0
+        assert dti.microsecond[0] == 0
+        assert dti.dayofweek[0] == 3
+        assert dti.dayofyear[0] == 1
+        assert dti.dayofyear[120] == 121
+        assert dti.isocalendar().week.iloc[0] == 1
+        assert dti.isocalendar().week.iloc[120] == 18
+        assert dti.quarter[0] == 1
+        assert dti.quarter[120] == 2
+        assert dti.days_in_month[0] == 31
+        assert dti.days_in_month[90] == 30
+        assert dti.is_month_start[0]
+        assert not dti.is_month_start[1]
+        assert dti.is_month_start[31]
+        assert dti.is_quarter_start[0]
+        assert dti.is_quarter_start[90]
+        assert dti.is_year_start[0]
+        assert not dti.is_year_start[364]
+        assert not dti.is_month_end[0]
+        assert dti.is_month_end[30]
+        assert not dti.is_month_end[31]
+        assert dti.is_month_end[364]
+        assert not dti.is_quarter_end[0]
+        assert not dti.is_quarter_end[30]
+        assert dti.is_quarter_end[89]
+        assert dti.is_quarter_end[364]
+        assert not dti.is_year_end[0]
+        assert dti.is_year_end[364]
+        assert len(dti.year) == 365
+        assert len(dti.month) == 365
+        assert len(dti.day) == 365
+        assert len(dti.hour) == 365
+        assert len(dti.minute) == 365
+        assert len(dti.second) == 365
+        assert len(dti.microsecond) == 365
+        assert len(dti.dayofweek) == 365
+        assert len(dti.dayofyear) == 365
+        assert len(dti.isocalendar()) == 365
+        assert len(dti.quarter) == 365
+        assert len(dti.is_month_start) == 365
+        assert len(dti.is_month_end) == 365
+        assert len(dti.is_quarter_start) == 365
+        assert len(dti.is_quarter_end) == 365
+        assert len(dti.is_year_start) == 365
+        assert len(dti.is_year_end) == 365
+        dti.name = "name"
+        # non boolean accessors -> return Index
+        for accessor in DatetimeArray._field_ops:
+            res = getattr(dti, accessor)
+            assert len(res) == 365
+            assert isinstance(res, Index)
+            assert res.name == "name"
+        # boolean accessors -> return array
+        for accessor in DatetimeArray._bool_ops:
+            res = getattr(dti, accessor)
+            assert len(res) == 365
+            assert isinstance(res, np.ndarray)
+        # test boolean indexing
+        res = dti[dti.is_quarter_start]
+        exp = dti[[0, 90, 181, 273]]
+        tm.assert_index_equal(res, exp)
+        res = dti[dti.is_leap_year]
+        exp = DatetimeIndex([], freq="D", tz=dti.tz, name="name").as_unit("ns")
+        tm.assert_index_equal(res, exp)
+    def test_dti_is_year_quarter_start(self):
+        dti = date_range(freq="BQE-FEB", start=datetime(1998, 1, 1), periods=4)
+        assert sum(dti.is_quarter_start) == 0
+        assert sum(dti.is_quarter_end) == 4
+        assert sum(dti.is_year_start) == 0
+        assert sum(dti.is_year_end) == 1
+    def test_dti_is_month_start(self):
+        dti = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"])
+        assert dti.is_month_start[0] == 1
+    def test_dti_is_month_start_custom(self):
+        # Ensure is_start/end accessors throw ValueError for CustomBusinessDay,
+        bday_egypt = offsets.CustomBusinessDay(weekmask="Sun Mon Tue Wed Thu")
+        dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt)
+        msg = "Custom business days is not supported by is_month_start"
+        with pytest.raises(ValueError, match=msg):
+            dti.is_month_start

py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_setops.py ADDED Viewed

	@@ -0,0 +1,666 @@

+from datetime import (
+    datetime,
+    timezone,
+)
+import numpy as np
+import pytest
+import pytz
+import pandas.util._test_decorators as td
+import pandas as pd
+from pandas import (
+    DataFrame,
+    DatetimeIndex,
+    Index,
+    Series,
+    Timestamp,
+    bdate_range,
+    date_range,
+)
+import pandas._testing as tm
+from pandas.tseries.offsets import (
+    BMonthEnd,
+    Minute,
+    MonthEnd,
+)
+START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
+class TestDatetimeIndexSetOps:
+    tz = [
+        None,
+        "UTC",
+        "Asia/Tokyo",
+        "US/Eastern",
+        "dateutil/Asia/Singapore",
+        "dateutil/US/Pacific",
+    ]
+    # TODO: moved from test_datetimelike; dedup with version below
+    def test_union2(self, sort):
+        everything = date_range("2020-01-01", periods=10)
+        first = everything[:5]
+        second = everything[5:]
+        union = first.union(second, sort=sort)
+        tm.assert_index_equal(union, everything)
+    @pytest.mark.parametrize("box", [np.array, Series, list])
+    def test_union3(self, sort, box):
+        everything = date_range("2020-01-01", periods=10)
+        first = everything[:5]
+        second = everything[5:]
+        # GH 10149 support listlike inputs other than Index objects
+        expected = first.union(second, sort=sort)
+        case = box(second.values)
+        result = first.union(case, sort=sort)
+        tm.assert_index_equal(result, expected)
+    @pytest.mark.parametrize("tz", tz)
+    def test_union(self, tz, sort):
+        rng1 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
+        other1 = date_range("1/6/2000", freq="D", periods=5, tz=tz)
+        expected1 = date_range("1/1/2000", freq="D", periods=10, tz=tz)
+        expected1_notsorted = DatetimeIndex(list(other1) + list(rng1))
+        rng2 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
+        other2 = date_range("1/4/2000", freq="D", periods=5, tz=tz)
+        expected2 = date_range("1/1/2000", freq="D", periods=8, tz=tz)
+        expected2_notsorted = DatetimeIndex(list(other2) + list(rng2[:3]))
+        rng3 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
+        other3 = DatetimeIndex([], tz=tz).as_unit("ns")
+        expected3 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
+        expected3_notsorted = rng3
+        for rng, other, exp, exp_notsorted in [
+            (rng1, other1, expected1, expected1_notsorted),
+            (rng2, other2, expected2, expected2_notsorted),
+            (rng3, other3, expected3, expected3_notsorted),
+        ]:
+            result_union = rng.union(other, sort=sort)
+            tm.assert_index_equal(result_union, exp)
+            result_union = other.union(rng, sort=sort)
+            if sort is None:
+                tm.assert_index_equal(result_union, exp)
+            else:
+                tm.assert_index_equal(result_union, exp_notsorted)
+    def test_union_coverage(self, sort):
+        idx = DatetimeIndex(["2000-01-03", "2000-01-01", "2000-01-02"])
+        ordered = DatetimeIndex(idx.sort_values(), freq="infer")
+        result = ordered.union(idx, sort=sort)
+        tm.assert_index_equal(result, ordered)
+        result = ordered[:0].union(ordered, sort=sort)
+        tm.assert_index_equal(result, ordered)
+        assert result.freq == ordered.freq
+    def test_union_bug_1730(self, sort):
+        rng_a = date_range("1/1/2012", periods=4, freq="3h")
+        rng_b = date_range("1/1/2012", periods=4, freq="4h")
+        result = rng_a.union(rng_b, sort=sort)
+        exp = list(rng_a) + list(rng_b[1:])
+        if sort is None:
+            exp = DatetimeIndex(sorted(exp))
+        else:
+            exp = DatetimeIndex(exp)
+        tm.assert_index_equal(result, exp)
+    def test_union_bug_1745(self, sort):
+        left = DatetimeIndex(["2012-05-11 15:19:49.695000"])
+        right = DatetimeIndex(
+            [
+                "2012-05-29 13:04:21.322000",
+                "2012-05-11 15:27:24.873000",
+                "2012-05-11 15:31:05.350000",
+            ]
+        )
+        result = left.union(right, sort=sort)
+        exp = DatetimeIndex(
+            [
+                "2012-05-11 15:19:49.695000",
+                "2012-05-29 13:04:21.322000",
+                "2012-05-11 15:27:24.873000",
+                "2012-05-11 15:31:05.350000",
+            ]
+        )
+        if sort is None:
+            exp = exp.sort_values()
+        tm.assert_index_equal(result, exp)
+    def test_union_bug_4564(self, sort):
+        from pandas import DateOffset
+        left = date_range("2013-01-01", "2013-02-01")
+        right = left + DateOffset(minutes=15)
+        result = left.union(right, sort=sort)
+        exp = list(left) + list(right)
+        if sort is None:
+            exp = DatetimeIndex(sorted(exp))
+        else:
+            exp = DatetimeIndex(exp)
+        tm.assert_index_equal(result, exp)
+    def test_union_freq_both_none(self, sort):
+        # GH11086
+        expected = bdate_range("20150101", periods=10)
+        expected._data.freq = None
+        result = expected.union(expected, sort=sort)
+        tm.assert_index_equal(result, expected)
+        assert result.freq is None
+    def test_union_freq_infer(self):
+        # When taking the union of two DatetimeIndexes, we infer
+        #  a freq even if the arguments don't have freq.  This matches
+        #  TimedeltaIndex behavior.
+        dti = date_range("2016-01-01", periods=5)
+        left = dti[[0, 1, 3, 4]]
+        right = dti[[2, 3, 1]]
+        assert left.freq is None
+        assert right.freq is None
+        result = left.union(right)
+        tm.assert_index_equal(result, dti)
+        assert result.freq == "D"
+    def test_union_dataframe_index(self):
+        rng1 = date_range("1/1/1999", "1/1/2012", freq="MS")
+        s1 = Series(np.random.default_rng(2).standard_normal(len(rng1)), rng1)
+        rng2 = date_range("1/1/1980", "12/1/2001", freq="MS")
+        s2 = Series(np.random.default_rng(2).standard_normal(len(rng2)), rng2)
+        df = DataFrame({"s1": s1, "s2": s2})
+        exp = date_range("1/1/1980", "1/1/2012", freq="MS")
+        tm.assert_index_equal(df.index, exp)
+    def test_union_with_DatetimeIndex(self, sort):
+        i1 = Index(np.arange(0, 20, 2, dtype=np.int64))
+        i2 = date_range(start="2012-01-03 00:00:00", periods=10, freq="D")
+        # Works
+        i1.union(i2, sort=sort)
+        # Fails with "AttributeError: can't set attribute"
+        i2.union(i1, sort=sort)
+    def test_union_same_timezone_different_units(self):
+        # GH 55238
+        idx1 = date_range("2000-01-01", periods=3, tz="UTC").as_unit("ms")
+        idx2 = date_range("2000-01-01", periods=3, tz="UTC").as_unit("us")
+        result = idx1.union(idx2)
+        expected = date_range("2000-01-01", periods=3, tz="UTC").as_unit("us")
+        tm.assert_index_equal(result, expected)
+    # TODO: moved from test_datetimelike; de-duplicate with version below
+    def test_intersection2(self):
+        first = date_range("2020-01-01", periods=10)
+        second = first[5:]
+        intersect = first.intersection(second)
+        tm.assert_index_equal(intersect, second)
+        # GH 10149
+        cases = [klass(second.values) for klass in [np.array, Series, list]]
+        for case in cases:
+            result = first.intersection(case)
+            tm.assert_index_equal(result, second)
+        third = Index(["a", "b", "c"])
+        result = first.intersection(third)
+        expected = Index([], dtype=object)
+        tm.assert_index_equal(result, expected)
+    @pytest.mark.parametrize(
+        "tz", [None, "Asia/Tokyo", "US/Eastern", "dateutil/US/Pacific"]
+    )
+    def test_intersection(self, tz, sort):
+        # GH 4690 (with tz)
+        base = date_range("6/1/2000", "6/30/2000", freq="D", name="idx")
+        # if target has the same name, it is preserved
+        rng2 = date_range("5/15/2000", "6/20/2000", freq="D", name="idx")
+        expected2 = date_range("6/1/2000", "6/20/2000", freq="D", name="idx")
+        # if target name is different, it will be reset
+        rng3 = date_range("5/15/2000", "6/20/2000", freq="D", name="other")
+        expected3 = date_range("6/1/2000", "6/20/2000", freq="D", name=None)
+        rng4 = date_range("7/1/2000", "7/31/2000", freq="D", name="idx")
+        expected4 = DatetimeIndex([], freq="D", name="idx", dtype="M8[ns]")
+        for rng, expected in [
+            (rng2, expected2),
+            (rng3, expected3),
+            (rng4, expected4),
+        ]:
+            result = base.intersection(rng)
+            tm.assert_index_equal(result, expected)
+            assert result.freq == expected.freq
+        # non-monotonic
+        base = DatetimeIndex(
+            ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"], tz=tz, name="idx"
+        ).as_unit("ns")
+        rng2 = DatetimeIndex(
+            ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], tz=tz, name="idx"
+        ).as_unit("ns")
+        expected2 = DatetimeIndex(
+            ["2011-01-04", "2011-01-02"], tz=tz, name="idx"
+        ).as_unit("ns")
+        rng3 = DatetimeIndex(
+            ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"],
+            tz=tz,
+            name="other",
+        ).as_unit("ns")
+        expected3 = DatetimeIndex(
+            ["2011-01-04", "2011-01-02"], tz=tz, name=None
+        ).as_unit("ns")
+        # GH 7880
+        rng4 = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx")
+        expected4 = DatetimeIndex([], tz=tz, name="idx").as_unit("ns")
+        assert expected4.freq is None
+        for rng, expected in [
+            (rng2, expected2),
+            (rng3, expected3),
+            (rng4, expected4),
+        ]:
+            result = base.intersection(rng, sort=sort)
+            if sort is None:
+                expected = expected.sort_values()
+            tm.assert_index_equal(result, expected)
+            assert result.freq == expected.freq
+    # parametrize over both anchored and non-anchored freqs, as they
+    #  have different code paths
+    @pytest.mark.parametrize("freq", ["min", "B"])
+    def test_intersection_empty(self, tz_aware_fixture, freq):
+        # empty same freq GH2129
+        tz = tz_aware_fixture
+        rng = date_range("6/1/2000", "6/15/2000", freq=freq, tz=tz)
+        result = rng[0:0].intersection(rng)
+        assert len(result) == 0
+        assert result.freq == rng.freq
+        result = rng.intersection(rng[0:0])
+        assert len(result) == 0
+        assert result.freq == rng.freq
+        # no overlap GH#33604
+        check_freq = freq != "min"  # We don't preserve freq on non-anchored offsets
+        result = rng[:3].intersection(rng[-3:])
+        tm.assert_index_equal(result, rng[:0])
+        if check_freq:
+            # We don't preserve freq on non-anchored offsets
+            assert result.freq == rng.freq
+        # swapped left and right
+        result = rng[-3:].intersection(rng[:3])
+        tm.assert_index_equal(result, rng[:0])
+        if check_freq:
+            # We don't preserve freq on non-anchored offsets
+            assert result.freq == rng.freq
+    def test_intersection_bug_1708(self):
+        from pandas import DateOffset
+        index_1 = date_range("1/1/2012", periods=4, freq="12h")
+        index_2 = index_1 + DateOffset(hours=1)
+        result = index_1.intersection(index_2)
+        assert len(result) == 0
+    @pytest.mark.parametrize("tz", tz)
+    def test_difference(self, tz, sort):
+        rng_dates = ["1/2/2000", "1/3/2000", "1/1/2000", "1/4/2000", "1/5/2000"]
+        rng1 = DatetimeIndex(rng_dates, tz=tz)
+        other1 = date_range("1/6/2000", freq="D", periods=5, tz=tz)
+        expected1 = DatetimeIndex(rng_dates, tz=tz)
+        rng2 = DatetimeIndex(rng_dates, tz=tz)
+        other2 = date_range("1/4/2000", freq="D", periods=5, tz=tz)
+        expected2 = DatetimeIndex(rng_dates[:3], tz=tz)
+        rng3 = DatetimeIndex(rng_dates, tz=tz)
+        other3 = DatetimeIndex([], tz=tz)
+        expected3 = DatetimeIndex(rng_dates, tz=tz)
+        for rng, other, expected in [
+            (rng1, other1, expected1),
+            (rng2, other2, expected2),
+            (rng3, other3, expected3),
+        ]:
+            result_diff = rng.difference(other, sort)
+            if sort is None and len(other):
+                # We dont sort (yet?) when empty GH#24959
+                expected = expected.sort_values()
+            tm.assert_index_equal(result_diff, expected)
+    def test_difference_freq(self, sort):
+        # GH14323: difference of DatetimeIndex should not preserve frequency
+        index = date_range("20160920", "20160925", freq="D")
+        other = date_range("20160921", "20160924", freq="D")
+        expected = DatetimeIndex(["20160920", "20160925"], dtype="M8[ns]", freq=None)
+        idx_diff = index.difference(other, sort)
+        tm.assert_index_equal(idx_diff, expected)
+        tm.assert_attr_equal("freq", idx_diff, expected)
+        # preserve frequency when the difference is a contiguous
+        # subset of the original range
+        other = date_range("20160922", "20160925", freq="D")
+        idx_diff = index.difference(other, sort)
+        expected = DatetimeIndex(["20160920", "20160921"], dtype="M8[ns]", freq="D")
+        tm.assert_index_equal(idx_diff, expected)
+        tm.assert_attr_equal("freq", idx_diff, expected)
+    def test_datetimeindex_diff(self, sort):
+        dti1 = date_range(freq="QE-JAN", start=datetime(1997, 12, 31), periods=100)
+        dti2 = date_range(freq="QE-JAN", start=datetime(1997, 12, 31), periods=98)
+        assert len(dti1.difference(dti2, sort)) == 2
+    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Eastern"])
+    def test_setops_preserve_freq(self, tz):
+        rng = date_range("1/1/2000", "1/1/2002", name="idx", tz=tz)
+        result = rng[:50].union(rng[50:100])
+        assert result.name == rng.name
+        assert result.freq == rng.freq
+        assert result.tz == rng.tz
+        result = rng[:50].union(rng[30:100])
+        assert result.name == rng.name
+        assert result.freq == rng.freq
+        assert result.tz == rng.tz
+        result = rng[:50].union(rng[60:100])
+        assert result.name == rng.name
+        assert result.freq is None
+        assert result.tz == rng.tz
+        result = rng[:50].intersection(rng[25:75])
+        assert result.name == rng.name
+        assert result.freqstr == "D"
+        assert result.tz == rng.tz
+        nofreq = DatetimeIndex(list(rng[25:75]), name="other")
+        result = rng[:50].union(nofreq)
+        assert result.name is None
+        assert result.freq == rng.freq
+        assert result.tz == rng.tz
+        result = rng[:50].intersection(nofreq)
+        assert result.name is None
+        assert result.freq == rng.freq
+        assert result.tz == rng.tz
+    def test_intersection_non_tick_no_fastpath(self):
+        # GH#42104
+        dti = DatetimeIndex(
+            [
+                "2018-12-31",
+                "2019-03-31",
+                "2019-06-30",
+                "2019-09-30",
+                "2019-12-31",
+                "2020-03-31",
+            ],
+            freq="QE-DEC",
+        )
+        result = dti[::2].intersection(dti[1::2])
+        expected = dti[:0]
+        tm.assert_index_equal(result, expected)
+    def test_dti_intersection(self):
+        rng = date_range("1/1/2011", periods=100, freq="h", tz="utc")
+        left = rng[10:90][::-1]
+        right = rng[20:80][::-1]
+        assert left.tz == rng.tz
+        result = left.intersection(right)
+        assert result.tz == left.tz
+    # Note: not difference, as there is no symmetry requirement there
+    @pytest.mark.parametrize("setop", ["union", "intersection", "symmetric_difference"])
+    def test_dti_setop_aware(self, setop):
+        # non-overlapping
+        # GH#39328 as of 2.0 we cast these to UTC instead of object
+        rng = date_range("2012-11-15 00:00:00", periods=6, freq="h", tz="US/Central")
+        rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="h", tz="US/Eastern")
+        result = getattr(rng, setop)(rng2)
+        left = rng.tz_convert("UTC")
+        right = rng2.tz_convert("UTC")
+        expected = getattr(left, setop)(right)
+        tm.assert_index_equal(result, expected)
+        assert result.tz == left.tz
+        if len(result):
+            assert result[0].tz is timezone.utc
+            assert result[-1].tz is timezone.utc
+    def test_dti_union_mixed(self):
+        # GH#21671
+        rng = DatetimeIndex([Timestamp("2011-01-01"), pd.NaT])
+        rng2 = DatetimeIndex(["2012-01-01", "2012-01-02"], tz="Asia/Tokyo")
+        result = rng.union(rng2)
+        expected = Index(
+            [
+                Timestamp("2011-01-01"),
+                pd.NaT,
+                Timestamp("2012-01-01", tz="Asia/Tokyo"),
+                Timestamp("2012-01-02", tz="Asia/Tokyo"),
+            ],
+            dtype=object,
+        )
+        tm.assert_index_equal(result, expected)
+class TestBusinessDatetimeIndex:
+    def test_union(self, sort):
+        rng = bdate_range(START, END)
+        # overlapping
+        left = rng[:10]
+        right = rng[5:10]
+        the_union = left.union(right, sort=sort)
+        assert isinstance(the_union, DatetimeIndex)
+        # non-overlapping, gap in middle
+        left = rng[:5]
+        right = rng[10:]
+        the_union = left.union(right, sort=sort)
+        assert isinstance(the_union, Index)
+        # non-overlapping, no gap
+        left = rng[:5]
+        right = rng[5:10]
+        the_union = left.union(right, sort=sort)
+        assert isinstance(the_union, DatetimeIndex)
+        # order does not matter
+        if sort is None:
+            tm.assert_index_equal(right.union(left, sort=sort), the_union)
+        else:
+            expected = DatetimeIndex(list(right) + list(left))
+            tm.assert_index_equal(right.union(left, sort=sort), expected)
+        # overlapping, but different offset
+        rng = date_range(START, END, freq=BMonthEnd())
+        the_union = rng.union(rng, sort=sort)
+        assert isinstance(the_union, DatetimeIndex)
+    def test_union_not_cacheable(self, sort):
+        rng = date_range("1/1/2000", periods=50, freq=Minute())
+        rng1 = rng[10:]
+        rng2 = rng[:25]
+        the_union = rng1.union(rng2, sort=sort)
+        if sort is None:
+            tm.assert_index_equal(the_union, rng)
+        else:
+            expected = DatetimeIndex(list(rng[10:]) + list(rng[:10]))
+            tm.assert_index_equal(the_union, expected)
+        rng1 = rng[10:]
+        rng2 = rng[15:35]
+        the_union = rng1.union(rng2, sort=sort)
+        expected = rng[10:]
+        tm.assert_index_equal(the_union, expected)
+    def test_intersection(self):
+        rng = date_range("1/1/2000", periods=50, freq=Minute())
+        rng1 = rng[10:]
+        rng2 = rng[:25]
+        the_int = rng1.intersection(rng2)
+        expected = rng[10:25]
+        tm.assert_index_equal(the_int, expected)
+        assert isinstance(the_int, DatetimeIndex)
+        assert the_int.freq == rng.freq
+        the_int = rng1.intersection(rng2)
+        tm.assert_index_equal(the_int, expected)
+        # non-overlapping
+        the_int = rng[:10].intersection(rng[10:])
+        expected = DatetimeIndex([]).as_unit("ns")
+        tm.assert_index_equal(the_int, expected)
+    def test_intersection_bug(self):
+        # GH #771
+        a = bdate_range("11/30/2011", "12/31/2011")
+        b = bdate_range("12/10/2011", "12/20/2011")
+        result = a.intersection(b)
+        tm.assert_index_equal(result, b)
+        assert result.freq == b.freq
+    def test_intersection_list(self):
+        # GH#35876
+        # values is not an Index -> no name -> retain "a"
+        values = [Timestamp("2020-01-01"), Timestamp("2020-02-01")]
+        idx = DatetimeIndex(values, name="a")
+        res = idx.intersection(values)
+        tm.assert_index_equal(res, idx)
+    def test_month_range_union_tz_pytz(self, sort):
+        tz = pytz.timezone("US/Eastern")
+        early_start = datetime(2011, 1, 1)
+        early_end = datetime(2011, 3, 1)
+        late_start = datetime(2011, 3, 1)
+        late_end = datetime(2011, 5, 1)
+        early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd())
+        late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd())
+        early_dr.union(late_dr, sort=sort)
+    @td.skip_if_windows
+    def test_month_range_union_tz_dateutil(self, sort):
+        from pandas._libs.tslibs.timezones import dateutil_gettz
+        tz = dateutil_gettz("US/Eastern")
+        early_start = datetime(2011, 1, 1)
+        early_end = datetime(2011, 3, 1)
+        late_start = datetime(2011, 3, 1)
+        late_end = datetime(2011, 5, 1)
+        early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd())
+        late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd())
+        early_dr.union(late_dr, sort=sort)
+    @pytest.mark.parametrize("sort", [False, None])
+    def test_intersection_duplicates(self, sort):
+        # GH#38196
+        idx1 = Index(
+            [
+                Timestamp("2019-12-13"),
+                Timestamp("2019-12-12"),
+                Timestamp("2019-12-12"),
+            ]
+        )
+        result = idx1.intersection(idx1, sort=sort)
+        expected = Index([Timestamp("2019-12-13"), Timestamp("2019-12-12")])
+        tm.assert_index_equal(result, expected)
+class TestCustomDatetimeIndex:
+    def test_union(self, sort):
+        # overlapping
+        rng = bdate_range(START, END, freq="C")
+        left = rng[:10]
+        right = rng[5:10]
+        the_union = left.union(right, sort=sort)
+        assert isinstance(the_union, DatetimeIndex)
+        # non-overlapping, gap in middle
+        left = rng[:5]
+        right = rng[10:]
+        the_union = left.union(right, sort)
+        assert isinstance(the_union, Index)
+        # non-overlapping, no gap
+        left = rng[:5]
+        right = rng[5:10]
+        the_union = left.union(right, sort=sort)
+        assert isinstance(the_union, DatetimeIndex)
+        # order does not matter
+        if sort is None:
+            tm.assert_index_equal(right.union(left, sort=sort), the_union)
+        # overlapping, but different offset
+        rng = date_range(START, END, freq=BMonthEnd())
+        the_union = rng.union(rng, sort=sort)
+        assert isinstance(the_union, DatetimeIndex)
+    def test_intersection_bug(self):
+        # GH #771
+        a = bdate_range("11/30/2011", "12/31/2011", freq="C")
+        b = bdate_range("12/10/2011", "12/20/2011", freq="C")
+        result = a.intersection(b)
+        tm.assert_index_equal(result, b)
+        assert result.freq == b.freq
+    @pytest.mark.parametrize(
+        "tz", [None, "UTC", "Europe/Berlin", pytz.FixedOffset(-60)]
+    )
+    def test_intersection_dst_transition(self, tz):
+        # GH 46702: Europe/Berlin has DST transition
+        idx1 = date_range("2020-03-27", periods=5, freq="D", tz=tz)
+        idx2 = date_range("2020-03-30", periods=5, freq="D", tz=tz)
+        result = idx1.intersection(idx2)
+        expected = date_range("2020-03-30", periods=2, freq="D", tz=tz)
+        tm.assert_index_equal(result, expected)
+        # GH#45863 same problem for union
+        index1 = date_range("2021-10-28", periods=3, freq="D", tz="Europe/London")
+        index2 = date_range("2021-10-30", periods=4, freq="D", tz="Europe/London")
+        result = index1.union(index2)
+        expected = date_range("2021-10-28", periods=6, freq="D", tz="Europe/London")
+        tm.assert_index_equal(result, expected)