JustinTX committed on
Commit
a7e0016
·
verified ·
1 Parent(s): 31ac681

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/__init__.py +0 -0
  2. py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_can_hold_element.py +79 -0
  3. py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_from_scalar.py +55 -0
  4. py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_ndarray.py +36 -0
  5. py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_object_arr.py +20 -0
  6. py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_dict_compat.py +14 -0
  7. py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_downcast.py +97 -0
  8. py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_find_common_type.py +175 -0
  9. py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_infer_datetimelike.py +28 -0
  10. py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_infer_dtype.py +216 -0
  11. py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_maybe_box_native.py +40 -0
  12. py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_promote.py +530 -0
  13. py311/lib/python3.11/site-packages/pandas/tests/extension/base/casting.py +87 -0
  14. py311/lib/python3.11/site-packages/pandas/tests/extension/base/constructors.py +142 -0
  15. py311/lib/python3.11/site-packages/pandas/tests/extension/base/dim2.py +345 -0
  16. py311/lib/python3.11/site-packages/pandas/tests/extension/base/setitem.py +451 -0
  17. py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/__init__.py +8 -0
  18. py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/array.py +311 -0
  19. py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/test_decimal.py +587 -0
  20. py311/lib/python3.11/site-packages/pandas/tests/extension/list/__init__.py +7 -0
  21. py311/lib/python3.11/site-packages/pandas/tests/extension/list/array.py +137 -0
  22. py311/lib/python3.11/site-packages/pandas/tests/extension/list/test_list.py +33 -0
  23. py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/__init__.py +0 -0
  24. py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_append.py +62 -0
  25. py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_category.py +391 -0
  26. py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_constructors.py +142 -0
  27. py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_equals.py +96 -0
  28. py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_fillna.py +54 -0
  29. py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_formats.py +120 -0
  30. py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_indexing.py +420 -0
  31. py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_map.py +144 -0
  32. py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_reindex.py +78 -0
  33. py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_setops.py +18 -0
  34. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/__init__.py +0 -0
  35. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_arithmetic.py +56 -0
  36. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_constructors.py +1204 -0
  37. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_date_range.py +1721 -0
  38. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_datetime.py +216 -0
  39. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_formats.py +356 -0
  40. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_freq_attr.py +61 -0
  41. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_indexing.py +717 -0
  42. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_iter.py +76 -0
  43. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_join.py +153 -0
  44. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_npfuncs.py +13 -0
  45. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_ops.py +56 -0
  46. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_partial_slicing.py +466 -0
  47. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_pickle.py +45 -0
  48. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_reindex.py +56 -0
  49. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_scalar_compat.py +329 -0
  50. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_setops.py +666 -0
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/__init__.py ADDED
File without changes
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_can_hold_element.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ from pandas.core.dtypes.cast import can_hold_element
4
+
5
+
6
+ def test_can_hold_element_range(any_int_numpy_dtype):
7
+ # GH#44261
8
+ dtype = np.dtype(any_int_numpy_dtype)
9
+ arr = np.array([], dtype=dtype)
10
+
11
+ rng = range(2, 127)
12
+ assert can_hold_element(arr, rng)
13
+
14
+ # negatives -> can't be held by uint dtypes
15
+ rng = range(-2, 127)
16
+ if dtype.kind == "i":
17
+ assert can_hold_element(arr, rng)
18
+ else:
19
+ assert not can_hold_element(arr, rng)
20
+
21
+ rng = range(2, 255)
22
+ if dtype == "int8":
23
+ assert not can_hold_element(arr, rng)
24
+ else:
25
+ assert can_hold_element(arr, rng)
26
+
27
+ rng = range(-255, 65537)
28
+ if dtype.kind == "u":
29
+ assert not can_hold_element(arr, rng)
30
+ elif dtype.itemsize < 4:
31
+ assert not can_hold_element(arr, rng)
32
+ else:
33
+ assert can_hold_element(arr, rng)
34
+
35
+ # empty
36
+ rng = range(-(10**10), -(10**10))
37
+ assert len(rng) == 0
38
+ # assert can_hold_element(arr, rng)
39
+
40
+ rng = range(10**10, 10**10)
41
+ assert len(rng) == 0
42
+ assert can_hold_element(arr, rng)
43
+
44
+
45
+ def test_can_hold_element_int_values_float_ndarray():
46
+ arr = np.array([], dtype=np.int64)
47
+
48
+ element = np.array([1.0, 2.0])
49
+ assert can_hold_element(arr, element)
50
+
51
+ assert not can_hold_element(arr, element + 0.5)
52
+
53
+ # integer but not losslessly castable to int64
54
+ element = np.array([3, 2**65], dtype=np.float64)
55
+ assert not can_hold_element(arr, element)
56
+
57
+
58
+ def test_can_hold_element_int8_int():
59
+ arr = np.array([], dtype=np.int8)
60
+
61
+ element = 2
62
+ assert can_hold_element(arr, element)
63
+ assert can_hold_element(arr, np.int8(element))
64
+ assert can_hold_element(arr, np.uint8(element))
65
+ assert can_hold_element(arr, np.int16(element))
66
+ assert can_hold_element(arr, np.uint16(element))
67
+ assert can_hold_element(arr, np.int32(element))
68
+ assert can_hold_element(arr, np.uint32(element))
69
+ assert can_hold_element(arr, np.int64(element))
70
+ assert can_hold_element(arr, np.uint64(element))
71
+
72
+ element = 2**9
73
+ assert not can_hold_element(arr, element)
74
+ assert not can_hold_element(arr, np.int16(element))
75
+ assert not can_hold_element(arr, np.uint16(element))
76
+ assert not can_hold_element(arr, np.int32(element))
77
+ assert not can_hold_element(arr, np.uint32(element))
78
+ assert not can_hold_element(arr, np.int64(element))
79
+ assert not can_hold_element(arr, np.uint64(element))
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_from_scalar.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas.core.dtypes.cast import construct_1d_arraylike_from_scalar
5
+ from pandas.core.dtypes.dtypes import CategoricalDtype
6
+
7
+ from pandas import (
8
+ Categorical,
9
+ Timedelta,
10
+ )
11
+ import pandas._testing as tm
12
+
13
+
14
+ def test_cast_1d_array_like_from_scalar_categorical():
15
+ # see gh-19565
16
+ #
17
+ # Categorical result from scalar did not maintain
18
+ # categories and ordering of the passed dtype.
19
+ cats = ["a", "b", "c"]
20
+ cat_type = CategoricalDtype(categories=cats, ordered=False)
21
+ expected = Categorical(["a", "a"], categories=cats)
22
+
23
+ result = construct_1d_arraylike_from_scalar("a", len(expected), cat_type)
24
+ tm.assert_categorical_equal(result, expected)
25
+
26
+
27
+ def test_cast_1d_array_like_from_timestamp(fixed_now_ts):
28
+ # check we dont lose nanoseconds
29
+ ts = fixed_now_ts + Timedelta(1)
30
+ res = construct_1d_arraylike_from_scalar(ts, 2, np.dtype("M8[ns]"))
31
+ assert res[0] == ts
32
+
33
+
34
+ def test_cast_1d_array_like_from_timedelta():
35
+ # check we dont lose nanoseconds
36
+ td = Timedelta(1)
37
+ res = construct_1d_arraylike_from_scalar(td, 2, np.dtype("m8[ns]"))
38
+ assert res[0] == td
39
+
40
+
41
+ def test_cast_1d_array_like_mismatched_datetimelike():
42
+ td = np.timedelta64("NaT", "ns")
43
+ dt = np.datetime64("NaT", "ns")
44
+
45
+ with pytest.raises(TypeError, match="Cannot cast"):
46
+ construct_1d_arraylike_from_scalar(td, 2, dt.dtype)
47
+
48
+ with pytest.raises(TypeError, match="Cannot cast"):
49
+ construct_1d_arraylike_from_scalar(np.timedelta64(4, "ns"), 2, dt.dtype)
50
+
51
+ with pytest.raises(TypeError, match="Cannot cast"):
52
+ construct_1d_arraylike_from_scalar(dt, 2, td.dtype)
53
+
54
+ with pytest.raises(TypeError, match="Cannot cast"):
55
+ construct_1d_arraylike_from_scalar(np.datetime64(4, "ns"), 2, td.dtype)
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_ndarray.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ import pandas as pd
5
+ import pandas._testing as tm
6
+ from pandas.core.construction import sanitize_array
7
+
8
+
9
+ @pytest.mark.parametrize(
10
+ "values, dtype, expected",
11
+ [
12
+ ([1, 2, 3], None, np.array([1, 2, 3], dtype=np.int64)),
13
+ (np.array([1, 2, 3]), None, np.array([1, 2, 3])),
14
+ (["1", "2", None], None, np.array(["1", "2", None])),
15
+ (["1", "2", None], np.dtype("str"), np.array(["1", "2", None])),
16
+ ([1, 2, None], np.dtype("str"), np.array(["1", "2", None])),
17
+ ],
18
+ )
19
+ def test_construct_1d_ndarray_preserving_na(
20
+ values, dtype, expected, using_infer_string
21
+ ):
22
+ result = sanitize_array(values, index=None, dtype=dtype)
23
+ if using_infer_string and expected.dtype == object and dtype is None:
24
+ tm.assert_extension_array_equal(result, pd.array(expected, dtype="str"))
25
+ else:
26
+ tm.assert_numpy_array_equal(result, expected)
27
+
28
+
29
+ @pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]"])
30
+ def test_construct_1d_ndarray_preserving_na_datetimelike(dtype):
31
+ arr = np.arange(5, dtype=np.int64).view(dtype)
32
+ expected = np.array(list(arr), dtype=object)
33
+ assert all(isinstance(x, type(arr[0])) for x in expected)
34
+
35
+ result = sanitize_array(arr, index=None, dtype=np.dtype(object))
36
+ tm.assert_numpy_array_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_construct_object_arr.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
4
+
5
+
6
+ @pytest.mark.parametrize("datum1", [1, 2.0, "3", (4, 5), [6, 7], None])
7
+ @pytest.mark.parametrize("datum2", [8, 9.0, "10", (11, 12), [13, 14], None])
8
+ def test_cast_1d_array(datum1, datum2):
9
+ data = [datum1, datum2]
10
+ result = construct_1d_object_array_from_listlike(data)
11
+
12
+ # Direct comparison fails: https://github.com/numpy/numpy/issues/10218
13
+ assert result.dtype == "object"
14
+ assert list(result) == data
15
+
16
+
17
+ @pytest.mark.parametrize("val", [1, 2.0, None])
18
+ def test_cast_1d_array_invalid_scalar(val):
19
+ with pytest.raises(TypeError, match="has no len()"):
20
+ construct_1d_object_array_from_listlike(val)
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_dict_compat.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ from pandas.core.dtypes.cast import dict_compat
4
+
5
+ from pandas import Timestamp
6
+
7
+
8
+ def test_dict_compat():
9
+ data_datetime64 = {np.datetime64("1990-03-15"): 1, np.datetime64("2015-03-15"): 2}
10
+ data_unchanged = {1: 2, 3: 4, 5: 6}
11
+ expected = {Timestamp("1990-3-15"): 1, Timestamp("2015-03-15"): 2}
12
+ assert dict_compat(data_datetime64) == expected
13
+ assert dict_compat(expected) == expected
14
+ assert dict_compat(data_unchanged) == data_unchanged
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_downcast.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import decimal
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+ from pandas.core.dtypes.cast import maybe_downcast_to_dtype
7
+
8
+ from pandas import (
9
+ Series,
10
+ Timedelta,
11
+ )
12
+ import pandas._testing as tm
13
+
14
+
15
+ @pytest.mark.parametrize(
16
+ "arr,dtype,expected",
17
+ [
18
+ (
19
+ np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]),
20
+ "infer",
21
+ np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]),
22
+ ),
23
+ (
24
+ np.array([8.0, 8.0, 8.0, 8.0, 8.9999999999995]),
25
+ "infer",
26
+ np.array([8, 8, 8, 8, 9], dtype=np.int64),
27
+ ),
28
+ (
29
+ np.array([8.0, 8.0, 8.0, 8.0, 9.0000000000005]),
30
+ "infer",
31
+ np.array([8, 8, 8, 8, 9], dtype=np.int64),
32
+ ),
33
+ (
34
+ # This is a judgement call, but we do _not_ downcast Decimal
35
+ # objects
36
+ np.array([decimal.Decimal(0.0)]),
37
+ "int64",
38
+ np.array([decimal.Decimal(0.0)]),
39
+ ),
40
+ (
41
+ # GH#45837
42
+ np.array([Timedelta(days=1), Timedelta(days=2)], dtype=object),
43
+ "infer",
44
+ np.array([1, 2], dtype="m8[D]").astype("m8[ns]"),
45
+ ),
46
+ # TODO: similar for dt64, dt64tz, Period, Interval?
47
+ ],
48
+ )
49
+ def test_downcast(arr, expected, dtype):
50
+ result = maybe_downcast_to_dtype(arr, dtype)
51
+ tm.assert_numpy_array_equal(result, expected)
52
+
53
+
54
+ def test_downcast_booleans():
55
+ # see gh-16875: coercing of booleans.
56
+ ser = Series([True, True, False])
57
+ result = maybe_downcast_to_dtype(ser, np.dtype(np.float64))
58
+
59
+ expected = ser.values
60
+ tm.assert_numpy_array_equal(result, expected)
61
+
62
+
63
+ def test_downcast_conversion_no_nan(any_real_numpy_dtype):
64
+ dtype = any_real_numpy_dtype
65
+ expected = np.array([1, 2])
66
+ arr = np.array([1.0, 2.0], dtype=dtype)
67
+
68
+ result = maybe_downcast_to_dtype(arr, "infer")
69
+ tm.assert_almost_equal(result, expected, check_dtype=False)
70
+
71
+
72
+ def test_downcast_conversion_nan(float_numpy_dtype):
73
+ dtype = float_numpy_dtype
74
+ data = [1.0, 2.0, np.nan]
75
+
76
+ expected = np.array(data, dtype=dtype)
77
+ arr = np.array(data, dtype=dtype)
78
+
79
+ result = maybe_downcast_to_dtype(arr, "infer")
80
+ tm.assert_almost_equal(result, expected)
81
+
82
+
83
+ def test_downcast_conversion_empty(any_real_numpy_dtype):
84
+ dtype = any_real_numpy_dtype
85
+ arr = np.array([], dtype=dtype)
86
+ result = maybe_downcast_to_dtype(arr, np.dtype("int64"))
87
+ tm.assert_numpy_array_equal(result, np.array([], dtype=np.int64))
88
+
89
+
90
+ @pytest.mark.parametrize("klass", [np.datetime64, np.timedelta64])
91
+ def test_datetime_likes_nan(klass):
92
+ dtype = klass.__name__ + "[ns]"
93
+ arr = np.array([1, 2, np.nan])
94
+
95
+ exp = np.array([1, 2, klass("NaT")], dtype)
96
+ res = maybe_downcast_to_dtype(arr, dtype)
97
+ tm.assert_numpy_array_equal(res, exp)
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_find_common_type.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas.core.dtypes.cast import find_common_type
5
+ from pandas.core.dtypes.common import pandas_dtype
6
+ from pandas.core.dtypes.dtypes import (
7
+ CategoricalDtype,
8
+ DatetimeTZDtype,
9
+ IntervalDtype,
10
+ PeriodDtype,
11
+ )
12
+
13
+ from pandas import (
14
+ Categorical,
15
+ Index,
16
+ )
17
+
18
+
19
+ @pytest.mark.parametrize(
20
+ "source_dtypes,expected_common_dtype",
21
+ [
22
+ ((np.int64,), np.int64),
23
+ ((np.uint64,), np.uint64),
24
+ ((np.float32,), np.float32),
25
+ ((object,), object),
26
+ # Into ints.
27
+ ((np.int16, np.int64), np.int64),
28
+ ((np.int32, np.uint32), np.int64),
29
+ ((np.uint16, np.uint64), np.uint64),
30
+ # Into floats.
31
+ ((np.float16, np.float32), np.float32),
32
+ ((np.float16, np.int16), np.float32),
33
+ ((np.float32, np.int16), np.float32),
34
+ ((np.uint64, np.int64), np.float64),
35
+ ((np.int16, np.float64), np.float64),
36
+ ((np.float16, np.int64), np.float64),
37
+ # Into others.
38
+ ((np.complex128, np.int32), np.complex128),
39
+ ((object, np.float32), object),
40
+ ((object, np.int16), object),
41
+ # Bool with int.
42
+ ((np.dtype("bool"), np.int64), object),
43
+ ((np.dtype("bool"), np.int32), object),
44
+ ((np.dtype("bool"), np.int16), object),
45
+ ((np.dtype("bool"), np.int8), object),
46
+ ((np.dtype("bool"), np.uint64), object),
47
+ ((np.dtype("bool"), np.uint32), object),
48
+ ((np.dtype("bool"), np.uint16), object),
49
+ ((np.dtype("bool"), np.uint8), object),
50
+ # Bool with float.
51
+ ((np.dtype("bool"), np.float64), object),
52
+ ((np.dtype("bool"), np.float32), object),
53
+ (
54
+ (np.dtype("datetime64[ns]"), np.dtype("datetime64[ns]")),
55
+ np.dtype("datetime64[ns]"),
56
+ ),
57
+ (
58
+ (np.dtype("timedelta64[ns]"), np.dtype("timedelta64[ns]")),
59
+ np.dtype("timedelta64[ns]"),
60
+ ),
61
+ (
62
+ (np.dtype("datetime64[ns]"), np.dtype("datetime64[ms]")),
63
+ np.dtype("datetime64[ns]"),
64
+ ),
65
+ (
66
+ (np.dtype("timedelta64[ms]"), np.dtype("timedelta64[ns]")),
67
+ np.dtype("timedelta64[ns]"),
68
+ ),
69
+ ((np.dtype("datetime64[ns]"), np.dtype("timedelta64[ns]")), object),
70
+ ((np.dtype("datetime64[ns]"), np.int64), object),
71
+ ],
72
+ )
73
+ def test_numpy_dtypes(source_dtypes, expected_common_dtype):
74
+ source_dtypes = [pandas_dtype(x) for x in source_dtypes]
75
+ assert find_common_type(source_dtypes) == expected_common_dtype
76
+
77
+
78
+ def test_raises_empty_input():
79
+ with pytest.raises(ValueError, match="no types given"):
80
+ find_common_type([])
81
+
82
+
83
+ @pytest.mark.parametrize(
84
+ "dtypes,exp_type",
85
+ [
86
+ ([CategoricalDtype()], "category"),
87
+ ([object, CategoricalDtype()], object),
88
+ ([CategoricalDtype(), CategoricalDtype()], "category"),
89
+ ],
90
+ )
91
+ def test_categorical_dtype(dtypes, exp_type):
92
+ assert find_common_type(dtypes) == exp_type
93
+
94
+
95
+ def test_datetimetz_dtype_match():
96
+ dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern")
97
+ assert find_common_type([dtype, dtype]) == "datetime64[ns, US/Eastern]"
98
+
99
+
100
+ @pytest.mark.parametrize(
101
+ "dtype2",
102
+ [
103
+ DatetimeTZDtype(unit="ns", tz="Asia/Tokyo"),
104
+ np.dtype("datetime64[ns]"),
105
+ object,
106
+ np.int64,
107
+ ],
108
+ )
109
+ def test_datetimetz_dtype_mismatch(dtype2):
110
+ dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern")
111
+ assert find_common_type([dtype, dtype2]) == object
112
+ assert find_common_type([dtype2, dtype]) == object
113
+
114
+
115
+ def test_period_dtype_match():
116
+ dtype = PeriodDtype(freq="D")
117
+ assert find_common_type([dtype, dtype]) == "period[D]"
118
+
119
+
120
+ @pytest.mark.parametrize(
121
+ "dtype2",
122
+ [
123
+ DatetimeTZDtype(unit="ns", tz="Asia/Tokyo"),
124
+ PeriodDtype(freq="2D"),
125
+ PeriodDtype(freq="h"),
126
+ np.dtype("datetime64[ns]"),
127
+ object,
128
+ np.int64,
129
+ ],
130
+ )
131
+ def test_period_dtype_mismatch(dtype2):
132
+ dtype = PeriodDtype(freq="D")
133
+ assert find_common_type([dtype, dtype2]) == object
134
+ assert find_common_type([dtype2, dtype]) == object
135
+
136
+
137
+ interval_dtypes = [
138
+ IntervalDtype(np.int64, "right"),
139
+ IntervalDtype(np.float64, "right"),
140
+ IntervalDtype(np.uint64, "right"),
141
+ IntervalDtype(DatetimeTZDtype(unit="ns", tz="US/Eastern"), "right"),
142
+ IntervalDtype("M8[ns]", "right"),
143
+ IntervalDtype("m8[ns]", "right"),
144
+ ]
145
+
146
+
147
+ @pytest.mark.parametrize("left", interval_dtypes)
148
+ @pytest.mark.parametrize("right", interval_dtypes)
149
+ def test_interval_dtype(left, right):
150
+ result = find_common_type([left, right])
151
+
152
+ if left is right:
153
+ assert result is left
154
+
155
+ elif left.subtype.kind in ["i", "u", "f"]:
156
+ # i.e. numeric
157
+ if right.subtype.kind in ["i", "u", "f"]:
158
+ # both numeric -> common numeric subtype
159
+ expected = IntervalDtype(np.float64, "right")
160
+ assert result == expected
161
+ else:
162
+ assert result == object
163
+
164
+ else:
165
+ assert result == object
166
+
167
+
168
+ @pytest.mark.parametrize("dtype", interval_dtypes)
169
+ def test_interval_dtype_with_categorical(dtype):
170
+ obj = Index([], dtype=dtype)
171
+
172
+ cat = Categorical([], categories=obj)
173
+
174
+ result = find_common_type([dtype, cat.dtype])
175
+ assert result == dtype
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_infer_datetimelike.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas import (
5
+ DataFrame,
6
+ NaT,
7
+ Series,
8
+ Timestamp,
9
+ )
10
+
11
+
12
+ @pytest.mark.parametrize(
13
+ "data,exp_size",
14
+ [
15
+ # see gh-16362.
16
+ ([[NaT, "a", "b", 0], [NaT, "b", "c", 1]], 8),
17
+ ([[NaT, "a", 0], [NaT, "b", 1]], 6),
18
+ ],
19
+ )
20
+ def test_maybe_infer_to_datetimelike_df_construct(data, exp_size):
21
+ result = DataFrame(np.array(data))
22
+ assert result.size == exp_size
23
+
24
+
25
+ def test_maybe_infer_to_datetimelike_ser_construct():
26
+ # see gh-19671.
27
+ result = Series(["M1701", Timestamp("20130101")])
28
+ assert result.dtype.kind == "O"
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_infer_dtype.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import (
2
+ date,
3
+ datetime,
4
+ timedelta,
5
+ )
6
+
7
+ import numpy as np
8
+ import pytest
9
+
10
+ from pandas.core.dtypes.cast import (
11
+ infer_dtype_from,
12
+ infer_dtype_from_array,
13
+ infer_dtype_from_scalar,
14
+ )
15
+ from pandas.core.dtypes.common import is_dtype_equal
16
+
17
+ from pandas import (
18
+ Categorical,
19
+ Interval,
20
+ Period,
21
+ Series,
22
+ Timedelta,
23
+ Timestamp,
24
+ date_range,
25
+ )
26
+
27
+
28
+ def test_infer_dtype_from_int_scalar(any_int_numpy_dtype):
29
+ # Test that infer_dtype_from_scalar is
30
+ # returning correct dtype for int and float.
31
+ data = np.dtype(any_int_numpy_dtype).type(12)
32
+ dtype, val = infer_dtype_from_scalar(data)
33
+ assert dtype == type(data)
34
+
35
+
36
+ def test_infer_dtype_from_float_scalar(float_numpy_dtype):
37
+ float_numpy_dtype = np.dtype(float_numpy_dtype).type
38
+ data = float_numpy_dtype(12)
39
+
40
+ dtype, val = infer_dtype_from_scalar(data)
41
+ assert dtype == float_numpy_dtype
42
+
43
+
44
+ @pytest.mark.parametrize(
45
+ "data,exp_dtype", [(12, np.int64), (np.float64(12), np.float64)]
46
+ )
47
+ def test_infer_dtype_from_python_scalar(data, exp_dtype):
48
+ dtype, val = infer_dtype_from_scalar(data)
49
+ assert dtype == exp_dtype
50
+
51
+
52
+ @pytest.mark.parametrize("bool_val", [True, False])
53
+ def test_infer_dtype_from_boolean(bool_val):
54
+ dtype, val = infer_dtype_from_scalar(bool_val)
55
+ assert dtype == np.bool_
56
+
57
+
58
+ def test_infer_dtype_from_complex(complex_dtype):
59
+ data = np.dtype(complex_dtype).type(1)
60
+ dtype, val = infer_dtype_from_scalar(data)
61
+ assert dtype == np.complex128
62
+
63
+
64
+ def test_infer_dtype_from_datetime():
65
+ dt64 = np.datetime64(1, "ns")
66
+ dtype, val = infer_dtype_from_scalar(dt64)
67
+ assert dtype == "M8[ns]"
68
+
69
+ ts = Timestamp(1)
70
+ dtype, val = infer_dtype_from_scalar(ts)
71
+ assert dtype == "M8[ns]"
72
+
73
+ dt = datetime(2000, 1, 1, 0, 0)
74
+ dtype, val = infer_dtype_from_scalar(dt)
75
+ assert dtype == "M8[us]"
76
+
77
+
78
+ def test_infer_dtype_from_timedelta():
79
+ td64 = np.timedelta64(1, "ns")
80
+ dtype, val = infer_dtype_from_scalar(td64)
81
+ assert dtype == "m8[ns]"
82
+
83
+ pytd = timedelta(1)
84
+ dtype, val = infer_dtype_from_scalar(pytd)
85
+ assert dtype == "m8[us]"
86
+
87
+ td = Timedelta(1)
88
+ dtype, val = infer_dtype_from_scalar(td)
89
+ assert dtype == "m8[ns]"
90
+
91
+
92
+ @pytest.mark.parametrize("freq", ["M", "D"])
93
+ def test_infer_dtype_from_period(freq):
94
+ p = Period("2011-01-01", freq=freq)
95
+ dtype, val = infer_dtype_from_scalar(p)
96
+
97
+ exp_dtype = f"period[{freq}]"
98
+
99
+ assert dtype == exp_dtype
100
+ assert val == p
101
+
102
+
103
+ def test_infer_dtype_misc():
104
+ dt = date(2000, 1, 1)
105
+ dtype, val = infer_dtype_from_scalar(dt)
106
+ assert dtype == np.object_
107
+
108
+ ts = Timestamp(1, tz="US/Eastern")
109
+ dtype, val = infer_dtype_from_scalar(ts)
110
+ assert dtype == "datetime64[ns, US/Eastern]"
111
+
112
+
113
+ @pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo"])
114
+ def test_infer_from_scalar_tz(tz):
115
+ dt = Timestamp(1, tz=tz)
116
+ dtype, val = infer_dtype_from_scalar(dt)
117
+
118
+ exp_dtype = f"datetime64[ns, {tz}]"
119
+
120
+ assert dtype == exp_dtype
121
+ assert val == dt
122
+
123
+
124
+ @pytest.mark.parametrize(
125
+ "left, right, subtype",
126
+ [
127
+ (0, 1, "int64"),
128
+ (0.0, 1.0, "float64"),
129
+ (Timestamp(0), Timestamp(1), "datetime64[ns]"),
130
+ (Timestamp(0, tz="UTC"), Timestamp(1, tz="UTC"), "datetime64[ns, UTC]"),
131
+ (Timedelta(0), Timedelta(1), "timedelta64[ns]"),
132
+ ],
133
+ )
134
+ def test_infer_from_interval(left, right, subtype, closed):
135
+ # GH 30337
136
+ interval = Interval(left, right, closed)
137
+ result_dtype, result_value = infer_dtype_from_scalar(interval)
138
+ expected_dtype = f"interval[{subtype}, {closed}]"
139
+ assert result_dtype == expected_dtype
140
+ assert result_value == interval
141
+
142
+
143
+ def test_infer_dtype_from_scalar_errors():
144
+ msg = "invalid ndarray passed to infer_dtype_from_scalar"
145
+
146
+ with pytest.raises(ValueError, match=msg):
147
+ infer_dtype_from_scalar(np.array([1]))
148
+
149
+
150
+ @pytest.mark.parametrize(
151
+ "value, expected",
152
+ [
153
+ ("foo", np.object_),
154
+ (b"foo", np.object_),
155
+ (1, np.int64),
156
+ (1.5, np.float64),
157
+ (np.datetime64("2016-01-01"), np.dtype("M8[s]")),
158
+ (Timestamp("20160101"), np.dtype("M8[s]")),
159
+ (Timestamp("20160101", tz="UTC"), "datetime64[s, UTC]"),
160
+ ],
161
+ )
162
+ def test_infer_dtype_from_scalar(value, expected, using_infer_string):
163
+ dtype, _ = infer_dtype_from_scalar(value)
164
+ if using_infer_string and value == "foo":
165
+ expected = "string"
166
+ assert is_dtype_equal(dtype, expected)
167
+
168
+ with pytest.raises(TypeError, match="must be list-like"):
169
+ infer_dtype_from_array(value)
170
+
171
+
172
+ @pytest.mark.parametrize(
173
+ "arr, expected",
174
+ [
175
+ ([1], np.dtype(int)),
176
+ (np.array([1], dtype=np.int64), np.int64),
177
+ ([np.nan, 1, ""], np.object_),
178
+ (np.array([[1.0, 2.0]]), np.float64),
179
+ (Categorical(list("aabc")), "category"),
180
+ (Categorical([1, 2, 3]), "category"),
181
+ (date_range("20160101", periods=3), np.dtype("=M8[ns]")),
182
+ (
183
+ date_range("20160101", periods=3, tz="US/Eastern"),
184
+ "datetime64[ns, US/Eastern]",
185
+ ),
186
+ (Series([1.0, 2, 3]), np.float64),
187
+ (Series(list("abc")), np.object_),
188
+ (
189
+ Series(date_range("20160101", periods=3, tz="US/Eastern")),
190
+ "datetime64[ns, US/Eastern]",
191
+ ),
192
+ ],
193
+ )
194
+ def test_infer_dtype_from_array(arr, expected, using_infer_string):
195
+ dtype, _ = infer_dtype_from_array(arr)
196
+ if (
197
+ using_infer_string
198
+ and isinstance(arr, Series)
199
+ and arr.tolist() == ["a", "b", "c"]
200
+ ):
201
+ expected = "string"
202
+ assert is_dtype_equal(dtype, expected)
203
+
204
+
205
+ @pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64])
206
+ def test_infer_dtype_from_scalar_zerodim_datetimelike(cls):
207
+ # ndarray.item() can incorrectly return int instead of td64/dt64
208
+ val = cls(1234, "ns")
209
+ arr = np.array(val)
210
+
211
+ dtype, res = infer_dtype_from_scalar(arr)
212
+ assert dtype.type is cls
213
+ assert isinstance(res, cls)
214
+
215
+ dtype, res = infer_dtype_from(arr)
216
+ assert dtype.type is cls
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_maybe_box_native.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+ from pandas.core.dtypes.cast import maybe_box_native
7
+
8
+ from pandas import (
9
+ Interval,
10
+ Period,
11
+ Timedelta,
12
+ Timestamp,
13
+ )
14
+
15
+
16
+ @pytest.mark.parametrize(
17
+ "obj,expected_dtype",
18
+ [
19
+ (b"\x00\x10", bytes),
20
+ (int(4), int),
21
+ (np.uint(4), int),
22
+ (np.int32(-4), int),
23
+ (np.uint8(4), int),
24
+ (float(454.98), float),
25
+ (np.float16(0.4), float),
26
+ (np.float64(1.4), float),
27
+ (np.bool_(False), bool),
28
+ (datetime(2005, 2, 25), datetime),
29
+ (np.datetime64("2005-02-25"), Timestamp),
30
+ (Timestamp("2005-02-25"), Timestamp),
31
+ (np.timedelta64(1, "D"), Timedelta),
32
+ (Timedelta(1, "D"), Timedelta),
33
+ (Interval(0, 1), Interval),
34
+ (Period("4Q2005"), Period),
35
+ ],
36
+ )
37
+ def test_maybe_box_native(obj, expected_dtype):
38
+ boxed_obj = maybe_box_native(obj)
39
+ result_dtype = type(boxed_obj)
40
+ assert result_dtype is expected_dtype
py311/lib/python3.11/site-packages/pandas/tests/dtypes/cast/test_promote.py ADDED
@@ -0,0 +1,530 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ These test the method maybe_promote from core/dtypes/cast.py
3
+ """
4
+
5
+ import datetime
6
+ from decimal import Decimal
7
+
8
+ import numpy as np
9
+ import pytest
10
+
11
+ from pandas._libs.tslibs import NaT
12
+
13
+ from pandas.core.dtypes.cast import maybe_promote
14
+ from pandas.core.dtypes.common import is_scalar
15
+ from pandas.core.dtypes.dtypes import DatetimeTZDtype
16
+ from pandas.core.dtypes.missing import isna
17
+
18
+ import pandas as pd
19
+
20
+
21
def _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar=None):
    """
    Run maybe_promote on a scalar fill value and verify both outputs.

    Parameters
    ----------
    dtype : dtype
        Forwarded to maybe_promote as its first argument.
    fill_value : scalar
        Forwarded to maybe_promote as its second argument; must satisfy
        is_scalar.
    expected_dtype : dtype
        The dtype maybe_promote is expected to return.
    exp_val_for_scalar : scalar
        The (potentially upcast) fill value maybe_promote is expected to
        return.
    """
    assert is_scalar(fill_value)

    # The fill value goes in as a plain scalar; what comes back should be that
    # value, possibly upcast to the returned dtype.
    promoted_dtype, promoted_fill = maybe_promote(dtype, fill_value)

    assert promoted_dtype == expected_dtype
    _assert_match(promoted_fill, exp_val_for_scalar)
50
+
51
+
52
def _assert_match(result_fill_value, expected_fill_value):
    """Assert that two fill values agree in type and in value (or NA-ness)."""
    # GH#23982/25425 require the same type in addition to equality/NA-ness
    if not hasattr(result_fill_value, "dtype"):
        got_type = type(result_fill_value)
        want_type = type(expected_fill_value)
        # On some builds, type comparison fails, e.g. np.int32 != np.int32,
        # so fall back to comparing the type names.
        assert got_type == want_type or got_type.__name__ == want_type.__name__
    else:
        # Compare kind and itemsize rather than the types themselves; this is
        # robust to platform-specific aliases such as "ulonglong" for "uint64"
        # or "intc" for "int32".
        got_dtype = result_fill_value.dtype
        want_dtype = expected_fill_value.dtype
        assert got_dtype.kind == want_dtype.kind
        assert got_dtype.itemsize == want_dtype.itemsize

    values_equal = result_fill_value == expected_fill_value
    if values_equal is pd.NA:
        values_equal = False

    # The type check above already guarantees the same kind of NA value.
    # None == None is caught by the equality test, but np.nan != np.nan and
    # pd.NaT != pd.NaT, so missing values need their own comparison.
    both_missing = isna(result_fill_value) and isna(expected_fill_value)

    assert values_equal or both_missing
77
+
78
+
79
@pytest.mark.parametrize(
    "dtype, fill_value, expected_dtype",
    [
        # size 8
        ("int8", 1, "int8"),
        ("int8", np.iinfo("int8").max + 1, "int16"),
        ("int8", np.iinfo("int16").max + 1, "int32"),
        ("int8", np.iinfo("int32").max + 1, "int64"),
        ("int8", np.iinfo("int64").max + 1, "object"),
        ("int8", -1, "int8"),
        ("int8", np.iinfo("int8").min - 1, "int16"),
        ("int8", np.iinfo("int16").min - 1, "int32"),
        ("int8", np.iinfo("int32").min - 1, "int64"),
        ("int8", np.iinfo("int64").min - 1, "object"),
        # keep signed-ness as long as possible
        ("uint8", 1, "uint8"),
        ("uint8", np.iinfo("int8").max + 1, "uint8"),
        ("uint8", np.iinfo("uint8").max + 1, "uint16"),
        ("uint8", np.iinfo("int16").max + 1, "uint16"),
        ("uint8", np.iinfo("uint16").max + 1, "uint32"),
        ("uint8", np.iinfo("int32").max + 1, "uint32"),
        ("uint8", np.iinfo("uint32").max + 1, "uint64"),
        ("uint8", np.iinfo("int64").max + 1, "uint64"),
        ("uint8", np.iinfo("uint64").max + 1, "object"),
        # max of uint8 cannot be contained in int8
        ("uint8", -1, "int16"),
        ("uint8", np.iinfo("int8").min - 1, "int16"),
        ("uint8", np.iinfo("int16").min - 1, "int32"),
        ("uint8", np.iinfo("int32").min - 1, "int64"),
        ("uint8", np.iinfo("int64").min - 1, "object"),
        # size 16
        ("int16", 1, "int16"),
        ("int16", np.iinfo("int8").max + 1, "int16"),
        ("int16", np.iinfo("int16").max + 1, "int32"),
        ("int16", np.iinfo("int32").max + 1, "int64"),
        ("int16", np.iinfo("int64").max + 1, "object"),
        ("int16", -1, "int16"),
        ("int16", np.iinfo("int8").min - 1, "int16"),
        ("int16", np.iinfo("int16").min - 1, "int32"),
        ("int16", np.iinfo("int32").min - 1, "int64"),
        ("int16", np.iinfo("int64").min - 1, "object"),
        ("uint16", 1, "uint16"),
        ("uint16", np.iinfo("int8").max + 1, "uint16"),
        ("uint16", np.iinfo("uint8").max + 1, "uint16"),
        ("uint16", np.iinfo("int16").max + 1, "uint16"),
        ("uint16", np.iinfo("uint16").max + 1, "uint32"),
        ("uint16", np.iinfo("int32").max + 1, "uint32"),
        ("uint16", np.iinfo("uint32").max + 1, "uint64"),
        ("uint16", np.iinfo("int64").max + 1, "uint64"),
        ("uint16", np.iinfo("uint64").max + 1, "object"),
        ("uint16", -1, "int32"),
        ("uint16", np.iinfo("int8").min - 1, "int32"),
        ("uint16", np.iinfo("int16").min - 1, "int32"),
        ("uint16", np.iinfo("int32").min - 1, "int64"),
        ("uint16", np.iinfo("int64").min - 1, "object"),
        # size 32
        ("int32", 1, "int32"),
        ("int32", np.iinfo("int8").max + 1, "int32"),
        ("int32", np.iinfo("int16").max + 1, "int32"),
        ("int32", np.iinfo("int32").max + 1, "int64"),
        ("int32", np.iinfo("int64").max + 1, "object"),
        ("int32", -1, "int32"),
        ("int32", np.iinfo("int8").min - 1, "int32"),
        ("int32", np.iinfo("int16").min - 1, "int32"),
        ("int32", np.iinfo("int32").min - 1, "int64"),
        ("int32", np.iinfo("int64").min - 1, "object"),
        ("uint32", 1, "uint32"),
        ("uint32", np.iinfo("int8").max + 1, "uint32"),
        ("uint32", np.iinfo("uint8").max + 1, "uint32"),
        ("uint32", np.iinfo("int16").max + 1, "uint32"),
        ("uint32", np.iinfo("uint16").max + 1, "uint32"),
        ("uint32", np.iinfo("int32").max + 1, "uint32"),
        ("uint32", np.iinfo("uint32").max + 1, "uint64"),
        ("uint32", np.iinfo("int64").max + 1, "uint64"),
        ("uint32", np.iinfo("uint64").max + 1, "object"),
        ("uint32", -1, "int64"),
        ("uint32", np.iinfo("int8").min - 1, "int64"),
        ("uint32", np.iinfo("int16").min - 1, "int64"),
        ("uint32", np.iinfo("int32").min - 1, "int64"),
        ("uint32", np.iinfo("int64").min - 1, "object"),
        # size 64
        ("int64", 1, "int64"),
        ("int64", np.iinfo("int8").max + 1, "int64"),
        ("int64", np.iinfo("int16").max + 1, "int64"),
        ("int64", np.iinfo("int32").max + 1, "int64"),
        ("int64", np.iinfo("int64").max + 1, "object"),
        ("int64", -1, "int64"),
        ("int64", np.iinfo("int8").min - 1, "int64"),
        ("int64", np.iinfo("int16").min - 1, "int64"),
        ("int64", np.iinfo("int32").min - 1, "int64"),
        ("int64", np.iinfo("int64").min - 1, "object"),
        ("uint64", 1, "uint64"),
        ("uint64", np.iinfo("int8").max + 1, "uint64"),
        ("uint64", np.iinfo("uint8").max + 1, "uint64"),
        ("uint64", np.iinfo("int16").max + 1, "uint64"),
        ("uint64", np.iinfo("uint16").max + 1, "uint64"),
        ("uint64", np.iinfo("int32").max + 1, "uint64"),
        ("uint64", np.iinfo("uint32").max + 1, "uint64"),
        ("uint64", np.iinfo("int64").max + 1, "uint64"),
        ("uint64", np.iinfo("uint64").max + 1, "object"),
        ("uint64", -1, "object"),
        ("uint64", np.iinfo("int8").min - 1, "object"),
        ("uint64", np.iinfo("int16").min - 1, "object"),
        ("uint64", np.iinfo("int32").min - 1, "object"),
        ("uint64", np.iinfo("int64").min - 1, "object"),
    ],
)
def test_maybe_promote_int_with_int(dtype, fill_value, expected_dtype):
    """Promote an integer dtype with an integer fill value at each size boundary."""
    source_dtype = np.dtype(dtype)
    target_dtype = np.dtype(expected_dtype)

    # The expected scalar is not a generic Python int: it is boxed the same way
    # an element of an array of the expected dtype would be.
    boxed_fill = np.array([fill_value], dtype=target_dtype)[0]

    _check_promote(source_dtype, fill_value, target_dtype, boxed_fill)
194
+
195
+
196
def test_maybe_promote_int_with_float(any_int_numpy_dtype, float_numpy_dtype):
    """Filling any integer dtype with a float scalar is expected to give float64."""
    int_dtype = np.dtype(any_int_numpy_dtype)
    float_dtype = np.dtype(float_numpy_dtype)

    # Build the scalar through an array so that "1" is cast to the float dtype.
    float_one = np.array([1], dtype=float_dtype)[0]

    # The expected result is float64 regardless of the fill value's own float
    # width, so the expected scalar is re-boxed as np.float64.
    _check_promote(int_dtype, float_one, np.float64, np.float64(float_one))
209
+
210
+
211
def test_maybe_promote_float_with_int(float_numpy_dtype, any_int_numpy_dtype):
    """Filling a float dtype with an integer scalar is expected to keep the dtype."""
    float_dtype = np.dtype(float_numpy_dtype)
    int_dtype = np.dtype(any_int_numpy_dtype)

    # Build the scalar through an array so that "1" is cast to the int dtype.
    int_one = np.array([1], dtype=int_dtype)[0]

    # The float dtype is kept because
    # np.finfo('float32').max > np.iinfo('uint64').max.
    # The expected scalar is boxed as an element of that float dtype rather
    # than as a generic float.
    boxed_fill = np.array([int_one], dtype=float_dtype)[0]

    _check_promote(float_dtype, int_one, float_dtype, boxed_fill)
225
+
226
+
227
@pytest.mark.parametrize(
    "dtype, fill_value, expected_dtype",
    [
        # float filled with float
        ("float32", 1, "float32"),
        ("float32", float(np.finfo("float32").max) * 1.1, "float64"),
        ("float64", 1, "float64"),
        ("float64", float(np.finfo("float32").max) * 1.1, "float64"),
        # complex filled with float
        ("complex64", 1, "complex64"),
        ("complex64", float(np.finfo("float32").max) * 1.1, "complex128"),
        ("complex128", 1, "complex128"),
        ("complex128", float(np.finfo("float32").max) * 1.1, "complex128"),
        # float filled with complex
        ("float32", 1 + 1j, "complex64"),
        ("float32", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
        ("float64", 1 + 1j, "complex128"),
        ("float64", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
        # complex filled with complex
        ("complex64", 1 + 1j, "complex64"),
        ("complex64", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
        ("complex128", 1 + 1j, "complex128"),
        ("complex128", float(np.finfo("float32").max) * (1.1 + 1j), "complex128"),
    ],
)
def test_maybe_promote_float_with_float(dtype, fill_value, expected_dtype):
    """Promote float/complex dtypes with float/complex fill values."""
    source_dtype = np.dtype(dtype)
    target_dtype = np.dtype(expected_dtype)

    # The expected scalar is boxed as an element of the expected dtype rather
    # than left as a generic Python float/complex.
    boxed_fill = np.array([fill_value], dtype=target_dtype)[0]

    _check_promote(source_dtype, fill_value, target_dtype, boxed_fill)
260
+
261
+
262
def test_maybe_promote_bool_with_any(any_numpy_dtype):
    """Filling bool with a non-bool scalar is expected to give object dtype."""
    bool_dtype = np.dtype(bool)
    other_dtype = np.dtype(any_numpy_dtype)

    # Build the scalar through an array so that "1" is cast to the fill dtype.
    scalar = np.array([1], dtype=other_dtype)[0]

    # Only a bool fill value keeps bool; everything else is expected as object.
    if other_dtype == bool:
        target_dtype = other_dtype
    else:
        target_dtype = np.dtype(object)

    _check_promote(bool_dtype, scalar, target_dtype, scalar)
274
+
275
+
276
def test_maybe_promote_any_with_bool(any_numpy_dtype):
    """Filling a non-bool dtype with True is expected to give object dtype."""
    source_dtype = np.dtype(any_numpy_dtype)
    fill_value = True

    # Only a bool dtype stays as it is; everything else is expected as object.
    if source_dtype == bool:
        target_dtype = source_dtype
    else:
        target_dtype = np.dtype(object)

    # The expected scalar is boxed as an element of the expected dtype rather
    # than left as a generic Python bool.
    boxed_fill = np.array([fill_value], dtype=target_dtype)[0]

    _check_promote(source_dtype, fill_value, target_dtype, boxed_fill)
286
+
287
+
288
def test_maybe_promote_bytes_with_any(bytes_dtype, any_numpy_dtype):
    """A bytes dtype is expected to promote to object for any fill value."""
    source_dtype = np.dtype(bytes_dtype)
    other_dtype = np.dtype(any_numpy_dtype)

    # Build the scalar through an array so that "1" is cast to the fill dtype.
    scalar = np.array([1], dtype=other_dtype)[0]

    # bytes dtype is never used internally, so object is always expected and
    # the fill value is expected back unchanged.
    _check_promote(source_dtype, scalar, np.dtype(np.object_), scalar)
300
+
301
+
302
def test_maybe_promote_any_with_bytes(any_numpy_dtype):
    """Any numpy dtype filled with a bytes scalar is expected to become object."""
    source_dtype = np.dtype(any_numpy_dtype)
    fill_value = b"abc"

    # bytes dtype is never used internally, so object is always expected.
    target_dtype = np.dtype(np.object_)
    # The expected scalar is taken from an array of the expected dtype.
    boxed_fill = np.array([fill_value], dtype=target_dtype)[0]

    _check_promote(source_dtype, fill_value, target_dtype, boxed_fill)
314
+
315
+
316
def test_maybe_promote_datetime64_with_any(datetime64_dtype, any_numpy_dtype):
    """datetime64 keeps its dtype only when the fill value is datetime-like."""
    source_dtype = np.dtype(datetime64_dtype)
    other_dtype = np.dtype(any_numpy_dtype)

    # Build the scalar through an array so that "1" is cast to the fill dtype.
    scalar = np.array([1], dtype=other_dtype)[0]

    # Default expectation: a non-datetime fill value gives object dtype and the
    # scalar comes back unchanged.
    target_dtype = np.dtype(object)
    expected_scalar = scalar
    if other_dtype.kind == "M":
        # A datetime fill keeps the dtype; the scalar is expected as the
        # datetime64 produced by Timestamp.to_datetime64().
        target_dtype = source_dtype
        expected_scalar = pd.Timestamp(scalar).to_datetime64()

    _check_promote(source_dtype, scalar, target_dtype, expected_scalar)
333
+
334
+
335
@pytest.mark.parametrize(
    "fill_value",
    [
        pd.Timestamp("now"),
        np.datetime64("now"),
        datetime.datetime.now(),
        datetime.date.today(),
    ],
    ids=["pd.Timestamp", "np.datetime64", "datetime.datetime", "datetime.date"],
)
def test_maybe_promote_any_with_datetime64(any_numpy_dtype, fill_value):
    """Fill every numpy dtype with a datetime-like scalar."""
    source_dtype = np.dtype(any_numpy_dtype)
    is_dt64 = source_dtype.kind == "M"

    if is_dt64:
        # A datetime dtype is kept; the scalar is expected as the datetime64
        # produced by Timestamp.to_datetime64().
        target_dtype = source_dtype
        expected_scalar = pd.Timestamp(fill_value).to_datetime64()
    else:
        # Any other dtype is expected to become object, value unchanged.
        target_dtype = np.dtype(object)
        expected_scalar = fill_value

    if is_dt64 and type(fill_value) is datetime.date:
        # Casting date to dt64 is deprecated, in 2.0 enforced to cast to object
        target_dtype = np.dtype(object)
        expected_scalar = fill_value

    _check_promote(source_dtype, fill_value, target_dtype, expected_scalar)
363
+
364
+
365
@pytest.mark.parametrize(
    "fill_value",
    [
        pd.Timestamp(2023, 1, 1),
        np.datetime64("2023-01-01"),
        datetime.datetime(2023, 1, 1),
        datetime.date(2023, 1, 1),
    ],
    ids=["pd.Timestamp", "np.datetime64", "datetime.datetime", "datetime.date"],
)
def test_maybe_promote_any_numpy_dtype_with_datetimetz(
    any_numpy_dtype, tz_aware_fixture, fill_value
):
    """Any numpy dtype filled with a tz-aware scalar is expected to become object."""
    source_dtype = np.dtype(any_numpy_dtype)
    tz_dtype = DatetimeTZDtype(tz=tz_aware_fixture)

    # Round-trip the raw value through a tz-aware Series to get the scalar
    # that such a Series actually holds.
    tz_scalar = pd.Series([fill_value], dtype=tz_dtype)[0]

    # Object dtype is expected, with the tz-aware scalar returned unchanged.
    _check_promote(source_dtype, tz_scalar, np.dtype(object), tz_scalar)
388
+
389
+
390
def test_maybe_promote_timedelta64_with_any(timedelta64_dtype, any_numpy_dtype):
    """timedelta64 keeps its dtype only when the fill value is a timedelta."""
    source_dtype = np.dtype(timedelta64_dtype)
    other_dtype = np.dtype(any_numpy_dtype)

    # Build the scalar through an array so that "1" is cast to the fill dtype.
    scalar = np.array([1], dtype=other_dtype)[0]

    # Default expectation: a non-timedelta fill value gives object dtype and
    # the scalar comes back unchanged.
    target_dtype = np.dtype(object)
    expected_scalar = scalar
    if other_dtype.kind == "m":
        # A timedelta fill keeps the dtype; the scalar is expected as the
        # timedelta64 produced by Timedelta.to_timedelta64().
        target_dtype = source_dtype
        expected_scalar = pd.Timedelta(scalar).to_timedelta64()

    _check_promote(source_dtype, scalar, target_dtype, expected_scalar)
407
+
408
+
409
@pytest.mark.parametrize(
    "fill_value",
    [pd.Timedelta(days=1), np.timedelta64(24, "h"), datetime.timedelta(1)],
    ids=["pd.Timedelta", "np.timedelta64", "datetime.timedelta"],
)
def test_maybe_promote_any_with_timedelta64(any_numpy_dtype, fill_value):
    """Fill every numpy dtype with a timedelta-like scalar."""
    source_dtype = np.dtype(any_numpy_dtype)

    # Default expectation: a non-timedelta dtype becomes object and the fill
    # value comes back unchanged.
    target_dtype = np.dtype(object)
    expected_scalar = fill_value
    if source_dtype.kind == "m":
        # A timedelta dtype is kept; the scalar is expected as the
        # timedelta64 produced by Timedelta.to_timedelta64().
        target_dtype = source_dtype
        expected_scalar = pd.Timedelta(fill_value).to_timedelta64()

    _check_promote(source_dtype, fill_value, target_dtype, expected_scalar)
427
+
428
+
429
def test_maybe_promote_string_with_any(string_dtype, any_numpy_dtype):
    """A string dtype is expected to promote to object for any fill value."""
    source_dtype = np.dtype(string_dtype)
    other_dtype = np.dtype(any_numpy_dtype)

    # Build the scalar through an array so that "1" is cast to the fill dtype.
    scalar = np.array([1], dtype=other_dtype)[0]

    # Object dtype is expected, with the fill value returned unchanged.
    _check_promote(source_dtype, scalar, np.dtype(object), scalar)
441
+
442
+
443
def test_maybe_promote_any_with_string(any_numpy_dtype):
    """Any numpy dtype filled with a str scalar is expected to become object."""
    source_dtype = np.dtype(any_numpy_dtype)
    text = "abc"

    # Object dtype is expected, with the string returned unchanged.
    _check_promote(source_dtype, text, np.dtype(object), text)
454
+
455
+
456
def test_maybe_promote_object_with_any(object_dtype, any_numpy_dtype):
    """An object dtype is expected to stay object for any fill value."""
    source_dtype = np.dtype(object_dtype)
    other_dtype = np.dtype(any_numpy_dtype)

    # Build the scalar through an array so that "1" is cast to the fill dtype.
    scalar = np.array([1], dtype=other_dtype)[0]

    # Object dtype is expected, with the fill value returned unchanged.
    _check_promote(source_dtype, scalar, np.dtype(object), scalar)
468
+
469
+
470
def test_maybe_promote_any_with_object(any_numpy_dtype):
    """Any numpy dtype filled with a DateOffset is expected to become object."""
    source_dtype = np.dtype(any_numpy_dtype)

    # A DateOffset is used as a scalar value (i.e. one passing
    # dtypes.common.is_scalar) that cannot be cast to int/float etc.
    offset = pd.DateOffset(1)

    # Object dtype is expected, with the offset returned unchanged.
    _check_promote(source_dtype, offset, np.dtype(object), offset)
482
+
483
+
484
def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype, nulls_fixture):
    """Check the promotion of every numpy dtype when filled with a null value."""
    fill_value = nulls_fixture
    dtype = np.dtype(any_numpy_dtype)
    kind = dtype.kind
    object_dtype = np.dtype(object)

    if isinstance(fill_value, Decimal):
        # Subject to change, but ATM (When Decimal(NAN) is being added to
        # nulls_fixture) this is the existing behavior in maybe_promote,
        # hinges on is_valid_na_for_dtype
        if kind in "iu":
            expected, expected_scalar = np.dtype(np.float64), np.nan
        elif kind in "fc":
            expected, expected_scalar = dtype, np.nan
        else:
            expected, expected_scalar = object_dtype, fill_value
    elif kind in "iu" and fill_value is not NaT:
        # integer + other missing value (np.nan / None) casts to float
        expected, expected_scalar = np.float64, np.nan
    elif dtype == object and fill_value is NaT:
        # inserting into object does not cast the value
        # but *does* cast None to np.nan
        expected, expected_scalar = object_dtype, fill_value
    elif kind in "mM":
        # datetime / timedelta cast all missing values to dtyped-NaT
        expected, expected_scalar = dtype, dtype.type("NaT", "ns")
    elif fill_value is NaT:
        # NaT upcasts everything that's not datetime/timedelta to object
        expected, expected_scalar = object_dtype, NaT
    elif kind in "fc":
        # float / complex + missing value (!= NaT) stays the same
        expected, expected_scalar = dtype, np.nan
    else:
        # all other cases cast to object, and use np.nan as missing value
        # (pd.NA is the exception: it is expected back as pd.NA)
        expected = object_dtype
        expected_scalar = pd.NA if fill_value is pd.NA else np.nan

    _check_promote(dtype, fill_value, expected, expected_scalar)
py311/lib/python3.11/site-packages/pandas/tests/extension/base/casting.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ import pandas.util._test_decorators as td
5
+
6
+ import pandas as pd
7
+ import pandas._testing as tm
8
+ from pandas.core.internals.blocks import NumpyBlock
9
+
10
+
11
class BaseCastingTests:
    """Tests for casting to and from ExtensionDtypes."""

    def test_astype_object_series(self, all_data):
        """astype(object) on a Series yields an object-dtype ndarray."""
        as_object = pd.Series(all_data, name="A").astype(object)
        assert as_object.dtype == np.dtype(object)

        manager = as_object._mgr
        if hasattr(manager, "blocks"):
            # Only inspect blocks when the manager exposes them.
            first_block = manager.blocks[0]
            assert isinstance(first_block, NumpyBlock)
            assert first_block.is_object
        assert isinstance(as_object._mgr.array, np.ndarray)
        assert as_object._mgr.array.dtype == np.dtype(object)

    def test_astype_object_frame(self, all_data):
        """astype(object) on a DataFrame yields an object-dtype ndarray column."""
        frame = pd.DataFrame({"A": all_data})
        as_object = frame.astype(object)

        if hasattr(as_object._mgr, "blocks"):
            # Only inspect blocks when the manager exposes them.
            first_block = as_object._mgr.blocks[0]
            assert isinstance(first_block, NumpyBlock), type(first_block)
            assert first_block.is_object
        assert isinstance(as_object._mgr.arrays[0], np.ndarray)
        assert as_object._mgr.arrays[0].dtype == np.dtype(object)

        # check that we can compare the dtypes
        same_dtype = as_object.dtypes == frame.dtypes
        assert not same_dtype.any()

    def test_tolist(self, data):
        """Series.tolist() matches list() over the underlying array."""
        assert pd.Series(data).tolist() == list(data)

    def test_astype_str(self, data):
        """astype(str) matches calling str() on each of the first two elements."""
        casted = pd.Series(data[:2]).astype(str)
        wanted = pd.Series(list(map(str, data[:2])), dtype=str)
        tm.assert_series_equal(casted, wanted)

    @pytest.mark.parametrize(
        "nullable_string_dtype",
        [
            "string[python]",
            pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
        ],
    )
    def test_astype_string(self, data, nullable_string_dtype):
        """Casting to a nullable string dtype stringifies (or decodes) elements."""
        # GH-33465, GH#45326 as of 2.0 we decode bytes instead of calling str(obj)
        casted = pd.Series(data[:5]).astype(nullable_string_dtype)

        as_text = []
        for item in data[:5]:
            if isinstance(item, bytes):
                as_text.append(item.decode())
            else:
                as_text.append(str(item))
        wanted = pd.Series(as_text, dtype=nullable_string_dtype)

        tm.assert_series_equal(casted, wanted)

    def test_to_numpy(self, data):
        """to_numpy() on the array and on a Series both match np.asarray(data)."""
        wanted = np.asarray(data)

        tm.assert_equal(data.to_numpy(), wanted)
        tm.assert_equal(pd.Series(data).to_numpy(), wanted)

    def test_astype_empty_dataframe(self, dtype):
        """astype on an empty DataFrame returns an equal empty DataFrame."""
        # https://github.com/pandas-dev/pandas/issues/33113
        empty = pd.DataFrame()
        tm.assert_frame_equal(empty.astype(dtype), empty)

    @pytest.mark.parametrize("copy", [True, False])
    def test_astype_own_type(self, data, copy):
        """astype to the array's own dtype returns the same object iff copy=False."""
        # ensure that astype returns the original object for equal dtype and
        # copy=False
        # https://github.com/pandas-dev/pandas/issues/28488
        casted = data.astype(data.dtype, copy=copy)
        assert (casted is data) is (not copy)
        tm.assert_extension_array_equal(casted, data)
py311/lib/python3.11/site-packages/pandas/tests/extension/base/constructors.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ import pandas as pd
5
+ import pandas._testing as tm
6
+ from pandas.api.extensions import ExtensionArray
7
+ from pandas.core.internals.blocks import EABackedBlock
8
+
9
+
10
class BaseConstructorsTests:
    """Tests for constructing arrays, Series and DataFrames from extension data."""

    def test_from_sequence_from_cls(self, data):
        """_from_sequence round-trips both the full array and an empty slice."""
        array_cls = type(data)
        rebuilt = array_cls._from_sequence(data, dtype=data.dtype)
        tm.assert_extension_array_equal(rebuilt, data)

        # Same check with a zero-length slice of the data.
        empty = data[:0]
        rebuilt = type(empty)._from_sequence(empty, dtype=empty.dtype)
        tm.assert_extension_array_equal(rebuilt, empty)

    def test_array_from_scalars(self, data):
        """_from_sequence on a list of scalars returns the same array type."""
        first_three = [data[0], data[1], data[2]]
        rebuilt = data._from_sequence(first_three, dtype=data.dtype)
        assert isinstance(rebuilt, type(data))

    def test_series_constructor(self, data):
        """Series(data, copy=False) keeps the dtype and the array object."""
        ser = pd.Series(data, copy=False)
        assert ser.dtype == data.dtype
        assert len(ser) == len(data)
        if hasattr(ser._mgr, "blocks"):
            assert isinstance(ser._mgr.blocks[0], EABackedBlock)
        assert ser._mgr.array is data

        # Series[EA] is unboxed / boxed correctly
        rewrapped = pd.Series(ser)
        assert rewrapped.dtype == data.dtype
        if hasattr(ser._mgr, "blocks"):
            assert isinstance(rewrapped._mgr.blocks[0], EABackedBlock)

    def test_series_constructor_no_data_with_index(self, dtype, na_value):
        """A Series built from only an index is filled with the NA value."""
        index = [1, 2, 3]
        built = pd.Series(index=index, dtype=dtype)
        wanted = pd.Series([na_value] * 3, index=index, dtype=dtype)
        tm.assert_series_equal(built, wanted)

        # GH 33559 - empty index
        built = pd.Series(index=[], dtype=dtype)
        wanted = pd.Series([], index=pd.Index([], dtype="object"), dtype=dtype)
        tm.assert_series_equal(built, wanted)

    def test_series_constructor_scalar_na_with_index(self, dtype, na_value):
        """A scalar NA value is broadcast over the given index."""
        index = [1, 2, 3]
        built = pd.Series(na_value, index=index, dtype=dtype)
        wanted = pd.Series([na_value] * 3, index=index, dtype=dtype)
        tm.assert_series_equal(built, wanted)

    def test_series_constructor_scalar_with_index(self, data, dtype):
        """A scalar taken from the data is broadcast over the given index."""
        first = data[0]

        built = pd.Series(first, index=[1, 2, 3], dtype=dtype)
        wanted = pd.Series([first] * 3, index=[1, 2, 3], dtype=dtype)
        tm.assert_series_equal(built, wanted)

        built = pd.Series(first, index=["foo"], dtype=dtype)
        wanted = pd.Series([first], index=["foo"], dtype=dtype)
        tm.assert_series_equal(built, wanted)

    @pytest.mark.parametrize("from_series", [True, False])
    def test_dataframe_constructor_from_dict(self, data, from_series):
        """A DataFrame built from a dict keeps the extension dtype and array."""
        if from_series:
            data = pd.Series(data)
        frame = pd.DataFrame({"A": data})
        assert frame.dtypes["A"] == data.dtype
        assert frame.shape == (len(data), 1)
        if hasattr(frame._mgr, "blocks"):
            assert isinstance(frame._mgr.blocks[0], EABackedBlock)
        assert isinstance(frame._mgr.arrays[0], ExtensionArray)

    def test_dataframe_from_series(self, data):
        """A DataFrame built from a Series keeps the extension dtype and array."""
        frame = pd.DataFrame(pd.Series(data))
        assert frame.dtypes[0] == data.dtype
        assert frame.shape == (len(data), 1)
        if hasattr(frame._mgr, "blocks"):
            assert isinstance(frame._mgr.blocks[0], EABackedBlock)
        assert isinstance(frame._mgr.arrays[0], ExtensionArray)

    def test_series_given_mismatched_index_raises(self, data):
        """A length mismatch between values and index raises ValueError."""
        msg = r"Length of values \(3\) does not match length of index \(5\)"
        with pytest.raises(ValueError, match=msg):
            pd.Series(data[:3], index=[0, 1, 2, 3, 4])

    def test_from_dtype(self, data):
        """Construction works from the dtype object and from its string form."""
        # construct from our dtype & string dtype
        dtype = data.dtype
        as_list = list(data)

        wanted_series = pd.Series(data)
        tm.assert_series_equal(pd.Series(as_list, dtype=dtype), wanted_series)
        tm.assert_series_equal(
            pd.Series(list(data), dtype=str(dtype)), wanted_series
        )

        # gh-30280

        wanted_frame = pd.DataFrame(data).astype(dtype)
        tm.assert_frame_equal(pd.DataFrame(list(data), dtype=dtype), wanted_frame)
        tm.assert_frame_equal(
            pd.DataFrame(list(data), dtype=str(dtype)), wanted_frame
        )

    def test_pandas_array(self, data):
        """pd.array on an extension array returns an equal extension array."""
        # pd.array(extension_array) should be idempotent...
        tm.assert_extension_array_equal(pd.array(data), data)

    def test_pandas_array_dtype(self, data):
        """pd.array with an explicit object dtype gives a NumpyExtensionArray."""
        # ... but specifying dtype will override idempotency
        built = pd.array(data, dtype=np.dtype(object))
        wanted = pd.arrays.NumpyExtensionArray(np.asarray(data, dtype=object))
        tm.assert_equal(built, wanted)

    def test_construct_empty_dataframe(self, dtype):
        """An empty DataFrame with a dtype has an empty column of that dtype."""
        # GH 33623
        built = pd.DataFrame(columns=["a"], dtype=dtype)
        empty_column = pd.array([], dtype=dtype)
        wanted = pd.DataFrame({"a": empty_column}, index=pd.RangeIndex(0))
        tm.assert_frame_equal(built, wanted)

    def test_empty(self, dtype):
        """_empty and dtype.empty return arrays of the right type, dtype, shape."""
        array_cls = dtype.construct_array_type()

        from_cls = array_cls._empty((4,), dtype=dtype)
        assert isinstance(from_cls, array_cls)
        assert from_cls.dtype == dtype
        assert from_cls.shape == (4,)

        # GH#19600 method on ExtensionDtype
        from_tuple = dtype.empty((4,))
        assert isinstance(from_tuple, array_cls)
        assert from_tuple.dtype == dtype
        assert from_tuple.shape == (4,)

        # An integer shape is checked the same way as the 1-tuple.
        from_int = dtype.empty(4)
        assert isinstance(from_int, array_cls)
        assert from_int.dtype == dtype
        assert from_int.shape == (4,)
py311/lib/python3.11/site-packages/pandas/tests/extension/base/dim2.py ADDED
@@ -0,0 +1,345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tests for 2D compatibility.
3
+ """
4
+ import numpy as np
5
+ import pytest
6
+
7
+ from pandas._libs.missing import is_matching_na
8
+
9
+ from pandas.core.dtypes.common import (
10
+ is_bool_dtype,
11
+ is_integer_dtype,
12
+ )
13
+
14
+ import pandas as pd
15
+ import pandas._testing as tm
16
+ from pandas.core.arrays.integer import NUMPY_INT_TO_DTYPE
17
+
18
+
19
+ class Dim2CompatTests:
20
+ # Note: these are ONLY for ExtensionArray subclasses that support 2D arrays.
21
+ # i.e. not for pyarrow-backed EAs.
22
+
23
+ @pytest.fixture(autouse=True)
24
+ def skip_if_doesnt_support_2d(self, dtype, request):
25
+ if not dtype._supports_2d:
26
+ node = request.node
27
+ # In cases where we are mixed in to ExtensionTests, we only want to
28
+ # skip tests that are defined in Dim2CompatTests
29
+ test_func = node._obj
30
+ if test_func.__qualname__.startswith("Dim2CompatTests"):
31
+ # TODO: is there a less hacky way of checking this?
32
+ pytest.skip(f"{dtype} does not support 2D.")
33
+
34
+ def test_transpose(self, data):
35
+ arr2d = data.repeat(2).reshape(-1, 2)
36
+ shape = arr2d.shape
37
+ assert shape[0] != shape[-1] # otherwise the rest of the test is useless
38
+
39
+ assert arr2d.T.shape == shape[::-1]
40
+
41
+ def test_frame_from_2d_array(self, data):
42
+ arr2d = data.repeat(2).reshape(-1, 2)
43
+
44
+ df = pd.DataFrame(arr2d)
45
+ expected = pd.DataFrame({0: arr2d[:, 0], 1: arr2d[:, 1]})
46
+ tm.assert_frame_equal(df, expected)
47
+
48
+ def test_swapaxes(self, data):
49
+ arr2d = data.repeat(2).reshape(-1, 2)
50
+
51
+ result = arr2d.swapaxes(0, 1)
52
+ expected = arr2d.T
53
+ tm.assert_extension_array_equal(result, expected)
54
+
55
+ def test_delete_2d(self, data):
56
+ arr2d = data.repeat(3).reshape(-1, 3)
57
+
58
+ # axis = 0
59
+ result = arr2d.delete(1, axis=0)
60
+ expected = data.delete(1).repeat(3).reshape(-1, 3)
61
+ tm.assert_extension_array_equal(result, expected)
62
+
63
+ # axis = 1
64
+ result = arr2d.delete(1, axis=1)
65
+ expected = data.repeat(2).reshape(-1, 2)
66
+ tm.assert_extension_array_equal(result, expected)
67
+
68
+ def test_take_2d(self, data):
69
+ arr2d = data.reshape(-1, 1)
70
+
71
+ result = arr2d.take([0, 0, -1], axis=0)
72
+
73
+ expected = data.take([0, 0, -1]).reshape(-1, 1)
74
+ tm.assert_extension_array_equal(result, expected)
75
+
76
+ def test_repr_2d(self, data):
77
+ # this could fail in a corner case where an element contained the name
78
+ res = repr(data.reshape(1, -1))
79
+ assert res.count(f"<{type(data).__name__}") == 1
80
+
81
+ res = repr(data.reshape(-1, 1))
82
+ assert res.count(f"<{type(data).__name__}") == 1
83
+
84
+ def test_reshape(self, data):
85
+ arr2d = data.reshape(-1, 1)
86
+ assert arr2d.shape == (data.size, 1)
87
+ assert len(arr2d) == len(data)
88
+
89
+ arr2d = data.reshape((-1, 1))
90
+ assert arr2d.shape == (data.size, 1)
91
+ assert len(arr2d) == len(data)
92
+
93
+ with pytest.raises(ValueError):
94
+ data.reshape((data.size, 2))
95
+ with pytest.raises(ValueError):
96
+ data.reshape(data.size, 2)
97
+
98
+ def test_getitem_2d(self, data):
99
+ arr2d = data.reshape(1, -1)
100
+
101
+ result = arr2d[0]
102
+ tm.assert_extension_array_equal(result, data)
103
+
104
+ with pytest.raises(IndexError):
105
+ arr2d[1]
106
+
107
+ with pytest.raises(IndexError):
108
+ arr2d[-2]
109
+
110
+ result = arr2d[:]
111
+ tm.assert_extension_array_equal(result, arr2d)
112
+
113
+ result = arr2d[:, :]
114
+ tm.assert_extension_array_equal(result, arr2d)
115
+
116
+ result = arr2d[:, 0]
117
+ expected = data[[0]]
118
+ tm.assert_extension_array_equal(result, expected)
119
+
120
+ # dimension-expanding getitem on 1D
121
+ result = data[:, np.newaxis]
122
+ tm.assert_extension_array_equal(result, arr2d.T)
123
+
124
+ def test_iter_2d(self, data):
125
+ arr2d = data.reshape(1, -1)
126
+
127
+ objs = list(iter(arr2d))
128
+ assert len(objs) == arr2d.shape[0]
129
+
130
+ for obj in objs:
131
+ assert isinstance(obj, type(data))
132
+ assert obj.dtype == data.dtype
133
+ assert obj.ndim == 1
134
+ assert len(obj) == arr2d.shape[1]
135
+
136
+ def test_tolist_2d(self, data):
137
+ arr2d = data.reshape(1, -1)
138
+
139
+ result = arr2d.tolist()
140
+ expected = [data.tolist()]
141
+
142
+ assert isinstance(result, list)
143
+ assert all(isinstance(x, list) for x in result)
144
+
145
+ assert result == expected
146
+
147
+ def test_concat_2d(self, data):
148
+ left = type(data)._concat_same_type([data, data]).reshape(-1, 2)
149
+ right = left.copy()
150
+
151
+ # axis=0
152
+ result = left._concat_same_type([left, right], axis=0)
153
+ expected = data._concat_same_type([data] * 4).reshape(-1, 2)
154
+ tm.assert_extension_array_equal(result, expected)
155
+
156
+ # axis=1
157
+ result = left._concat_same_type([left, right], axis=1)
158
+ assert result.shape == (len(data), 4)
159
+ tm.assert_extension_array_equal(result[:, :2], left)
160
+ tm.assert_extension_array_equal(result[:, 2:], right)
161
+
162
+ # axis > 1 -> invalid
163
+ msg = "axis 2 is out of bounds for array of dimension 2"
164
+ with pytest.raises(ValueError, match=msg):
165
+ left._concat_same_type([left, right], axis=2)
166
+
167
+ @pytest.mark.parametrize("method", ["backfill", "pad"])
168
+ def test_fillna_2d_method(self, data_missing, method):
169
+ # pad_or_backfill is always along axis=0
170
+ arr = data_missing.repeat(2).reshape(2, 2)
171
+ assert arr[0].isna().all()
172
+ assert not arr[1].isna().any()
173
+
174
+ result = arr._pad_or_backfill(method=method, limit=None)
175
+
176
+ expected = data_missing._pad_or_backfill(method=method).repeat(2).reshape(2, 2)
177
+ tm.assert_extension_array_equal(result, expected)
178
+
179
+ # Reverse so that backfill is not a no-op.
180
+ arr2 = arr[::-1]
181
+ assert not arr2[0].isna().any()
182
+ assert arr2[1].isna().all()
183
+
184
+ result2 = arr2._pad_or_backfill(method=method, limit=None)
185
+
186
+ expected2 = (
187
+ data_missing[::-1]._pad_or_backfill(method=method).repeat(2).reshape(2, 2)
188
+ )
189
+ tm.assert_extension_array_equal(result2, expected2)
190
+
191
+ @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
192
+ def test_reductions_2d_axis_none(self, data, method):
193
+ arr2d = data.reshape(1, -1)
194
+
195
+ err_expected = None
196
+ err_result = None
197
+ try:
198
+ expected = getattr(data, method)()
199
+ except Exception as err:
200
+ # if the 1D reduction is invalid, the 2D reduction should be as well
201
+ err_expected = err
202
+ try:
203
+ result = getattr(arr2d, method)(axis=None)
204
+ except Exception as err2:
205
+ err_result = err2
206
+
207
+ else:
208
+ result = getattr(arr2d, method)(axis=None)
209
+
210
+ if err_result is not None or err_expected is not None:
211
+ assert type(err_result) == type(err_expected)
212
+ return
213
+
214
+ assert is_matching_na(result, expected) or result == expected
215
+
216
+ @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
217
+ @pytest.mark.parametrize("min_count", [0, 1])
218
+ def test_reductions_2d_axis0(self, data, method, min_count):
219
+ if min_count == 1 and method not in ["sum", "prod"]:
220
+ pytest.skip(f"min_count not relevant for {method}")
221
+
222
+ arr2d = data.reshape(1, -1)
223
+
224
+ kwargs = {}
225
+ if method in ["std", "var"]:
226
+ # pass ddof=0 so we get all-zero std instead of all-NA std
227
+ kwargs["ddof"] = 0
228
+ elif method in ["prod", "sum"]:
229
+ kwargs["min_count"] = min_count
230
+
231
+ try:
232
+ result = getattr(arr2d, method)(axis=0, **kwargs)
233
+ except Exception as err:
234
+ try:
235
+ getattr(data, method)()
236
+ except Exception as err2:
237
+ assert type(err) == type(err2)
238
+ return
239
+ else:
240
+ raise AssertionError("Both reductions should raise or neither")
241
+
242
+ def get_reduction_result_dtype(dtype):
243
+ # windows and 32bit builds will in some cases have int32/uint32
244
+ # where other builds will have int64/uint64.
245
+ if dtype.itemsize == 8:
246
+ return dtype
247
+ elif dtype.kind in "ib":
248
+ return NUMPY_INT_TO_DTYPE[np.dtype(int)]
249
+ else:
250
+ # i.e. dtype.kind == "u"
251
+ return NUMPY_INT_TO_DTYPE[np.dtype("uint")]
252
+
253
+ if method in ["sum", "prod"]:
254
+ # std and var are not dtype-preserving
255
+ expected = data
256
+ if data.dtype.kind in "iub":
257
+ dtype = get_reduction_result_dtype(data.dtype)
258
+ expected = data.astype(dtype)
259
+ assert dtype == expected.dtype
260
+
261
+ if min_count == 0:
262
+ fill_value = 1 if method == "prod" else 0
263
+ expected = expected.fillna(fill_value)
264
+
265
+ tm.assert_extension_array_equal(result, expected)
266
+ elif method == "median":
267
+ # std and var are not dtype-preserving
268
+ expected = data
269
+ tm.assert_extension_array_equal(result, expected)
270
+ elif method in ["mean", "std", "var"]:
271
+ if is_integer_dtype(data) or is_bool_dtype(data):
272
+ data = data.astype("Float64")
273
+ if method == "mean":
274
+ tm.assert_extension_array_equal(result, data)
275
+ else:
276
+ tm.assert_extension_array_equal(result, data - data)
277
+
278
+ @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
279
+ def test_reductions_2d_axis1(self, data, method):
280
+ arr2d = data.reshape(1, -1)
281
+
282
+ try:
283
+ result = getattr(arr2d, method)(axis=1)
284
+ except Exception as err:
285
+ try:
286
+ getattr(data, method)()
287
+ except Exception as err2:
288
+ assert type(err) == type(err2)
289
+ return
290
+ else:
291
+ raise AssertionError("Both reductions should raise or neither")
292
+
293
+ # not necessarily type/dtype-preserving, so weaker assertions
294
+ assert result.shape == (1,)
295
+ expected_scalar = getattr(data, method)()
296
+ res = result[0]
297
+ assert is_matching_na(res, expected_scalar) or res == expected_scalar
298
+
299
+
300
class NDArrayBacked2DTests(Dim2CompatTests):
    # More specific tests for NDArrayBackedExtensionArray subclasses

    def test_copy_order(self, data):
        """``copy(order)`` must give the same memory layout numpy would."""

        # We should be matching numpy semantics for the "order" keyword in 'copy'
        def is_c(arr):
            return arr._ndarray.flags["C_CONTIGUOUS"]

        def is_f(arr):
            return arr._ndarray.flags["F_CONTIGUOUS"]

        arr2d = data.repeat(2).reshape(-1, 2)
        assert is_c(arr2d)

        # default order, on the full array and on a strided selection
        assert is_c(arr2d.copy())
        assert is_c(arr2d[::2, ::2].copy())

        fortran = arr2d.copy("F")
        assert not is_c(fortran)
        assert is_f(fortran)

        # "K" keeps the layout of whatever is being copied
        assert is_c(arr2d.copy("K"))

        kept_transposed = arr2d.T.copy("K")
        assert not is_c(kept_transposed)
        assert is_f(kept_transposed)

        # order not accepted by numpy
        msg = r"order must be one of 'C', 'F', 'A', or 'K' \(got 'Q'\)"
        with pytest.raises(ValueError, match=msg):
            arr2d.copy("Q")

        # neither contiguity
        arr_nc = arr2d[::2]
        assert not is_c(arr_nc)
        assert not is_f(arr_nc)

        default_copy = arr_nc.copy()
        assert is_c(default_copy)
        assert not is_f(default_copy)

        c_copy = arr_nc.copy("C")
        assert is_c(c_copy)
        assert not is_f(c_copy)

        f_copy = arr_nc.copy("F")
        assert not is_c(f_copy)
        assert is_f(f_copy)

        k_copy = arr_nc.copy("K")
        assert is_c(k_copy)
        assert not is_f(k_copy)
py311/lib/python3.11/site-packages/pandas/tests/extension/base/setitem.py ADDED
@@ -0,0 +1,451 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ import pandas as pd
5
+ import pandas._testing as tm
6
+
7
+
8
class BaseSetitemTests:
    """
    Tests of ``__setitem__`` (and ``loc``/``iloc`` setting) for extension
    arrays and the Series/DataFrames that hold them.
    """

    @pytest.fixture(
        params=[
            lambda x: x.index,
            lambda x: list(x.index),
            lambda x: slice(None),
            lambda x: slice(0, len(x)),
            lambda x: range(len(x)),
            lambda x: list(range(len(x))),
            lambda x: np.ones(len(x), dtype=bool),
        ],
        ids=[
            "index",
            "list[index]",
            "null_slice",
            "full_slice",
            "range",
            "list(range)",
            "mask",
        ],
    )
    def full_indexer(self, request):
        """
        Fixture for an indexer to pass to obj.loc to get/set the full length of the
        object.

        In some cases, assumes that obj.index is the default RangeIndex.
        """
        return request.param

    @pytest.fixture(autouse=True)
    def skip_if_immutable(self, dtype, request):
        """Skip the setitem tests defined on this class for immutable dtypes."""
        if dtype._is_immutable:
            node = request.node
            if node.name.split("[")[0] == "test_is_immutable":
                # This fixture is auto-used, but we want to not-skip
                # test_is_immutable.
                return

            # When BaseSetitemTests is mixed into ExtensionTests, we only
            #  want this fixture to operate on the tests defined in this
            #  class/file.
            defined_in = node.function.__qualname__.split(".")[0]
            if defined_in == "BaseSetitemTests":
                pytest.skip("__setitem__ test not applicable with immutable dtype")

    def test_is_immutable(self, data):
        if data.dtype._is_immutable:
            with pytest.raises(TypeError):
                data[0] = data[0]
        else:
            data[0] = data[1]
            assert data[0] == data[1]

    def test_setitem_scalar_series(self, data, box_in_series):
        if box_in_series:
            data = pd.Series(data)
        data[0] = data[1]
        assert data[0] == data[1]

    def test_setitem_sequence(self, data, box_in_series):
        if box_in_series:
            data = pd.Series(data)
        original = data.copy()

        # swap the first two elements
        data[[0, 1]] = [data[1], data[0]]
        assert data[0] == original[1]
        assert data[1] == original[0]

    def test_setitem_sequence_mismatched_length_raises(self, data, as_array):
        ser = pd.Series(data)
        original = ser.copy()
        value = [data[0]]
        if as_array:
            value = data._from_sequence(value, dtype=data.dtype)

        xpr = "cannot set using a {} indexer with a different length"
        with pytest.raises(ValueError, match=xpr.format("list-like")):
            ser[[0, 1]] = value
        # Ensure no modifications made before the exception
        tm.assert_series_equal(ser, original)

        with pytest.raises(ValueError, match=xpr.format("slice")):
            ser[slice(3)] = value
        tm.assert_series_equal(ser, original)

    def test_setitem_empty_indexer(self, data, box_in_series):
        if box_in_series:
            data = pd.Series(data)
        original = data.copy()
        data[np.array([], dtype=int)] = []
        tm.assert_equal(data, original)

    def test_setitem_sequence_broadcasts(self, data, box_in_series):
        if box_in_series:
            data = pd.Series(data)
        data[[0, 1]] = data[2]
        assert data[0] == data[2]
        assert data[1] == data[2]

    @pytest.mark.parametrize("setter", ["loc", "iloc"])
    def test_setitem_scalar(self, data, setter):
        arr = pd.Series(data)
        setter = getattr(arr, setter)
        setter[0] = data[1]
        assert arr[0] == data[1]

    def test_setitem_loc_scalar_mixed(self, data):
        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
        df.loc[0, "B"] = data[1]
        assert df.loc[0, "B"] == data[1]

    def test_setitem_loc_scalar_single(self, data):
        df = pd.DataFrame({"B": data})
        df.loc[10, "B"] = data[1]
        assert df.loc[10, "B"] == data[1]

    def test_setitem_loc_scalar_multiple_homogoneous(self, data):
        df = pd.DataFrame({"A": data, "B": data})
        df.loc[10, "B"] = data[1]
        assert df.loc[10, "B"] == data[1]

    def test_setitem_iloc_scalar_mixed(self, data):
        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
        df.iloc[0, 1] = data[1]
        assert df.loc[0, "B"] == data[1]

    def test_setitem_iloc_scalar_single(self, data):
        df = pd.DataFrame({"B": data})
        df.iloc[10, 0] = data[1]
        assert df.loc[10, "B"] == data[1]

    def test_setitem_iloc_scalar_multiple_homogoneous(self, data):
        df = pd.DataFrame({"A": data, "B": data})
        df.iloc[10, 1] = data[1]
        assert df.loc[10, "B"] == data[1]

    @pytest.mark.parametrize(
        "mask",
        [
            np.array([True, True, True, False, False]),
            pd.array([True, True, True, False, False], dtype="boolean"),
            pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"),
        ],
        ids=["numpy-array", "boolean-array", "boolean-array-na"],
    )
    def test_setitem_mask(self, data, mask, box_in_series):
        arr = data[:5].copy()
        expected = arr.take([0, 0, 0, 3, 4])
        if box_in_series:
            arr = pd.Series(arr)
            expected = pd.Series(expected)
        arr[mask] = data[0]
        tm.assert_equal(expected, arr)

    def test_setitem_mask_raises(self, data, box_in_series):
        # wrong length
        mask = np.array([True, False])

        if box_in_series:
            data = pd.Series(data)

        with pytest.raises(IndexError, match="wrong length"):
            data[mask] = data[0]

        mask = pd.array(mask, dtype="boolean")
        with pytest.raises(IndexError, match="wrong length"):
            data[mask] = data[0]

    def test_setitem_mask_boolean_array_with_na(self, data, box_in_series):
        mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean")
        mask[:3] = True
        mask[3:5] = pd.NA

        if box_in_series:
            data = pd.Series(data)

        data[mask] = data[0]

        assert (data[:3] == data[0]).all()

    @pytest.mark.parametrize(
        "idx",
        [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
        ids=["list", "integer-array", "numpy-array"],
    )
    def test_setitem_integer_array(self, data, idx, box_in_series):
        arr = data[:5].copy()
        expected = data.take([0, 0, 0, 3, 4])

        if box_in_series:
            arr = pd.Series(arr)
            expected = pd.Series(expected)

        arr[idx] = arr[0]
        tm.assert_equal(arr, expected)

    # NOTE(review): the integer-array / box_in_series=True combination is not
    # listed. It was never exercised (a second copy of the False case ran under
    # the "integer-array-True" id) and its expected behavior is unverified.
    @pytest.mark.parametrize(
        "idx, box_in_series",
        [
            ([0, 1, 2, pd.NA], False),
            pytest.param(
                [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948")
            ),
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
        ],
        ids=["list-False", "list-True", "integer-array-False"],
    )
    def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
        arr = data.copy()

        # TODO(xfail) this raises KeyError about labels not found (it tries label-based)
        # for list of labels with Series
        if box_in_series:
            arr = pd.Series(data, index=[chr(100 + i) for i in range(len(data))])

        msg = "Cannot index with an integer indexer containing NA values"
        with pytest.raises(ValueError, match=msg):
            arr[idx] = arr[0]

    @pytest.mark.parametrize("as_callable", [True, False])
    @pytest.mark.parametrize("setter", ["loc", None])
    def test_setitem_mask_aligned(self, data, as_callable, setter):
        ser = pd.Series(data)
        mask = np.zeros(len(data), dtype=bool)
        mask[:2] = True

        if as_callable:
            mask2 = lambda x: mask
        else:
            mask2 = mask

        if setter:
            # loc
            target = getattr(ser, setter)
        else:
            # Series.__setitem__
            target = ser

        # Set only through ``target`` so that the parametrized setter is the
        # code path actually being checked.
        target[mask2] = data[5:7]

        assert ser[0] == data[5]
        assert ser[1] == data[6]

    @pytest.mark.parametrize("setter", ["loc", None])
    def test_setitem_mask_broadcast(self, data, setter):
        ser = pd.Series(data)
        mask = np.zeros(len(data), dtype=bool)
        mask[:2] = True

        if setter:  # loc
            target = getattr(ser, setter)
        else:  # __setitem__
            target = ser

        target[mask] = data[10]
        assert ser[0] == data[10]
        assert ser[1] == data[10]

    def test_setitem_expand_columns(self, data):
        df = pd.DataFrame({"A": data})
        result = df.copy()
        result["B"] = 1
        expected = pd.DataFrame({"A": data, "B": [1] * len(data)})
        tm.assert_frame_equal(result, expected)

        result = df.copy()
        result.loc[:, "B"] = 1
        tm.assert_frame_equal(result, expected)

        # overwrite with new type
        result["B"] = data
        expected = pd.DataFrame({"A": data, "B": data})
        tm.assert_frame_equal(result, expected)

    def test_setitem_expand_with_extension(self, data):
        df = pd.DataFrame({"A": [1] * len(data)})
        result = df.copy()
        result["B"] = data
        expected = pd.DataFrame({"A": [1] * len(data), "B": data})
        tm.assert_frame_equal(result, expected)

        result = df.copy()
        result.loc[:, "B"] = data
        tm.assert_frame_equal(result, expected)

    def test_setitem_frame_invalid_length(self, data):
        df = pd.DataFrame({"A": [1] * len(data)})
        xpr = (
            rf"Length of values \({len(data[:5])}\) "
            rf"does not match length of index \({len(df)}\)"
        )
        with pytest.raises(ValueError, match=xpr):
            df["B"] = data[:5]

    def test_setitem_tuple_index(self, data):
        ser = pd.Series(data[:2], index=[(0, 0), (0, 1)])
        expected = pd.Series(data.take([1, 1]), index=ser.index)
        ser[(0, 0)] = data[1]
        tm.assert_series_equal(ser, expected)

    def test_setitem_slice(self, data, box_in_series):
        arr = data[:5].copy()
        expected = data.take([0, 0, 0, 3, 4])
        if box_in_series:
            arr = pd.Series(arr)
            expected = pd.Series(expected)

        arr[:3] = data[0]
        tm.assert_equal(arr, expected)

    def test_setitem_loc_iloc_slice(self, data):
        arr = data[:5].copy()
        s = pd.Series(arr, index=["a", "b", "c", "d", "e"])
        expected = pd.Series(data.take([0, 0, 0, 3, 4]), index=s.index)

        result = s.copy()
        result.iloc[:3] = data[0]
        tm.assert_equal(result, expected)

        result = s.copy()
        result.loc[:"c"] = data[0]
        tm.assert_equal(result, expected)

    def test_setitem_slice_mismatch_length_raises(self, data):
        arr = data[:5]
        with pytest.raises(ValueError):
            arr[:1] = arr[:2]

    def test_setitem_slice_array(self, data):
        arr = data[:5].copy()
        arr[:5] = data[-5:]
        tm.assert_extension_array_equal(arr, data[-5:])

    def test_setitem_scalar_key_sequence_raise(self, data):
        arr = data[:5].copy()
        with pytest.raises(ValueError):
            arr[0] = arr[[0, 1]]

    def test_setitem_preserves_views(self, data):
        # GH#28150 setitem shouldn't swap the underlying data
        view1 = data.view()
        view2 = data[:]

        data[0] = data[1]
        assert view1[0] == data[1]
        assert view2[0] == data[1]

    def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
        # https://github.com/pandas-dev/pandas/issues/32395
        df = expected = pd.DataFrame({0: pd.Series(data)})
        result = pd.DataFrame(index=df.index)

        key = full_indexer(df)
        result.loc[key, 0] = df[0]

        tm.assert_frame_equal(result, expected)

    def test_setitem_with_expansion_row(self, data, na_value):
        df = pd.DataFrame({"data": data[:1]})

        df.loc[1, "data"] = data[1]
        expected = pd.DataFrame({"data": data[:2]})
        tm.assert_frame_equal(df, expected)

        # https://github.com/pandas-dev/pandas/issues/47284
        df.loc[2, "data"] = na_value
        expected = pd.DataFrame(
            {"data": pd.Series([data[0], data[1], na_value], dtype=data.dtype)}
        )
        tm.assert_frame_equal(df, expected)

    def test_setitem_series(self, data, full_indexer):
        # https://github.com/pandas-dev/pandas/issues/32395
        ser = pd.Series(data, name="data")
        result = pd.Series(index=ser.index, dtype=object, name="data")

        # because result has object dtype, the attempt to do setting inplace
        #  is successful, and object dtype is retained
        key = full_indexer(ser)
        result.loc[key] = ser

        expected = pd.Series(
            data.astype(object), index=ser.index, name="data", dtype=object
        )
        tm.assert_series_equal(result, expected)

    def test_setitem_frame_2d_values(self, data):
        # GH#44514
        df = pd.DataFrame({"A": data})

        # Avoiding using_array_manager fixture
        #  https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
        using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
        using_copy_on_write = pd.options.mode.copy_on_write

        blk_data = df._mgr.arrays[0]

        orig = df.copy()

        df.iloc[:] = df.copy()
        tm.assert_frame_equal(df, orig)

        df.iloc[:-1] = df.iloc[:-1].copy()
        tm.assert_frame_equal(df, orig)

        df.iloc[:] = df.values
        tm.assert_frame_equal(df, orig)
        if not using_array_manager and not using_copy_on_write:
            # GH#33457 Check that this setting occurred in-place
            # FIXME(ArrayManager): this should work there too
            assert df._mgr.arrays[0] is blk_data

        df.iloc[:-1] = df.values[:-1]
        tm.assert_frame_equal(df, orig)

    def test_delitem_series(self, data):
        # GH#40763
        ser = pd.Series(data, name="data")

        # every position except 1
        taker = np.arange(len(ser))
        taker = np.delete(taker, 1)

        expected = ser[taker]
        del ser[1]
        tm.assert_series_equal(ser, expected)

    def test_setitem_invalid(self, data, invalid_scalar):
        msg = ""  # messages vary by subclass, so we do not test it
        with pytest.raises((ValueError, TypeError), match=msg):
            data[0] = invalid_scalar

        with pytest.raises((ValueError, TypeError), match=msg):
            data[:] = invalid_scalar

    def test_setitem_2d_values(self, data):
        # GH50085
        original = data.copy()
        df = pd.DataFrame({"a": data, "b": data})
        # swap the first two rows using a 2D ndarray of values
        df.loc[[0, 1], :] = df.loc[[1, 0], :].values
        assert (df.loc[0, :] == original[1]).all()
        assert (df.loc[1, :] == original[0]).all()
py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from pandas.tests.extension.decimal.array import (
2
+ DecimalArray,
3
+ DecimalDtype,
4
+ make_data,
5
+ to_decimal,
6
+ )
7
+
8
+ __all__ = ["DecimalArray", "DecimalDtype", "to_decimal", "make_data"]
py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/array.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import decimal
4
+ import numbers
5
+ import sys
6
+ from typing import TYPE_CHECKING
7
+
8
+ import numpy as np
9
+
10
+ from pandas.core.dtypes.base import ExtensionDtype
11
+ from pandas.core.dtypes.common import (
12
+ is_dtype_equal,
13
+ is_float,
14
+ is_integer,
15
+ pandas_dtype,
16
+ )
17
+
18
+ import pandas as pd
19
+ from pandas.api.extensions import (
20
+ no_default,
21
+ register_extension_dtype,
22
+ )
23
+ from pandas.api.types import (
24
+ is_list_like,
25
+ is_scalar,
26
+ )
27
+ from pandas.core import arraylike
28
+ from pandas.core.algorithms import value_counts_internal as value_counts
29
+ from pandas.core.arraylike import OpsMixin
30
+ from pandas.core.arrays import (
31
+ ExtensionArray,
32
+ ExtensionScalarOpsMixin,
33
+ )
34
+ from pandas.core.indexers import check_array_indexer
35
+
36
+ if TYPE_CHECKING:
37
+ from pandas._typing import type_t
38
+
39
+
40
@register_extension_dtype
class DecimalDtype(ExtensionDtype):
    """
    Extension dtype whose scalars are ``decimal.Decimal`` objects.

    The dtype carries a ``context`` attribute, which is listed in
    ``_metadata``.
    """

    name = "decimal"
    type = decimal.Decimal
    na_value = decimal.Decimal("NaN")
    _metadata = ("context",)

    def __init__(self, context=None) -> None:
        # a missing (or falsy) context falls back to decimal.getcontext()
        self.context = context if context else decimal.getcontext()

    def __repr__(self) -> str:
        ctx = self.context
        return f"DecimalDtype(context={ctx})"

    @classmethod
    def construct_array_type(cls) -> type_t[DecimalArray]:
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        array_type = DecimalArray
        return array_type

    @property
    def _is_numeric(self) -> bool:
        # this dtype reports itself as numeric
        return True
67
+
68
+
69
+ class DecimalArray(OpsMixin, ExtensionScalarOpsMixin, ExtensionArray):
70
+ __array_priority__ = 1000
71
+
72
    def __init__(self, values, dtype=None, copy=False, context=None) -> None:
        """
        Build the array from a sequence of Decimal, float or integer values.

        Floats and integers are converted to ``decimal.Decimal`` (NaN becomes
        ``DecimalDtype.na_value``). The conversion assigns back into
        ``values`` by position, so the passed-in container is modified in
        place and must support item assignment whenever a conversion happens.

        ``dtype`` and ``copy`` are accepted but not used in this constructor;
        ``context`` is forwarded to ``DecimalDtype``.

        Raises
        ------
        TypeError
            If an element is not a float, an integer or a ``decimal.Decimal``.
        """
        for i, val in enumerate(values):
            if is_float(val) or is_integer(val):
                if np.isnan(val):
                    values[i] = DecimalDtype.na_value
                else:
                    # error: Argument 1 has incompatible type "float | int |
                    # integer[Any]"; expected "Decimal | float | str | tuple[int,
                    # Sequence[int], int]"
                    values[i] = DecimalDtype.type(val)  # type: ignore[arg-type]
            elif not isinstance(val, decimal.Decimal):
                raise TypeError("All values must be of type " + str(decimal.Decimal))
        # NOTE(review): np.asarray presumably returns an object ndarray input
        # as-is (no copy), so the new array would share memory with it —
        # confirm before replacing this with a copying conversion.
        values = np.asarray(values, dtype=object)

        self._data = values
        # Some aliases for common attribute names to ensure pandas supports
        # these
        self._items = self.data = self._data
        # those aliases are currently not working due to assumptions
        # in internal code (GH-20735)
        # self._values = self.values = self.data
        self._dtype = DecimalDtype(context)
94
+
95
    @property
    def dtype(self):
        """Return the dtype instance stored on this array as ``_dtype``."""
        return self._dtype
98
+
99
    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
        """
        Construct an array by passing ``scalars`` to the class constructor.

        ``dtype`` and ``copy`` are keyword-only and are not used here.
        """
        return cls(scalars)
102
+
103
+ @classmethod
104
+ def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
105
+ return cls._from_sequence(
106
+ [decimal.Decimal(x) for x in strings], dtype=dtype, copy=copy
107
+ )
108
+
109
    @classmethod
    def _from_factorized(cls, values, original):
        """
        Construct an array by passing ``values`` to the class constructor.

        ``original`` is not used here.
        """
        return cls(values)
112
+
113
+ _HANDLED_TYPES = (decimal.Decimal, numbers.Number, np.ndarray)
114
+
115
+ def to_numpy(
116
+ self,
117
+ dtype=None,
118
+ copy: bool = False,
119
+ na_value: object = no_default,
120
+ decimals=None,
121
+ ) -> np.ndarray:
122
+ result = np.asarray(self, dtype=dtype)
123
+ if decimals is not None:
124
+ result = np.asarray([round(x, decimals) for x in result])
125
+ return result
126
+
127
+ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
128
+ #
129
+ if not all(
130
+ isinstance(t, self._HANDLED_TYPES + (DecimalArray,)) for t in inputs
131
+ ):
132
+ return NotImplemented
133
+
134
+ result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
135
+ self, ufunc, method, *inputs, **kwargs
136
+ )
137
+ if result is not NotImplemented:
138
+ # e.g. test_array_ufunc_series_scalar_other
139
+ return result
140
+
141
+ if "out" in kwargs:
142
+ return arraylike.dispatch_ufunc_with_out(
143
+ self, ufunc, method, *inputs, **kwargs
144
+ )
145
+
146
+ inputs = tuple(x._data if isinstance(x, DecimalArray) else x for x in inputs)
147
+ result = getattr(ufunc, method)(*inputs, **kwargs)
148
+
149
+ if method == "reduce":
150
+ result = arraylike.dispatch_reduction_ufunc(
151
+ self, ufunc, method, *inputs, **kwargs
152
+ )
153
+ if result is not NotImplemented:
154
+ return result
155
+
156
+ def reconstruct(x):
157
+ if isinstance(x, (decimal.Decimal, numbers.Number)):
158
+ return x
159
+ else:
160
+ return type(self)._from_sequence(x, dtype=self.dtype)
161
+
162
+ if ufunc.nout > 1:
163
+ return tuple(reconstruct(x) for x in result)
164
+ else:
165
+ return reconstruct(result)
166
+
167
+ def __getitem__(self, item):
168
+ if isinstance(item, numbers.Integral):
169
+ return self._data[item]
170
+ else:
171
+ # array, slice.
172
+ item = pd.api.indexers.check_array_indexer(self, item)
173
+ return type(self)(self._data[item])
174
+
175
+ def take(self, indexer, allow_fill=False, fill_value=None):
176
+ from pandas.api.extensions import take
177
+
178
+ data = self._data
179
+ if allow_fill and fill_value is None:
180
+ fill_value = self.dtype.na_value
181
+
182
+ result = take(data, indexer, fill_value=fill_value, allow_fill=allow_fill)
183
+ return self._from_sequence(result, dtype=self.dtype)
184
+
185
+ def copy(self):
186
+ return type(self)(self._data.copy(), dtype=self.dtype)
187
+
188
+ def astype(self, dtype, copy=True):
189
+ if is_dtype_equal(dtype, self._dtype):
190
+ if not copy:
191
+ return self
192
+ dtype = pandas_dtype(dtype)
193
+ if isinstance(dtype, type(self.dtype)):
194
+ return type(self)(self._data, copy=copy, context=dtype.context)
195
+
196
+ return super().astype(dtype, copy=copy)
197
+
198
+ def __setitem__(self, key, value) -> None:
199
+ if is_list_like(value):
200
+ if is_scalar(key):
201
+ raise ValueError("setting an array element with a sequence.")
202
+ value = [decimal.Decimal(v) for v in value]
203
+ else:
204
+ value = decimal.Decimal(value)
205
+
206
+ key = check_array_indexer(self, key)
207
+ self._data[key] = value
208
+
209
+ def __len__(self) -> int:
210
+ return len(self._data)
211
+
212
+ def __contains__(self, item) -> bool | np.bool_:
213
+ if not isinstance(item, decimal.Decimal):
214
+ return False
215
+ elif item.is_nan():
216
+ return self.isna().any()
217
+ else:
218
+ return super().__contains__(item)
219
+
220
+ @property
221
+ def nbytes(self) -> int:
222
+ n = len(self)
223
+ if n:
224
+ return n * sys.getsizeof(self[0])
225
+ return 0
226
+
227
+ def isna(self):
228
+ return np.array([x.is_nan() for x in self._data], dtype=bool)
229
+
230
+ @property
231
+ def _na_value(self):
232
+ return decimal.Decimal("NaN")
233
+
234
+ def _formatter(self, boxed=False):
235
+ if boxed:
236
+ return "Decimal: {}".format
237
+ return repr
238
+
239
+ @classmethod
240
+ def _concat_same_type(cls, to_concat):
241
+ return cls(np.concatenate([x._data for x in to_concat]))
242
+
243
+ def _reduce(
244
+ self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
245
+ ):
246
+ if skipna and self.isna().any():
247
+ # If we don't have any NAs, we can ignore skipna
248
+ other = self[~self.isna()]
249
+ result = other._reduce(name, **kwargs)
250
+ elif name == "sum" and len(self) == 0:
251
+ # GH#29630 avoid returning int 0 or np.bool_(False) on old numpy
252
+ result = decimal.Decimal(0)
253
+ else:
254
+ try:
255
+ op = getattr(self.data, name)
256
+ except AttributeError as err:
257
+ raise NotImplementedError(
258
+ f"decimal does not support the {name} operation"
259
+ ) from err
260
+ result = op(axis=0)
261
+
262
+ if keepdims:
263
+ return type(self)([result])
264
+ else:
265
+ return result
266
+
267
+ def _cmp_method(self, other, op):
268
+ # For use with OpsMixin
269
+ def convert_values(param):
270
+ if isinstance(param, ExtensionArray) or is_list_like(param):
271
+ ovalues = param
272
+ else:
273
+ # Assume it's an object
274
+ ovalues = [param] * len(self)
275
+ return ovalues
276
+
277
+ lvalues = self
278
+ rvalues = convert_values(other)
279
+
280
+ # If the operator is not defined for the underlying objects,
281
+ # a TypeError should be raised
282
+ res = [op(a, b) for (a, b) in zip(lvalues, rvalues)]
283
+
284
+ return np.asarray(res, dtype=bool)
285
+
286
+ def value_counts(self, dropna: bool = True):
287
+ return value_counts(self.to_numpy(), dropna=dropna)
288
+
289
+ # We override fillna here to simulate a 3rd party EA that has done so. This
290
+ # lets us test the deprecation telling authors to implement _pad_or_backfill
291
+ # Simulate a 3rd-party EA that has not yet updated to include a "copy"
292
+ # keyword in its fillna method.
293
+ # error: Signature of "fillna" incompatible with supertype "ExtensionArray"
294
+ def fillna( # type: ignore[override]
295
+ self,
296
+ value=None,
297
+ method=None,
298
+ limit: int | None = None,
299
+ ):
300
+ return super().fillna(value=value, method=method, limit=limit, copy=True)
301
+
302
+
303
def to_decimal(values, context=None):
    """Convert every element of ``values`` to ``Decimal`` and wrap them in a DecimalArray."""
    as_decimals = list(map(decimal.Decimal, values))
    return DecimalArray(as_decimals, context=context)
305
+
306
+
307
def make_data():
    """Return 100 Decimals built from floats drawn with ``default_rng(2)``."""
    floats = np.random.default_rng(2).random(100)
    return list(map(decimal.Decimal, floats))
309
+
310
+
311
+ DecimalArray._add_arithmetic_ops()
py311/lib/python3.11/site-packages/pandas/tests/extension/decimal/test_decimal.py ADDED
@@ -0,0 +1,587 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import decimal
4
+ import operator
5
+
6
+ import numpy as np
7
+ import pytest
8
+
9
+ from pandas.compat.numpy import np_version_gt2
10
+
11
+ import pandas as pd
12
+ import pandas._testing as tm
13
+ from pandas.tests.extension import base
14
+ from pandas.tests.extension.decimal.array import (
15
+ DecimalArray,
16
+ DecimalDtype,
17
+ make_data,
18
+ to_decimal,
19
+ )
20
+
21
+
22
@pytest.fixture
def dtype():
    """A ``DecimalDtype`` built with its default arguments."""
    decimal_dtype = DecimalDtype()
    return decimal_dtype
25
+
26
+
27
@pytest.fixture
def data():
    """DecimalArray wrapping the values produced by ``make_data()``."""
    values = make_data()
    return DecimalArray(values)
30
+
31
+
32
@pytest.fixture
def data_for_twos():
    """DecimalArray of 100 separately constructed ``Decimal(2)`` values."""
    twos = []
    for _ in range(100):
        twos.append(decimal.Decimal(2))
    return DecimalArray(twos)
35
+
36
+
37
@pytest.fixture
def data_missing():
    """Length-2 DecimalArray: a NaN Decimal followed by ``Decimal(1)``."""
    missing = decimal.Decimal("NaN")
    present = decimal.Decimal(1)
    return DecimalArray([missing, present])
40
+
41
+
42
@pytest.fixture
def data_for_sorting():
    """DecimalArray with the values 1, 2, 0 in that order."""
    values = [decimal.Decimal(text) for text in ("1", "2", "0")]
    return DecimalArray(values)
47
+
48
+
49
@pytest.fixture
def data_missing_for_sorting():
    """DecimalArray with the values 1, NaN, 0 in that order."""
    values = [decimal.Decimal(text) for text in ("1", "NaN", "0")]
    return DecimalArray(values)
54
+
55
+
56
@pytest.fixture
def na_cmp():
    """Binary predicate that is truthy only when both arguments ``is_nan()``."""

    def both_nan(x, y):
        return x.is_nan() and y.is_nan()

    return both_nan
59
+
60
+
61
@pytest.fixture
def data_for_grouping():
    """DecimalArray laid out as ``[b, b, NA, NA, a, a, b, c]`` with a=0.0, b=1.0, c=2.0."""
    scalars = {
        "b": decimal.Decimal("1.0"),
        "a": decimal.Decimal("0.0"),
        "c": decimal.Decimal("2.0"),
        "na": decimal.Decimal("NaN"),
    }
    layout = ("b", "b", "na", "na", "a", "a", "b", "c")
    return DecimalArray([scalars[key] for key in layout])
68
+
69
+
70
class TestDecimalArray(base.ExtensionTests):
    """Runs the shared extension-array test-suite against ``DecimalArray``."""

    def _get_expected_exception(
        self, op_name: str, obj, other
    ) -> type[Exception] | tuple[type[Exception], ...] | None:
        # Always None: no exception is declared as expected for any op.
        return None

    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
        return True

    def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
        """Compare a Series reduction against the same-named ndarray method."""
        if op_name == "count":
            return super().check_reduce(ser, op_name, skipna)
        else:
            result = getattr(ser, op_name)(skipna=skipna)
            expected = getattr(np.asarray(ser), op_name)()
            tm.assert_almost_equal(result, expected)

    def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, request):
        if all_numeric_reductions in ["kurt", "skew", "sem", "median"]:
            mark = pytest.mark.xfail(raises=NotImplementedError)
            request.applymarker(mark)
        super().test_reduce_series_numeric(data, all_numeric_reductions, skipna)

    def test_reduce_frame(self, data, all_numeric_reductions, skipna, request):
        op_name = all_numeric_reductions
        if op_name in ["skew", "median"]:
            mark = pytest.mark.xfail(raises=NotImplementedError)
            request.applymarker(mark)

        return super().test_reduce_frame(data, all_numeric_reductions, skipna)

    def test_compare_scalar(self, data, comparison_op):
        ser = pd.Series(data)
        self._compare_other(ser, data, comparison_op, 0.5)

    def test_compare_array(self, data, comparison_op):
        ser = pd.Series(data)

        alter = np.random.default_rng(2).choice([-1, 0, 1], len(data))
        # Randomly double, halve or keep same value
        other = pd.Series(data) * [decimal.Decimal(pow(2.0, i)) for i in alter]
        self._compare_other(ser, data, comparison_op, other)

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        op_name = all_arithmetic_operators
        ser = pd.Series(data)

        context = decimal.getcontext()
        divbyzerotrap = context.traps[decimal.DivisionByZero]
        invalidoptrap = context.traps[decimal.InvalidOperation]
        context.traps[decimal.DivisionByZero] = 0
        context.traps[decimal.InvalidOperation] = 0

        # The traps are changed on the shared context, so restore them even
        # when one of the checks below fails; otherwise the altered traps
        # would leak into every test that runs afterwards.
        try:
            # Decimal supports ops with int, but not float
            other = pd.Series([int(d * 100) for d in data])
            self.check_opname(ser, op_name, other)

            if "mod" not in op_name:
                self.check_opname(ser, op_name, ser * 2)

            self.check_opname(ser, op_name, 0)
            self.check_opname(ser, op_name, 5)
        finally:
            context.traps[decimal.DivisionByZero] = divbyzerotrap
            context.traps[decimal.InvalidOperation] = invalidoptrap

    def test_fillna_frame(self, data_missing):
        msg = "ExtensionArray.fillna added a 'copy' keyword"
        with tm.assert_produces_warning(
            DeprecationWarning, match=msg, check_stacklevel=False
        ):
            super().test_fillna_frame(data_missing)

    def test_fillna_limit_pad(self, data_missing):
        msg = "ExtensionArray.fillna 'method' keyword is deprecated"
        with tm.assert_produces_warning(
            DeprecationWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            super().test_fillna_limit_pad(data_missing)

        msg = "The 'method' keyword in DecimalArray.fillna is deprecated"
        with tm.assert_produces_warning(
            FutureWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            super().test_fillna_limit_pad(data_missing)

    @pytest.mark.parametrize(
        "limit_area, input_ilocs, expected_ilocs",
        [
            ("outside", [1, 0, 0, 0, 1], [1, 0, 0, 0, 1]),
            ("outside", [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]),
            ("outside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 1]),
            ("outside", [0, 1, 0, 1, 0], [0, 1, 0, 1, 1]),
            ("inside", [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]),
            ("inside", [1, 0, 1, 0, 1], [1, 1, 1, 1, 1]),
            ("inside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 0]),
            ("inside", [0, 1, 0, 1, 0], [0, 1, 1, 1, 0]),
        ],
    )
    def test_ffill_limit_area(
        self, data_missing, limit_area, input_ilocs, expected_ilocs
    ):
        # GH#56616
        msg = "ExtensionArray.fillna 'method' keyword is deprecated"
        with tm.assert_produces_warning(
            DeprecationWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            msg = "DecimalArray does not implement limit_area"
            with pytest.raises(NotImplementedError, match=msg):
                super().test_ffill_limit_area(
                    data_missing, limit_area, input_ilocs, expected_ilocs
                )

    def test_fillna_limit_backfill(self, data_missing):
        msg = "Series.fillna with 'method' is deprecated"
        with tm.assert_produces_warning(
            FutureWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            super().test_fillna_limit_backfill(data_missing)

        msg = "ExtensionArray.fillna 'method' keyword is deprecated"
        with tm.assert_produces_warning(
            DeprecationWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            super().test_fillna_limit_backfill(data_missing)

        msg = "The 'method' keyword in DecimalArray.fillna is deprecated"
        with tm.assert_produces_warning(
            FutureWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            super().test_fillna_limit_backfill(data_missing)

    def test_fillna_no_op_returns_copy(self, data):
        msg = "|".join(
            [
                "ExtensionArray.fillna 'method' keyword is deprecated",
                "The 'method' keyword in DecimalArray.fillna is deprecated",
            ]
        )
        with tm.assert_produces_warning(
            (FutureWarning, DeprecationWarning), match=msg, check_stacklevel=False
        ):
            super().test_fillna_no_op_returns_copy(data)

    def test_fillna_series(self, data_missing):
        msg = "ExtensionArray.fillna added a 'copy' keyword"
        with tm.assert_produces_warning(
            DeprecationWarning, match=msg, check_stacklevel=False
        ):
            super().test_fillna_series(data_missing)

    def test_fillna_series_method(self, data_missing, fillna_method):
        msg = "|".join(
            [
                "ExtensionArray.fillna 'method' keyword is deprecated",
                "The 'method' keyword in DecimalArray.fillna is deprecated",
            ]
        )
        with tm.assert_produces_warning(
            (FutureWarning, DeprecationWarning), match=msg, check_stacklevel=False
        ):
            super().test_fillna_series_method(data_missing, fillna_method)

    def test_fillna_copy_frame(self, data_missing, using_copy_on_write):
        warn = DeprecationWarning if not using_copy_on_write else None
        msg = "ExtensionArray.fillna added a 'copy' keyword"
        with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
            super().test_fillna_copy_frame(data_missing)

    def test_fillna_copy_series(self, data_missing, using_copy_on_write):
        warn = DeprecationWarning if not using_copy_on_write else None
        msg = "ExtensionArray.fillna added a 'copy' keyword"
        with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
            super().test_fillna_copy_series(data_missing)

    @pytest.mark.parametrize("dropna", [True, False])
    def test_value_counts(self, all_data, dropna, request):
        all_data = all_data[:10]
        if dropna:
            other = np.array(all_data[~all_data.isna()])
        else:
            other = all_data

        vcs = pd.Series(all_data).value_counts(dropna=dropna)
        vcs_ex = pd.Series(other).value_counts(dropna=dropna)

        with decimal.localcontext() as ctx:
            # avoid raising when comparing Decimal("NAN") < Decimal(2)
            ctx.traps[decimal.InvalidOperation] = False

            result = vcs.sort_index()
            expected = vcs_ex.sort_index()

        tm.assert_series_equal(result, expected)

    def test_series_repr(self, data):
        # Overriding this base test to explicitly test that
        # the custom _formatter is used
        ser = pd.Series(data)
        assert data.dtype.name in repr(ser)
        assert "Decimal: " in repr(ser)

    @pytest.mark.xfail(reason="Inconsistent array-vs-scalar behavior")
    @pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs])
    def test_unary_ufunc_dunder_equivalence(self, data, ufunc):
        super().test_unary_ufunc_dunder_equivalence(data, ufunc)

    def test_array_interface_copy(self, data):
        result_copy1 = np.array(data, copy=True)
        result_copy2 = np.array(data, copy=True)
        assert not np.may_share_memory(result_copy1, result_copy2)
        if not np_version_gt2:
            # copy=False semantics are only supported in NumPy>=2.
            return

        try:
            result_nocopy1 = np.array(data, copy=False)
        except ValueError:
            # An error is always acceptable for `copy=False`
            return

        result_nocopy2 = np.array(data, copy=False)
        # If copy=False was given and did not raise, these must share the same data
        assert np.may_share_memory(result_nocopy1, result_nocopy2)
311
+
312
+
313
def test_take_na_value_other_decimal():
    """``take`` with ``allow_fill`` puts the explicit Decimal ``fill_value`` at -1."""
    filler = decimal.Decimal("-1.0")
    source = DecimalArray([decimal.Decimal("1.0"), decimal.Decimal("2.0")])

    taken = source.take([0, -1], allow_fill=True, fill_value=filler)

    wanted = DecimalArray([decimal.Decimal("1.0"), decimal.Decimal("-1.0")])
    tm.assert_extension_array_equal(taken, wanted)
318
+
319
+
320
def test_series_constructor_coerce_data_to_extension_dtype():
    """Plain ints passed with a DecimalDtype end up as a decimal-backed Series."""
    decimal_dtype = DecimalDtype()
    built = pd.Series([0, 1, 2], dtype=decimal_dtype)

    scalars = [decimal.Decimal(0), decimal.Decimal(1), decimal.Decimal(2)]
    reference = pd.Series(DecimalArray(scalars, dtype=decimal_dtype))
    tm.assert_series_equal(built, reference)
330
+
331
+
332
def test_series_constructor_with_dtype():
    """Series built from a DecimalArray with an explicit ``dtype``."""
    values = DecimalArray([decimal.Decimal("10.0")])

    # matching extension dtype: same as constructing without dtype
    same_dtype = pd.Series(values, dtype=DecimalDtype())
    tm.assert_series_equal(same_dtype, pd.Series(values))

    # numpy dtype: compared against a plain integer Series
    as_int = pd.Series(values, dtype="int64")
    tm.assert_series_equal(as_int, pd.Series([10]))
341
+
342
+
343
def test_dataframe_constructor_with_dtype():
    """DataFrame built from a DecimalArray column with an explicit ``dtype``."""
    column = DecimalArray([decimal.Decimal("10.0")])
    same_dtype = pd.DataFrame({"A": column}, dtype=DecimalDtype())
    tm.assert_frame_equal(same_dtype, pd.DataFrame({"A": column}))

    # a fresh array for the numpy-dtype case, as two independent scenarios
    column = DecimalArray([decimal.Decimal("10.0")])
    as_int = pd.DataFrame({"A": column}, dtype="int64")
    tm.assert_frame_equal(as_int, pd.DataFrame({"A": [10]}))
354
+
355
+
356
@pytest.mark.parametrize("frame", [True, False])
def test_astype_dispatches(frame):
    """
    Series/DataFrame ``astype`` must reach ``ExtensionArray.astype``.

    This is dtype-specific on purpose: a reliable smoke test that works for
    arbitrary data types is difficult to design.
    """
    obj = pd.Series(DecimalArray([decimal.Decimal(2)]), name="a")
    custom = decimal.Context()
    custom.prec = 5

    if frame:
        obj = obj.to_frame()

    converted = obj.astype(DecimalDtype(custom))
    column = converted["a"] if frame else converted

    assert column.dtype.context.prec == custom.prec
375
+
376
+
377
class DecimalArrayWithoutFromSequence(DecimalArray):
    """DecimalArray variant whose ``_from_sequence`` always raises, for error-path tests."""

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
        # Unconditional failure; the arguments are never inspected.
        failure = KeyError("For the test")
        raise failure
383
+
384
+
385
class DecimalArrayWithoutCoercion(DecimalArrayWithoutFromSequence):
    """Variant whose arithmetic methods are created with ``coerce_to_dtype=False``."""

    @classmethod
    def _create_arithmetic_method(cls, op):
        method = cls._create_method(op, coerce_to_dtype=False)
        return method


# Install the arithmetic dunder methods produced by the override above.
DecimalArrayWithoutCoercion._add_arithmetic_ops()
392
+
393
+
394
def test_combine_from_sequence_raises(monkeypatch):
    """``Series.combine`` yields object dtype when ``_from_sequence`` raises."""
    # https://github.com/pandas-dev/pandas/issues/22850
    array_cls = DecimalArrayWithoutFromSequence

    # Make the dtype hand out the array class whose _from_sequence raises.
    patched = classmethod(lambda cls: DecimalArrayWithoutFromSequence)
    monkeypatch.setattr(DecimalDtype, "construct_array_type", patched)

    ser = pd.Series(array_cls([decimal.Decimal("1.0"), decimal.Decimal("2.0")]))
    combined = ser.combine(ser, operator.add)

    # note: object dtype
    doubled = [decimal.Decimal("2.0"), decimal.Decimal("4.0")]
    tm.assert_series_equal(combined, pd.Series(doubled, dtype="object"))
413
+
414
+
415
@pytest.mark.parametrize(
    "class_", [DecimalArrayWithoutFromSequence, DecimalArrayWithoutCoercion]
)
def test_scalar_ops_from_sequence_raises(class_):
    """Adding two such arrays gives an object ndarray rather than an EA."""
    # op(EA, EA) should return an EA, or an ndarray if it's not possible
    # to return an EA with the return values.
    operand = class_([decimal.Decimal("1.0"), decimal.Decimal("2.0")])
    total = operand + operand

    doubled = [decimal.Decimal("2.0"), decimal.Decimal("4.0")]
    tm.assert_numpy_array_equal(total, np.array(doubled, dtype="object"))
427
+
428
+
429
@pytest.mark.parametrize(
    "reverse, expected_div, expected_mod",
    [(False, [0, 1, 1, 2], [1, 0, 1, 0]), (True, [2, 1, 0, 0], [0, 0, 2, 2])],
)
def test_divmod_array(reverse, expected_div, expected_mod):
    """``divmod`` between a DecimalArray and the scalar 2, in both operand orders."""
    # https://github.com/pandas-dev/pandas/issues/22930
    arr = to_decimal([1, 2, 3, 4])
    operands = (2, arr) if reverse else (arr, 2)
    quotient, remainder = divmod(*operands)

    expected_div = to_decimal(expected_div)
    expected_mod = to_decimal(expected_mod)

    tm.assert_extension_array_equal(quotient, expected_div)
    tm.assert_extension_array_equal(remainder, expected_mod)
445
+
446
+
447
def test_ufunc_fallback(data):
    """``np.abs`` on a Series matches ``np.abs`` on the array, keeping the index."""
    head = data[:5]
    index = range(3, 8)

    outcome = np.abs(pd.Series(head, index=index))

    wanted = pd.Series(np.abs(head), index=range(3, 8))
    tm.assert_series_equal(outcome, wanted)
453
+
454
+
455
def test_array_ufunc():
    """``np.exp`` on a DecimalArray equals ``np.exp`` on its ``_data``, re-wrapped."""
    arr = to_decimal([1, 2, 3])
    outcome = np.exp(arr)
    wanted = to_decimal(np.exp(arr._data))
    tm.assert_extension_array_equal(outcome, wanted)
460
+
461
+
462
def test_array_ufunc_series():
    """``np.exp`` on a decimal-backed Series returns a decimal-backed Series."""
    arr = to_decimal([1, 2, 3])
    outcome = np.exp(pd.Series(arr))
    wanted = pd.Series(to_decimal(np.exp(arr._data)))
    tm.assert_series_equal(outcome, wanted)
468
+
469
+
470
def test_array_ufunc_series_scalar_other():
    """``np.add`` of a Series and a Decimal scalar matches the array-level result."""
    # check _HANDLED_TYPES
    arr = to_decimal([1, 2, 3])
    one = decimal.Decimal(1)

    outcome = np.add(pd.Series(arr), one)

    wanted = pd.Series(np.add(arr, decimal.Decimal(1)))
    tm.assert_series_equal(outcome, wanted)
477
+
478
+
479
def test_array_ufunc_series_defer():
    """``np.add`` gives a Series whichever side the Series operand is on."""
    arr = to_decimal([1, 2, 3])
    ser = pd.Series(arr)

    doubled = pd.Series(to_decimal([2, 4, 6]))
    series_first = np.add(ser, arr)
    array_first = np.add(arr, ser)

    for outcome in (series_first, array_first):
        tm.assert_series_equal(outcome, doubled)
489
+
490
+
491
def test_groupby_agg():
    """The result of ``groupby(...).agg`` is inferred to be decimal dtype."""
    # https://github.com/pandas-dev/pandas/issues/29141

    def first(group):
        return group.iloc[0]

    data = make_data()[:5]
    df = pd.DataFrame(
        {"id1": [0, 0, 0, 1, 1], "id2": [0, 1, 0, 1, 1], "decimals": DecimalArray(data)}
    )

    # single key, selected column
    expected = pd.Series(to_decimal([data[0], data[3]]))
    via_frame = df.groupby("id1")["decimals"].agg(first)
    tm.assert_series_equal(via_frame, expected, check_names=False)
    via_series = df["decimals"].groupby(df["id1"]).agg(first)
    tm.assert_series_equal(via_series, expected, check_names=False)

    # multiple keys, selected column
    expected = pd.Series(
        to_decimal([data[0], data[1], data[3]]),
        index=pd.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 1)]),
    )
    via_frame = df.groupby(["id1", "id2"])["decimals"].agg(first)
    tm.assert_series_equal(via_frame, expected, check_names=False)
    via_series = df["decimals"].groupby([df["id1"], df["id2"]]).agg(first)
    tm.assert_series_equal(via_series, expected, check_names=False)

    # multiple columns
    expected = pd.DataFrame({"id2": [0, 1], "decimals": to_decimal([data[0], data[3]])})
    whole_frame = df.groupby("id1").agg(first)
    tm.assert_frame_equal(whole_frame, expected, check_names=False)
521
+
522
+
523
def test_groupby_agg_ea_method(monkeypatch):
    """``agg`` with a function calling a custom EA method keeps decimal dtype."""
    # https://github.com/pandas-dev/pandas/issues/29141

    def my_sum(self):
        return np.sum(np.array(self))

    # Temporarily attach the helper to DecimalArray as ``my_sum``.
    monkeypatch.setattr(DecimalArray, "my_sum", my_sum, raising=False)

    def call_my_sum(group):
        return group.values.my_sum()

    data = make_data()[:5]
    df = pd.DataFrame({"id": [0, 0, 0, 1, 1], "decimals": DecimalArray(data)})
    expected = pd.Series(to_decimal([data[0] + data[1] + data[2], data[3] + data[4]]))

    by_column = df.groupby("id")["decimals"].agg(call_my_sum)
    tm.assert_series_equal(by_column, expected, check_names=False)

    ser = pd.Series(DecimalArray(data))
    grouper = np.array([0, 0, 0, 1, 1], dtype=np.int64)
    by_array = ser.groupby(grouper).agg(call_my_sum)
    tm.assert_series_equal(by_array, expected, check_names=False)
542
+
543
+
544
def test_indexing_no_materialize(monkeypatch):
    """Indexing must not convert the ExtensionArray to a numpy array."""
    # See https://github.com/pandas-dev/pandas/issues/29708
    # Ensure that indexing operations do not materialize (convert to a numpy
    # array) the ExtensionArray unnecessarily

    def raising_array(self, dtype=None):
        raise Exception("tried to convert a DecimalArray to a numpy array")

    monkeypatch.setattr(DecimalArray, "__array__", raising_array, raising=False)

    ser = pd.Series(DecimalArray(make_data()))
    frame = pd.DataFrame({"a": ser, "b": range(len(ser))})

    # ensure the following operations do not raise an error
    ser[ser > 0.5]
    frame[ser > 0.5]
    ser.at[0]
    frame.at[0, "a"]
563
+
564
+
565
def test_to_numpy_keyword():
    """The extra ``decimals`` keyword of ``to_numpy`` rounds each element."""
    # test the extra keyword
    raw = [decimal.Decimal("1.1111"), decimal.Decimal("2.2222")]
    rounded = np.array(
        [decimal.Decimal("1.11"), decimal.Decimal("2.22")], dtype="object"
    )
    arr = pd.array(raw, dtype="decimal")

    from_array = arr.to_numpy(decimals=2)
    tm.assert_numpy_array_equal(from_array, rounded)

    from_series = pd.Series(arr).to_numpy(decimals=2)
    tm.assert_numpy_array_equal(from_series, rounded)
577
+
578
+
579
def test_array_copy_on_write(using_copy_on_write):
    """Under copy-on-write, writing to the source frame leaves the astype result alone."""
    source = pd.DataFrame(
        {"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype="object"
    )
    converted = source.astype(DecimalDtype())
    source.iloc[0, 0] = 0
    if not using_copy_on_write:
        # nothing is asserted without copy-on-write
        return
    untouched = pd.DataFrame(
        {"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype=DecimalDtype()
    )
    tm.assert_equal(converted.values, untouched.values)
py311/lib/python3.11/site-packages/pandas/tests/extension/list/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from pandas.tests.extension.list.array import (
2
+ ListArray,
3
+ ListDtype,
4
+ make_data,
5
+ )
6
+
7
+ __all__ = ["ListArray", "ListDtype", "make_data"]
py311/lib/python3.11/site-packages/pandas/tests/extension/list/array.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Test extension array for storing nested data in a pandas container.
3
+
4
+ The ListArray stores an ndarray of lists.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import numbers
9
+ import string
10
+ from typing import TYPE_CHECKING
11
+
12
+ import numpy as np
13
+
14
+ from pandas.core.dtypes.base import ExtensionDtype
15
+
16
+ import pandas as pd
17
+ from pandas.api.types import (
18
+ is_object_dtype,
19
+ is_string_dtype,
20
+ )
21
+ from pandas.core.arrays import ExtensionArray
22
+
23
+ if TYPE_CHECKING:
24
+ from pandas._typing import type_t
25
+
26
+
27
class ListDtype(ExtensionDtype):
    """Extension dtype whose scalar type is ``list`` and whose NA value is ``np.nan``."""

    type = list
    name = "list"
    na_value = np.nan

    @classmethod
    def construct_array_type(cls) -> "type_t[ListArray]":
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return ListArray


class ListArray(ExtensionArray):
    """Test ExtensionArray backed by an ndarray (``self.data``) of lists."""

    dtype = ListDtype()
    __array_priority__ = 1000

    def __init__(self, values, dtype=None, copy=False) -> None:
        """
        Parameters
        ----------
        values : np.ndarray
            Elements must be ``list`` instances or values ``pd.isna`` accepts
            as missing.
        dtype, copy
            Accepted for signature compatibility; not used.

        Raises
        ------
        TypeError
            If ``values`` is not an ndarray or holds an invalid element.
        """
        if not isinstance(values, np.ndarray):
            raise TypeError("Need to pass a numpy array as values")
        for val in values:
            if not isinstance(val, self.dtype.type) and not pd.isna(val):
                raise TypeError("All values must be of type " + str(self.dtype.type))
        self.data = values

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
        """Build an array from ``scalars`` via a pre-allocated object ndarray."""
        data = np.empty(len(scalars), dtype=object)
        data[:] = scalars
        return cls(data)

    def __getitem__(self, item):
        """Return the stored element for an integer ``item``, else a new ListArray."""
        if isinstance(item, numbers.Integral):
            return self.data[item]
        else:
            # slice, list-like, mask
            return type(self)(self.data[item])

    def __len__(self) -> int:
        return len(self.data)

    def isna(self):
        """Boolean ndarray: True where an element is not a list and is NaN."""
        return np.array(
            [not isinstance(x, list) and np.isnan(x) for x in self.data], dtype=bool
        )

    def take(self, indexer, allow_fill=False, fill_value=None):
        """
        Take elements by position.

        With ``allow_fill=True`` an index of -1 yields ``fill_value``
        (``self.dtype.na_value`` when not given).

        Raises
        ------
        ValueError
            If ``allow_fill`` is set and an index is below -1.
        IndexError
            If an index is out of bounds.
        """
        # re-implement here, since NumPy has trouble setting
        # sized objects like UserDicts into scalar slots of
        # an ndarray.
        indexer = np.asarray(indexer)
        msg = (
            "Index is out of bounds or cannot do a "
            "non-empty take from an empty array."
        )

        if allow_fill:
            if fill_value is None:
                fill_value = self.dtype.na_value
            # bounds check
            if (indexer < -1).any():
                raise ValueError
            try:
                output = [
                    self.data[loc] if loc != -1 else fill_value for loc in indexer
                ]
            except IndexError as err:
                raise IndexError(msg) from err
        else:
            try:
                output = [self.data[loc] for loc in indexer]
            except IndexError as err:
                raise IndexError(msg) from err

        return self._from_sequence(output)

    def copy(self):
        """Return a new ListArray over its own copy of the underlying ndarray."""
        # ``self.data[:]`` would only be a view sharing the same buffer, so
        # writes through the "copy" would show up in the original.
        return type(self)(self.data.copy())

    def astype(self, dtype, copy=True):
        """
        Cast to ``dtype``.

        The array's own dtype returns ``self`` (or a copy); non-object string
        dtypes stringify each element; anything else goes through NumPy.
        """
        if isinstance(dtype, type(self.dtype)) and dtype == self.dtype:
            if copy:
                return self.copy()
            return self
        elif is_string_dtype(dtype) and not is_object_dtype(dtype):
            # numpy has problems with astype(str) for nested elements
            return np.array([str(x) for x in self.data], dtype=dtype)
        elif not copy:
            return np.asarray(self.data, dtype=dtype)
        else:
            return np.array(self.data, dtype=dtype, copy=copy)

    @classmethod
    def _concat_same_type(cls, to_concat):
        """Concatenate the ``data`` arrays of ``to_concat`` into one ListArray."""
        data = np.concatenate([x.data for x in to_concat])
        return cls(data)
127
+
128
+
129
def make_data():
    """Return a length-100 object ndarray of lists of ASCII letters, drawn with ``default_rng(2)``."""
    # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer
    rng = np.random.default_rng(2)
    rows = []
    for _ in range(100):
        # draw the row length first, then that many letters
        size = rng.integers(0, 10)
        rows.append([rng.choice(list(string.ascii_letters)) for _ in range(size)])
    data = np.empty(100, dtype=object)
    data[:] = rows
    return data
py311/lib/python3.11/site-packages/pandas/tests/extension/list/test_list.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ import pandas as pd
4
+ from pandas.tests.extension.list.array import (
5
+ ListArray,
6
+ ListDtype,
7
+ make_data,
8
+ )
9
+
10
+
11
@pytest.fixture
def dtype():
    """Fixture returning a fresh ``ListDtype`` instance."""
    list_dtype = ListDtype()
    return list_dtype
14
+
15
+
16
@pytest.fixture
def data():
    """Length-100 ListArray for semantics test."""
    values = make_data()

    # The first two elements must differ in length; regenerate until they do.
    # NOTE(review): if make_data() is seeded deterministically, regenerating
    # returns the same values and this loop could never terminate once
    # entered -- confirm against make_data's implementation.
    while len(values[0]) == len(values[1]):
        values = make_data()

    return ListArray(values)
25
+
26
+
27
def test_to_csv(data):
    """The string form of the first list element must appear in the CSV text."""
    # https://github.com/pandas-dev/pandas/issues/28840
    # array with list-likes fail when doing astype(str) on the numpy array
    # which was done in get_values_for_csv
    frame = pd.DataFrame({"a": data})
    csv_text = frame.to_csv()
    first_as_text = str(data[0])
    assert first_as_text in csv_text
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/__init__.py ADDED
File without changes
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_append.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ from pandas import (
4
+ CategoricalIndex,
5
+ Index,
6
+ )
7
+ import pandas._testing as tm
8
+
9
+
10
class TestAppend:
    """Tests for ``append`` when at least one operand is a CategoricalIndex."""

    @pytest.fixture
    def ci(self):
        """Unordered CategoricalIndex over "aabbca" with categories c, a, b."""
        cats = list("cab")
        return CategoricalIndex(list("aabbca"), categories=cats, ordered=False)

    def test_append(self, ci):
        # append cats with the same categories
        head, tail = ci[:3], ci[3:]
        tm.assert_index_equal(head.append(tail), ci, exact=True)

        # appending a list of pieces gives back the original as well
        pieces = [ci[:1], ci[1:3], ci[3:]]
        joined = pieces[0].append(pieces[1:])
        tm.assert_index_equal(joined, ci, exact=True)

    def test_append_empty(self, ci):
        # empty
        joined = ci.append([])
        tm.assert_index_equal(joined, ci, exact=True)

    def test_append_mismatched_categories(self, ci):
        # appending with different categories or reordered is not ok
        msg = "all inputs must be Index"
        with pytest.raises(TypeError, match=msg):
            ci.append(ci.values.set_categories(list("abcd")))
        with pytest.raises(TypeError, match=msg):
            ci.append(ci.values.reorder_categories(list("abc")))

    def test_append_category_objects(self, ci):
        # with objects
        joined = ci.append(Index(["c", "a"]))
        want = CategoricalIndex(list("aabbcaca"), categories=ci.categories)
        tm.assert_index_equal(joined, want, exact=True)

    def test_append_non_categories(self, ci):
        # invalid objects -> cast to object via concat_compat
        joined = ci.append(Index(["a", "d"]))
        want = Index(["a", "a", "b", "b", "c", "a", "a", "d"])
        tm.assert_index_equal(joined, want, exact=True)

    def test_append_object(self, ci):
        # GH#14298 - if base object is not categorical -> coerce to object
        joined = Index(["c", "a"]).append(ci)
        want = Index(list("caaabbca"))
        tm.assert_index_equal(joined, want, exact=True)

    def test_append_to_another(self):
        # hits Index._concat
        left = Index(["a", "b"])
        right = CategoricalIndex(["d", "e"])
        joined = left.append(right)
        want = Index(["a", "b", "d", "e"])
        tm.assert_index_equal(joined, want)
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_category.py ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas._libs import index as libindex
5
+ from pandas._libs.arrays import NDArrayBacked
6
+
7
+ import pandas as pd
8
+ from pandas import (
9
+ Categorical,
10
+ CategoricalDtype,
11
+ )
12
+ import pandas._testing as tm
13
+ from pandas.core.indexes.api import (
14
+ CategoricalIndex,
15
+ Index,
16
+ )
17
+
18
+
19
class TestCategoricalIndex:
    """General behaviour tests for CategoricalIndex."""

    @pytest.fixture
    def simple_index(self) -> CategoricalIndex:
        """Unordered index over "aabbca" with categories c, a, b."""
        return CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

    def test_can_hold_identifiers(self):
        idx = CategoricalIndex(list("aabbca"), categories=None, ordered=False)
        first = idx[0]
        assert idx._can_hold_identifiers_and_holds_name(first) is True

    def test_insert(self, simple_index):
        ci = simple_index
        cats = ci.categories

        # test 0th element
        got = ci.insert(0, "a")
        want = CategoricalIndex(list("aaabbca"), categories=cats)
        tm.assert_index_equal(got, want, exact=True)

        # test Nth element that follows Python list behavior
        got = ci.insert(-1, "a")
        want = CategoricalIndex(list("aabbcaa"), categories=cats)
        tm.assert_index_equal(got, want, exact=True)

        # test empty
        got = CategoricalIndex([], categories=cats).insert(0, "a")
        want = CategoricalIndex(["a"], categories=cats)
        tm.assert_index_equal(got, want, exact=True)

        # invalid -> cast to object
        want = ci.astype(object).insert(0, "d")
        got = ci.insert(0, "d").astype(object)
        tm.assert_index_equal(got, want, exact=True)

        # GH 18295 (test missing)
        want = CategoricalIndex(["a", np.nan, "a", "b", "c", "b"])
        for na in (np.nan, pd.NaT, None):
            got = CategoricalIndex(list("aabcb")).insert(1, na)
            tm.assert_index_equal(got, want)

    def test_insert_na_mismatched_dtype(self):
        ci = CategoricalIndex([0, 1, 1])
        got = ci.insert(0, pd.NaT)
        want = Index([pd.NaT, 0, 1, 1], dtype=object)
        tm.assert_index_equal(got, want)

    def test_delete(self, simple_index):
        ci = simple_index
        cats = ci.categories

        got = ci.delete(0)
        want = CategoricalIndex(list("abbca"), categories=cats)
        tm.assert_index_equal(got, want, exact=True)

        got = ci.delete(-1)
        want = CategoricalIndex(list("aabbc"), categories=cats)
        tm.assert_index_equal(got, want, exact=True)

        with tm.external_error_raised((IndexError, ValueError)):
            # Either depending on NumPy version
            ci.delete(10)

    @pytest.mark.parametrize(
        "data, non_lexsorted_data",
        [[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]],
    )
    def test_is_monotonic(self, data, non_lexsorted_data):
        c = CategoricalIndex(data)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        c = CategoricalIndex(data, ordered=True)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        c = CategoricalIndex(data, categories=reversed(data))
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is True

        c = CategoricalIndex(data, categories=reversed(data), ordered=True)
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is True

        # test when data is neither monotonic increasing nor decreasing
        shuffled = [data[0], data[2], data[1]]
        c = CategoricalIndex(shuffled, categories=reversed(data))
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is False

        # non lexsorted categories
        cats = non_lexsorted_data

        c = CategoricalIndex(cats[:2], categories=cats)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        c = CategoricalIndex(cats[1:3], categories=cats)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

    def test_has_duplicates(self):
        idx = CategoricalIndex([0, 0, 0], name="foo")
        assert idx.is_unique is False
        assert idx.has_duplicates is True

        idx = CategoricalIndex([0, 1], categories=[2, 3], name="foo")
        assert idx.is_unique is False
        assert idx.has_duplicates is True

        idx = CategoricalIndex([0, 1, 2, 3], categories=[1, 2, 3], name="foo")
        assert idx.is_unique is True
        assert idx.has_duplicates is False

    @pytest.mark.parametrize(
        "data, categories, expected",
        [
            (
                [1, 1, 1],
                [1, 2, 3],
                {
                    "first": np.array([False, True, True]),
                    "last": np.array([True, True, False]),
                    False: np.array([True, True, True]),
                },
            ),
            (
                [1, 1, 1],
                list("abc"),
                {
                    "first": np.array([False, True, True]),
                    "last": np.array([True, True, False]),
                    False: np.array([True, True, True]),
                },
            ),
            (
                [2, "a", "b"],
                list("abc"),
                {
                    "first": np.zeros(shape=(3), dtype=np.bool_),
                    "last": np.zeros(shape=(3), dtype=np.bool_),
                    False: np.zeros(shape=(3), dtype=np.bool_),
                },
            ),
            (
                list("abb"),
                list("abc"),
                {
                    "first": np.array([False, False, True]),
                    "last": np.array([False, True, False]),
                    False: np.array([False, True, True]),
                },
            ),
        ],
    )
    def test_drop_duplicates(self, data, categories, expected):
        idx = CategoricalIndex(data, categories=categories, name="foo")
        for keep, dup_mask in expected.items():
            tm.assert_numpy_array_equal(idx.duplicated(keep=keep), dup_mask)
            # the rows not flagged as duplicates are what should remain
            kept = idx[~dup_mask]
            got = idx.drop_duplicates(keep=keep)
            tm.assert_index_equal(got, kept)

    @pytest.mark.parametrize(
        "data, categories, expected_data",
        [
            ([1, 1, 1], [1, 2, 3], [1]),
            ([1, 1, 1], list("abc"), [np.nan]),
            ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan]),
            ([2, "a", "b"], list("abc"), [np.nan, "a", "b"]),
        ],
    )
    def test_unique(self, data, categories, expected_data, ordered):
        dtype = CategoricalDtype(categories, ordered=ordered)

        idx = CategoricalIndex(data, dtype=dtype)
        want = CategoricalIndex(expected_data, dtype=dtype)
        tm.assert_index_equal(idx.unique(), want)

    def test_repr_roundtrip(self):
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        str(ci)
        tm.assert_index_equal(eval(repr(ci)), ci, exact=True)

        # formatting
        str(ci)

        # long format
        # this is not reprable
        ci = CategoricalIndex(np.random.default_rng(2).integers(0, 5, size=100))
        str(ci)

    def test_isin(self):
        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
        tm.assert_numpy_array_equal(
            ci.isin(["c"]), np.array([False, False, False, True, False, False])
        )
        tm.assert_numpy_array_equal(
            ci.isin(["c", "a", "b"]), np.array([True] * 5 + [False])
        )
        tm.assert_numpy_array_equal(
            ci.isin(["c", "a", "b", np.nan]), np.array([True] * 6)
        )

        # mismatched categorical -> coerced to ndarray so doesn't matter
        got = ci.isin(ci.set_categories(list("abcdefghi")))
        want = np.array([True] * 6)
        tm.assert_numpy_array_equal(got, want)

        got = ci.isin(ci.set_categories(list("defghi")))
        want = np.array([False] * 5 + [True])
        tm.assert_numpy_array_equal(got, want)

    def test_isin_overlapping_intervals(self):
        # GH 34974
        idx = pd.IntervalIndex([pd.Interval(0, 2), pd.Interval(0, 1)])
        got = CategoricalIndex(idx).isin(idx)
        want = np.array([True, True])
        tm.assert_numpy_array_equal(got, want)

    def test_identical(self):
        ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)
        assert ci1.identical(ci1)
        assert ci1.identical(ci1.copy())
        assert not ci1.identical(ci2)

    def test_ensure_copied_data(self):
        # gh-12309: Check the "copy" argument of each
        # Index.__new__ is honored.
        #
        # Must be tested separately from other indexes because
        # self.values is not an ndarray.
        index = CategoricalIndex(list("ab") * 5)

        copied = CategoricalIndex(index.values, copy=True)
        tm.assert_index_equal(index, copied)
        assert not np.shares_memory(copied._data._codes, index._data._codes)

        shared = CategoricalIndex(index.values, copy=False)
        assert shared._data._codes is index._data._codes
259
+
260
+
261
class TestCategoricalIndex2:
    """Further CategoricalIndex tests: views, engines, arithmetic, delegation."""

    def test_view_i8(self):
        # GH#25464
        ci = CategoricalIndex(list("ab") * 50)
        msg = "When changing to a larger dtype, its size must be a divisor"
        with pytest.raises(ValueError, match=msg):
            ci.view("i8")
        with pytest.raises(ValueError, match=msg):
            ci._data.view("i8")

        ci = ci[:-4]  # length divisible by 8

        viewed = ci.view("i8")
        want = ci._data.codes.view("i8")
        tm.assert_numpy_array_equal(viewed, want)

        cat = ci._data
        tm.assert_numpy_array_equal(cat.view("i8"), want)

    @pytest.mark.parametrize(
        "dtype, engine_type",
        [
            (np.int8, libindex.Int8Engine),
            (np.int16, libindex.Int16Engine),
            (np.int32, libindex.Int32Engine),
            (np.int64, libindex.Int64Engine),
        ],
    )
    def test_engine_type(self, dtype, engine_type):
        if dtype != np.int64:
            # num. of uniques required to push CategoricalIndex.codes to a
            # dtype (128 categories required for .codes dtype to be int16 etc.)
            uniques_needed = {np.int8: 1, np.int16: 128, np.int32: 32768}
            ci = CategoricalIndex(range(uniques_needed[dtype]))
        else:
            # having 2**32 - 2**31 categories would be very memory-intensive,
            # so we cheat a bit with the dtype
            ci = CategoricalIndex(range(32768))  # == 2**16 - 2**(16 - 1)
            codes64 = ci.values._ndarray.astype("int64")
            NDArrayBacked.__init__(ci._data, codes64, ci.dtype)
        assert np.issubdtype(ci.codes.dtype, dtype)
        assert isinstance(ci._engine, engine_type)

    @pytest.mark.parametrize(
        "func,op_name",
        [
            (lambda idx: idx - idx, "__sub__"),
            (lambda idx: idx + idx, "__add__"),
            (lambda idx: idx - ["a", "b"], "__sub__"),
            (lambda idx: idx + ["a", "b"], "__add__"),
            (lambda idx: ["a", "b"] - idx, "__rsub__"),
            (lambda idx: ["a", "b"] + idx, "__radd__"),
        ],
    )
    def test_disallow_addsub_ops(self, func, op_name):
        # GH 10039
        # set ops (+/-) raise TypeError
        idx = Index(Categorical(["a", "b"]))
        cat_or_list = "'(Categorical|list)' and '(Categorical|list)'"
        alternatives = [
            f"cannot perform {op_name} with this index type: CategoricalIndex",
            "can only concatenate list",
            rf"unsupported operand type\(s\) for [\+-]: {cat_or_list}",
        ]
        msg = "|".join(alternatives)
        with pytest.raises(TypeError, match=msg):
            func(idx)

    def test_method_delegation(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        got = ci.set_categories(list("cab"))
        tm.assert_index_equal(
            got, CategoricalIndex(list("aabbca"), categories=list("cab"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        got = ci.rename_categories(list("efg"))
        tm.assert_index_equal(
            got, CategoricalIndex(list("ffggef"), categories=list("efg"))
        )

        # GH18862 (let rename_categories take callables)
        got = ci.rename_categories(lambda x: x.upper())
        tm.assert_index_equal(
            got, CategoricalIndex(list("AABBCA"), categories=list("CAB"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        got = ci.add_categories(["d"])
        tm.assert_index_equal(
            got, CategoricalIndex(list("aabbca"), categories=list("cabd"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        got = ci.remove_categories(["c"])
        tm.assert_index_equal(
            got,
            CategoricalIndex(list("aabb") + [np.nan] + ["a"], categories=list("ab")),
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        got = ci.as_unordered()
        tm.assert_index_equal(got, ci)

        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        got = ci.as_ordered()
        tm.assert_index_equal(
            got,
            CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=True),
        )

        # invalid
        msg = "cannot use inplace with CategoricalIndex"
        with pytest.raises(ValueError, match=msg):
            ci.set_categories(list("cab"), inplace=True)

    def test_remove_maintains_order(self):
        ci = CategoricalIndex(list("abcdda"), categories=list("abcd"))
        got = ci.reorder_categories(["d", "c", "b", "a"], ordered=True)
        tm.assert_index_equal(
            got,
            CategoricalIndex(list("abcdda"), categories=list("dcba"), ordered=True),
        )
        got = got.remove_categories(["c"])
        tm.assert_index_equal(
            got,
            CategoricalIndex(
                ["a", "b", np.nan, "d", "d", "a"], categories=list("dba"), ordered=True
            ),
        )
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_constructors.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas import (
5
+ Categorical,
6
+ CategoricalDtype,
7
+ CategoricalIndex,
8
+ Index,
9
+ )
10
+ import pandas._testing as tm
11
+
12
+
13
class TestCategoricalIndexConstructors:
    """Tests for the ways a CategoricalIndex can be constructed."""

    def test_construction_disallows_scalar(self):
        msg = "must be called with a collection of some kind"
        with pytest.raises(TypeError, match=msg):
            CategoricalIndex(data=1, categories=list("abcd"), ordered=False)
        with pytest.raises(TypeError, match=msg):
            CategoricalIndex(categories=list("abcd"), ordered=False)

    def test_construction(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("abcd"), ordered=False)
        cats = ci.categories

        got = Index(ci)
        tm.assert_index_equal(got, ci, exact=True)
        assert not got.ordered

        got = Index(ci.values)
        tm.assert_index_equal(got, ci, exact=True)
        assert not got.ordered

        # empty
        got = CategoricalIndex([], categories=cats)
        tm.assert_index_equal(got.categories, Index(cats))
        tm.assert_numpy_array_equal(got.codes, np.array([], dtype="int8"))
        assert not got.ordered

        # passing categories
        got = CategoricalIndex(list("aabbca"), categories=cats)
        tm.assert_index_equal(got.categories, Index(cats))
        tm.assert_numpy_array_equal(
            got.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )

        c = Categorical(list("aabbca"))
        got = CategoricalIndex(c)
        tm.assert_index_equal(got.categories, Index(list("abc")))
        tm.assert_numpy_array_equal(
            got.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not got.ordered

        got = CategoricalIndex(c, categories=cats)
        tm.assert_index_equal(got.categories, Index(cats))
        tm.assert_numpy_array_equal(
            got.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not got.ordered

        ci = CategoricalIndex(c, categories=list("abcd"))
        got = CategoricalIndex(ci)
        tm.assert_index_equal(got.categories, Index(cats))
        tm.assert_numpy_array_equal(
            got.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not got.ordered

        got = CategoricalIndex(ci, categories=list("ab"))
        tm.assert_index_equal(got.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(
            got.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
        )
        assert not got.ordered

        got = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        tm.assert_index_equal(got.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(
            got.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
        )
        assert got.ordered

        got = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        want = CategoricalIndex(
            ci, categories=list("ab"), ordered=True, dtype="category"
        )
        tm.assert_index_equal(got, want, exact=True)

        # turn me to an Index
        got = Index(np.array(ci))
        assert isinstance(got, Index)
        assert not isinstance(got, CategoricalIndex)

    def test_construction_with_dtype(self):
        # specify dtype
        ci = CategoricalIndex(list("aabbca"), categories=list("abc"), ordered=False)

        got = Index(np.array(ci), dtype="category")
        tm.assert_index_equal(got, ci, exact=True)

        got = Index(np.array(ci).tolist(), dtype="category")
        tm.assert_index_equal(got, ci, exact=True)

        # these are generally only equal when the categories are reordered
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

        got = Index(np.array(ci), dtype="category").reorder_categories(ci.categories)
        tm.assert_index_equal(got, ci, exact=True)

        # make sure indexes are handled
        idx = Index(range(3))
        want = CategoricalIndex([0, 1, 2], categories=idx, ordered=True)
        got = CategoricalIndex(idx, categories=idx, ordered=True)
        tm.assert_index_equal(got, want, exact=True)

    def test_construction_empty_with_bool_categories(self):
        # see GH#22702
        cat = CategoricalIndex([], categories=[True, False])
        sorted_cats = sorted(cat.categories.tolist())
        assert sorted_cats == [False, True]

    def test_construction_with_categorical_dtype(self):
        # construction with CategoricalDtype
        # GH#18109
        data = "a a b b".split()
        cats = "c b a".split()
        ordered = True
        dtype = CategoricalDtype(categories=cats, ordered=ordered)

        got = CategoricalIndex(data, dtype=dtype)
        want = CategoricalIndex(data, categories=cats, ordered=ordered)
        tm.assert_index_equal(got, want, exact=True)

        # GH#19032
        got = Index(data, dtype=dtype)
        tm.assert_index_equal(got, want, exact=True)

        # error when combining categories/ordered and dtype kwargs
        msg = "Cannot specify `categories` or `ordered` together with `dtype`."
        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, categories=cats, dtype=dtype)

        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, ordered=ordered, dtype=dtype)
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_equals.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas import (
5
+ Categorical,
6
+ CategoricalIndex,
7
+ Index,
8
+ MultiIndex,
9
+ )
10
+
11
+
12
class TestEquals:
    """Tests for ``CategoricalIndex.equals`` and element-wise comparisons."""

    def test_equals_categorical(self):
        ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)

        assert ci1.equals(ci1)
        assert not ci1.equals(ci2)
        assert ci1.equals(ci1.astype(object))
        assert ci1.astype(object).equals(ci1)

        assert (ci1 == ci1).all()
        assert not (ci1 != ci1).all()
        assert not (ci1 > ci1).all()
        assert not (ci1 < ci1).all()
        assert (ci1 <= ci1).all()
        assert (ci1 >= ci1).all()

        assert not (ci1 == 1).all()
        assert (ci1 == Index(["a", "b"])).all()
        assert (ci1 == ci1.values).all()

        # invalid comparisons
        with pytest.raises(ValueError, match="Lengths must match"):
            ci1 == Index(["a", "b", "c"])

        msg = "Categoricals can only be compared if 'categories' are the same"
        with pytest.raises(TypeError, match=msg):
            ci1 == ci2
        with pytest.raises(TypeError, match=msg):
            ci1 == Categorical(ci1.values, ordered=False)
        with pytest.raises(TypeError, match=msg):
            ci1 == Categorical(ci1.values, categories=list("abc"))

        # tests
        # make sure that we are testing for category inclusion properly
        letters = list("aabca")
        ci = CategoricalIndex(letters, categories=["c", "a", "b"])
        assert not ci.equals(list("aabca"))
        # Same categories, but different order
        # Unordered
        assert ci.equals(CategoricalIndex(list("aabca")))
        # Ordered
        assert not ci.equals(CategoricalIndex(list("aabca"), ordered=True))
        assert ci.equals(ci.copy())

        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
        assert not ci.equals(list("aabca"))
        assert not ci.equals(CategoricalIndex(list("aabca")))
        assert ci.equals(ci.copy())

        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
        assert not ci.equals(list("aabca") + [np.nan])
        assert ci.equals(CategoricalIndex(list("aabca") + [np.nan]))
        assert not ci.equals(CategoricalIndex(list("aabca") + [np.nan], ordered=True))
        assert ci.equals(ci.copy())

    def test_equals_categorical_unordered(self):
        # https://github.com/pandas-dev/pandas/issues/16603
        a = CategoricalIndex(["A"], categories=["A", "B"])
        b = CategoricalIndex(["A"], categories=["B", "A"])
        c = CategoricalIndex(["C"], categories=["B", "A"])
        assert a.equals(b)
        assert not a.equals(c)
        assert not b.equals(c)

    def test_equals_non_category(self):
        # GH#37667 Case where other contains a value not among ci's
        # categories ("D") and also contains np.nan
        ci = CategoricalIndex(["A", "B", np.nan, np.nan])
        other = Index(["A", "B", "D", np.nan])

        assert not ci.equals(other)

    def test_equals_multiindex(self):
        # dont raise NotImplementedError when calling is_dtype_compat

        mi = MultiIndex.from_arrays([["A", "B", "C", "D"], range(4)])
        ci = mi.to_flat_index().astype("category")

        assert not ci.equals(mi)

    def test_equals_string_dtype(self, any_string_dtype):
        # GH#55364
        idx = CategoricalIndex(list("abc"), name="B")
        other = Index(["a", "b", "c"], name="B", dtype=any_string_dtype)
        assert idx.equals(other)
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_fillna.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas import CategoricalIndex
5
+ import pandas._testing as tm
6
+
7
+
8
class TestFillNA:
    """Tests for ``fillna`` on CategoricalIndex and its backing Categorical."""

    def test_fillna_categorical(self):
        # GH#11343
        idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x")
        # fill by value in categories
        filled = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name="x")
        tm.assert_index_equal(idx.fillna(1.0), filled)

        cat = idx._data

        # fill by value not in categories raises TypeError on EA, casts on CI
        msg = "Cannot setitem on a Categorical with a new category"
        with pytest.raises(TypeError, match=msg):
            cat.fillna(2.0)

        got = idx.fillna(2.0)
        want = idx.astype(object).fillna(2.0)
        tm.assert_index_equal(got, want)

    def test_fillna_copies_with_no_nas(self):
        # Nothing to fill, should still get a copy for the Categorical method,
        # but OK to get a view on CategoricalIndex method
        ci = CategoricalIndex([0, 1, 1])
        got = ci.fillna(0)
        assert got is not ci
        assert tm.shares_memory(got, ci)

        # But at the EA level we always get a copy.
        cat = ci._data
        got = cat.fillna(0)
        assert got._ndarray is not cat._ndarray
        assert got._ndarray.base is None
        assert not tm.shares_memory(got, cat)

    def test_fillna_validates_with_no_nas(self):
        # We validate the fill value even if fillna is a no-op
        ci = CategoricalIndex([2, 3, 3])
        cat = ci._data

        msg = "Cannot setitem on a Categorical with a new category"
        got = ci.fillna(False)
        # nothing to fill, so we dont cast
        tm.assert_index_equal(got, ci)

        # Same check directly on the Categorical
        with pytest.raises(TypeError, match=msg):
            cat.fillna(False)
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_formats.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tests for CategoricalIndex.__repr__ and related methods.
3
+ """
4
+ import pytest
5
+
6
+ from pandas._config import using_string_dtype
7
+ import pandas._config.config as cf
8
+
9
+ from pandas import CategoricalIndex
10
+ import pandas._testing as tm
11
+
12
+
13
+ class TestCategoricalIndexRepr:
14
+ def test_format_different_scalar_lengths(self):
15
+ # GH#35439
16
+ idx = CategoricalIndex(["aaaaaaaaa", "b"])
17
+ expected = ["aaaaaaaaa", "b"]
18
+ msg = r"CategoricalIndex\.format is deprecated"
19
+ with tm.assert_produces_warning(FutureWarning, match=msg):
20
+ assert idx.format() == expected
21
+
22
+ @pytest.mark.xfail(using_string_dtype(), reason="repr different")
23
+ def test_string_categorical_index_repr(self):
24
+ # short
25
+ idx = CategoricalIndex(["a", "bb", "ccc"])
26
+ expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa: E501
27
+ assert repr(idx) == expected
28
+
29
+ # multiple lines
30
+ idx = CategoricalIndex(["a", "bb", "ccc"] * 10)
31
+ expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
32
+ 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb',
33
+ 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
34
+ categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa: E501
35
+
36
+ assert repr(idx) == expected
37
+
38
+ # truncated
39
+ idx = CategoricalIndex(["a", "bb", "ccc"] * 100)
40
+ expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
41
+ ...
42
+ 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
43
+ categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" # noqa: E501
44
+
45
+ assert repr(idx) == expected
46
+
47
+ # larger categories
48
+ idx = CategoricalIndex(list("abcdefghijklmmo"))
49
+ expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
50
+ 'm', 'm', 'o'],
51
+ categories=['a', 'b', 'c', 'd', ..., 'k', 'l', 'm', 'o'], ordered=False, dtype='category')""" # noqa: E501
52
+
53
+ assert repr(idx) == expected
54
+
55
+ # short
56
+ idx = CategoricalIndex(["あ", "いい", "ううう"])
57
+ expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
58
+ assert repr(idx) == expected
59
+
60
+ # multiple lines
61
+ idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
62
+ expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
63
+ 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
64
+ 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
65
+ categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
66
+
67
+ assert repr(idx) == expected
68
+
69
+ # truncated
70
+ idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
71
+ expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
72
+ ...
73
+ 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
74
+ categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa: E501
75
+
76
+ assert repr(idx) == expected
77
+
78
+ # larger categories
79
+ idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
80
+ expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し',
81
+ 'す', 'せ', 'そ'],
82
+ categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')""" # noqa: E501
83
+
84
+ assert repr(idx) == expected
85
+
86
+ # Enable Unicode option -----------------------------------------
87
+ with cf.option_context("display.unicode.east_asian_width", True):
88
+ # short
89
+ idx = CategoricalIndex(["あ", "いい", "ううう"])
90
+ expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
91
+ assert repr(idx) == expected
92
+
93
+ # multiple lines
94
+ idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
95
+ expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
96
+ 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
97
+ 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
98
+ 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
99
+ categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
100
+
101
+ assert repr(idx) == expected
102
+
103
+ # truncated
104
+ idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
105
+ expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
106
+ 'ううう', 'あ',
107
+ ...
108
+ 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
109
+ 'あ', 'いい', 'ううう'],
110
+ categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa: E501
111
+
112
+ assert repr(idx) == expected
113
+
114
+ # larger categories
115
+ idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
116
+ expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ',
117
+ 'さ', 'し', 'す', 'せ', 'そ'],
118
+ categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')""" # noqa: E501
119
+
120
+ assert repr(idx) == expected
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_indexing.py ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas.errors import InvalidIndexError
5
+
6
+ import pandas as pd
7
+ from pandas import (
8
+ CategoricalIndex,
9
+ Index,
10
+ IntervalIndex,
11
+ Timestamp,
12
+ )
13
+ import pandas._testing as tm
14
+
15
+
16
+ class TestTake:
17
+ def test_take_fill_value(self):
18
+ # GH 12631
19
+
20
+ # numeric category
21
+ idx = CategoricalIndex([1, 2, 3], name="xxx")
22
+ result = idx.take(np.array([1, 0, -1]))
23
+ expected = CategoricalIndex([2, 1, 3], name="xxx")
24
+ tm.assert_index_equal(result, expected)
25
+ tm.assert_categorical_equal(result.values, expected.values)
26
+
27
+ # fill_value
28
+ result = idx.take(np.array([1, 0, -1]), fill_value=True)
29
+ expected = CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx")
30
+ tm.assert_index_equal(result, expected)
31
+ tm.assert_categorical_equal(result.values, expected.values)
32
+
33
+ # allow_fill=False
34
+ result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
35
+ expected = CategoricalIndex([2, 1, 3], name="xxx")
36
+ tm.assert_index_equal(result, expected)
37
+ tm.assert_categorical_equal(result.values, expected.values)
38
+
39
+ # object category
40
+ idx = CategoricalIndex(
41
+ list("CBA"), categories=list("ABC"), ordered=True, name="xxx"
42
+ )
43
+ result = idx.take(np.array([1, 0, -1]))
44
+ expected = CategoricalIndex(
45
+ list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
46
+ )
47
+ tm.assert_index_equal(result, expected)
48
+ tm.assert_categorical_equal(result.values, expected.values)
49
+
50
+ # fill_value
51
+ result = idx.take(np.array([1, 0, -1]), fill_value=True)
52
+ expected = CategoricalIndex(
53
+ ["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx"
54
+ )
55
+ tm.assert_index_equal(result, expected)
56
+ tm.assert_categorical_equal(result.values, expected.values)
57
+
58
+ # allow_fill=False
59
+ result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
60
+ expected = CategoricalIndex(
61
+ list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
62
+ )
63
+ tm.assert_index_equal(result, expected)
64
+ tm.assert_categorical_equal(result.values, expected.values)
65
+
66
+ msg = (
67
+ "When allow_fill=True and fill_value is not None, "
68
+ "all indices must be >= -1"
69
+ )
70
+ with pytest.raises(ValueError, match=msg):
71
+ idx.take(np.array([1, 0, -2]), fill_value=True)
72
+ with pytest.raises(ValueError, match=msg):
73
+ idx.take(np.array([1, 0, -5]), fill_value=True)
74
+
75
+ msg = "index -5 is out of bounds for (axis 0 with )?size 3"
76
+ with pytest.raises(IndexError, match=msg):
77
+ idx.take(np.array([1, -5]))
78
+
79
+ def test_take_fill_value_datetime(self):
80
+ # datetime category
81
+ idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
82
+ idx = CategoricalIndex(idx)
83
+ result = idx.take(np.array([1, 0, -1]))
84
+ expected = pd.DatetimeIndex(
85
+ ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
86
+ )
87
+ expected = CategoricalIndex(expected)
88
+ tm.assert_index_equal(result, expected)
89
+
90
+ # fill_value
91
+ result = idx.take(np.array([1, 0, -1]), fill_value=True)
92
+ expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
93
+ exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"])
94
+ expected = CategoricalIndex(expected, categories=exp_cats)
95
+ tm.assert_index_equal(result, expected)
96
+
97
+ # allow_fill=False
98
+ result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
99
+ expected = pd.DatetimeIndex(
100
+ ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
101
+ )
102
+ expected = CategoricalIndex(expected)
103
+ tm.assert_index_equal(result, expected)
104
+
105
+ msg = (
106
+ "When allow_fill=True and fill_value is not None, "
107
+ "all indices must be >= -1"
108
+ )
109
+ with pytest.raises(ValueError, match=msg):
110
+ idx.take(np.array([1, 0, -2]), fill_value=True)
111
+ with pytest.raises(ValueError, match=msg):
112
+ idx.take(np.array([1, 0, -5]), fill_value=True)
113
+
114
+ msg = "index -5 is out of bounds for (axis 0 with )?size 3"
115
+ with pytest.raises(IndexError, match=msg):
116
+ idx.take(np.array([1, -5]))
117
+
118
+ def test_take_invalid_kwargs(self):
119
+ idx = CategoricalIndex([1, 2, 3], name="foo")
120
+ indices = [1, 0, -1]
121
+
122
+ msg = r"take\(\) got an unexpected keyword argument 'foo'"
123
+ with pytest.raises(TypeError, match=msg):
124
+ idx.take(indices, foo=2)
125
+
126
+ msg = "the 'out' parameter is not supported"
127
+ with pytest.raises(ValueError, match=msg):
128
+ idx.take(indices, out=indices)
129
+
130
+ msg = "the 'mode' parameter is not supported"
131
+ with pytest.raises(ValueError, match=msg):
132
+ idx.take(indices, mode="clip")
133
+
134
+
135
+ class TestGetLoc:
136
+ def test_get_loc(self):
137
+ # GH 12531
138
+ cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc"))
139
+ idx1 = Index(list("abcde"))
140
+ assert cidx1.get_loc("a") == idx1.get_loc("a")
141
+ assert cidx1.get_loc("e") == idx1.get_loc("e")
142
+
143
+ for i in [cidx1, idx1]:
144
+ with pytest.raises(KeyError, match="'NOT-EXIST'"):
145
+ i.get_loc("NOT-EXIST")
146
+
147
+ # non-unique
148
+ cidx2 = CategoricalIndex(list("aacded"), categories=list("edabc"))
149
+ idx2 = Index(list("aacded"))
150
+
151
+ # results in bool array
152
+ res = cidx2.get_loc("d")
153
+ tm.assert_numpy_array_equal(res, idx2.get_loc("d"))
154
+ tm.assert_numpy_array_equal(
155
+ res, np.array([False, False, False, True, False, True])
156
+ )
157
+ # unique element results in scalar
158
+ res = cidx2.get_loc("e")
159
+ assert res == idx2.get_loc("e")
160
+ assert res == 4
161
+
162
+ for i in [cidx2, idx2]:
163
+ with pytest.raises(KeyError, match="'NOT-EXIST'"):
164
+ i.get_loc("NOT-EXIST")
165
+
166
+ # non-unique, sliceable
167
+ cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc"))
168
+ idx3 = Index(list("aabbb"))
169
+
170
+ # results in slice
171
+ res = cidx3.get_loc("a")
172
+ assert res == idx3.get_loc("a")
173
+ assert res == slice(0, 2, None)
174
+
175
+ res = cidx3.get_loc("b")
176
+ assert res == idx3.get_loc("b")
177
+ assert res == slice(2, 5, None)
178
+
179
+ for i in [cidx3, idx3]:
180
+ with pytest.raises(KeyError, match="'c'"):
181
+ i.get_loc("c")
182
+
183
+ def test_get_loc_unique(self):
184
+ cidx = CategoricalIndex(list("abc"))
185
+ result = cidx.get_loc("b")
186
+ assert result == 1
187
+
188
+ def test_get_loc_monotonic_nonunique(self):
189
+ cidx = CategoricalIndex(list("abbc"))
190
+ result = cidx.get_loc("b")
191
+ expected = slice(1, 3, None)
192
+ assert result == expected
193
+
194
+ def test_get_loc_nonmonotonic_nonunique(self):
195
+ cidx = CategoricalIndex(list("abcb"))
196
+ result = cidx.get_loc("b")
197
+ expected = np.array([False, True, False, True], dtype=bool)
198
+ tm.assert_numpy_array_equal(result, expected)
199
+
200
+ def test_get_loc_nan(self):
201
+ # GH#41933
202
+ ci = CategoricalIndex(["A", "B", np.nan])
203
+ res = ci.get_loc(np.nan)
204
+
205
+ assert res == 2
206
+
207
+
208
+ class TestGetIndexer:
209
+ def test_get_indexer_base(self):
210
+ # Determined by cat ordering.
211
+ idx = CategoricalIndex(list("cab"), categories=list("cab"))
212
+ expected = np.arange(len(idx), dtype=np.intp)
213
+
214
+ actual = idx.get_indexer(idx)
215
+ tm.assert_numpy_array_equal(expected, actual)
216
+
217
+ with pytest.raises(ValueError, match="Invalid fill method"):
218
+ idx.get_indexer(idx, method="invalid")
219
+
220
+ def test_get_indexer_requires_unique(self):
221
+ ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
222
+ oidx = Index(np.array(ci))
223
+
224
+ msg = "Reindexing only valid with uniquely valued Index objects"
225
+
226
+ for n in [1, 2, 5, len(ci)]:
227
+ finder = oidx[np.random.default_rng(2).integers(0, len(ci), size=n)]
228
+
229
+ with pytest.raises(InvalidIndexError, match=msg):
230
+ ci.get_indexer(finder)
231
+
232
+ # see gh-17323
233
+ #
234
+ # Even when indexer is equal to the
235
+ # members in the index, we should
236
+ # respect duplicates instead of taking
237
+ # the fast-track path.
238
+ for finder in [list("aabbca"), list("aababca")]:
239
+ with pytest.raises(InvalidIndexError, match=msg):
240
+ ci.get_indexer(finder)
241
+
242
+ def test_get_indexer_non_unique(self):
243
+ idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
244
+ idx2 = CategoricalIndex(list("abf"))
245
+
246
+ for indexer in [idx2, list("abf"), Index(list("abf"))]:
247
+ msg = "Reindexing only valid with uniquely valued Index objects"
248
+ with pytest.raises(InvalidIndexError, match=msg):
249
+ idx1.get_indexer(indexer)
250
+
251
+ r1, _ = idx1.get_indexer_non_unique(indexer)
252
+ expected = np.array([0, 1, 2, -1], dtype=np.intp)
253
+ tm.assert_almost_equal(r1, expected)
254
+
255
+ def test_get_indexer_method(self):
256
+ idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
257
+ idx2 = CategoricalIndex(list("abf"))
258
+
259
+ msg = "method pad not yet implemented for CategoricalIndex"
260
+ with pytest.raises(NotImplementedError, match=msg):
261
+ idx2.get_indexer(idx1, method="pad")
262
+ msg = "method backfill not yet implemented for CategoricalIndex"
263
+ with pytest.raises(NotImplementedError, match=msg):
264
+ idx2.get_indexer(idx1, method="backfill")
265
+
266
+ msg = "method nearest not yet implemented for CategoricalIndex"
267
+ with pytest.raises(NotImplementedError, match=msg):
268
+ idx2.get_indexer(idx1, method="nearest")
269
+
270
+ def test_get_indexer_array(self):
271
+ arr = np.array(
272
+ [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")],
273
+ dtype=object,
274
+ )
275
+ cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")]
276
+ ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category")
277
+ result = ci.get_indexer(arr)
278
+ expected = np.array([0, 1], dtype="intp")
279
+ tm.assert_numpy_array_equal(result, expected)
280
+
281
+ def test_get_indexer_same_categories_same_order(self):
282
+ ci = CategoricalIndex(["a", "b"], categories=["a", "b"])
283
+
284
+ result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["a", "b"]))
285
+ expected = np.array([1, 1], dtype="intp")
286
+ tm.assert_numpy_array_equal(result, expected)
287
+
288
+ def test_get_indexer_same_categories_different_order(self):
289
+ # https://github.com/pandas-dev/pandas/issues/19551
290
+ ci = CategoricalIndex(["a", "b"], categories=["a", "b"])
291
+
292
+ result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["b", "a"]))
293
+ expected = np.array([1, 1], dtype="intp")
294
+ tm.assert_numpy_array_equal(result, expected)
295
+
296
+ def test_get_indexer_nans_in_index_and_target(self):
297
+ # GH 45361
298
+ ci = CategoricalIndex([1, 2, np.nan, 3])
299
+ other1 = [2, 3, 4, np.nan]
300
+ res1 = ci.get_indexer(other1)
301
+ expected1 = np.array([1, 3, -1, 2], dtype=np.intp)
302
+ tm.assert_numpy_array_equal(res1, expected1)
303
+ other2 = [1, 4, 2, 3]
304
+ res2 = ci.get_indexer(other2)
305
+ expected2 = np.array([0, -1, 1, 3], dtype=np.intp)
306
+ tm.assert_numpy_array_equal(res2, expected2)
307
+
308
+
309
+ class TestWhere:
310
+ def test_where(self, listlike_box):
311
+ klass = listlike_box
312
+
313
+ i = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
314
+ cond = [True] * len(i)
315
+ expected = i
316
+ result = i.where(klass(cond))
317
+ tm.assert_index_equal(result, expected)
318
+
319
+ cond = [False] + [True] * (len(i) - 1)
320
+ expected = CategoricalIndex([np.nan] + i[1:].tolist(), categories=i.categories)
321
+ result = i.where(klass(cond))
322
+ tm.assert_index_equal(result, expected)
323
+
324
+ def test_where_non_categories(self):
325
+ ci = CategoricalIndex(["a", "b", "c", "d"])
326
+ mask = np.array([True, False, True, False])
327
+
328
+ result = ci.where(mask, 2)
329
+ expected = Index(["a", 2, "c", 2], dtype=object)
330
+ tm.assert_index_equal(result, expected)
331
+
332
+ msg = "Cannot setitem on a Categorical with a new category"
333
+ with pytest.raises(TypeError, match=msg):
334
+ # Test the Categorical method directly
335
+ ci._data._where(mask, 2)
336
+
337
+
338
+ class TestContains:
339
+ def test_contains(self):
340
+ ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=False)
341
+
342
+ assert "a" in ci
343
+ assert "z" not in ci
344
+ assert "e" not in ci
345
+ assert np.nan not in ci
346
+
347
+ # assert codes NOT in index
348
+ assert 0 not in ci
349
+ assert 1 not in ci
350
+
351
+ def test_contains_nan(self):
352
+ ci = CategoricalIndex(list("aabbca") + [np.nan], categories=list("cabdef"))
353
+ assert np.nan in ci
354
+
355
+ @pytest.mark.parametrize("unwrap", [True, False])
356
+ def test_contains_na_dtype(self, unwrap):
357
+ dti = pd.date_range("2016-01-01", periods=100).insert(0, pd.NaT)
358
+ pi = dti.to_period("D")
359
+ tdi = dti - dti[-1]
360
+ ci = CategoricalIndex(dti)
361
+
362
+ obj = ci
363
+ if unwrap:
364
+ obj = ci._data
365
+
366
+ assert np.nan in obj
367
+ assert None in obj
368
+ assert pd.NaT in obj
369
+ assert np.datetime64("NaT") in obj
370
+ assert np.timedelta64("NaT") not in obj
371
+
372
+ obj2 = CategoricalIndex(tdi)
373
+ if unwrap:
374
+ obj2 = obj2._data
375
+
376
+ assert np.nan in obj2
377
+ assert None in obj2
378
+ assert pd.NaT in obj2
379
+ assert np.datetime64("NaT") not in obj2
380
+ assert np.timedelta64("NaT") in obj2
381
+
382
+ obj3 = CategoricalIndex(pi)
383
+ if unwrap:
384
+ obj3 = obj3._data
385
+
386
+ assert np.nan in obj3
387
+ assert None in obj3
388
+ assert pd.NaT in obj3
389
+ assert np.datetime64("NaT") not in obj3
390
+ assert np.timedelta64("NaT") not in obj3
391
+
392
+ @pytest.mark.parametrize(
393
+ "item, expected",
394
+ [
395
+ (pd.Interval(0, 1), True),
396
+ (1.5, True),
397
+ (pd.Interval(0.5, 1.5), False),
398
+ ("a", False),
399
+ (Timestamp(1), False),
400
+ (pd.Timedelta(1), False),
401
+ ],
402
+ ids=str,
403
+ )
404
+ def test_contains_interval(self, item, expected):
405
+ # GH 23705
406
+ ci = CategoricalIndex(IntervalIndex.from_breaks(range(3)))
407
+ result = item in ci
408
+ assert result is expected
409
+
410
+ def test_contains_list(self):
411
+ # GH#21729
412
+ idx = CategoricalIndex([1, 2, 3])
413
+
414
+ assert "a" not in idx
415
+
416
+ with pytest.raises(TypeError, match="unhashable type"):
417
+ ["a"] in idx
418
+
419
+ with pytest.raises(TypeError, match="unhashable type"):
420
+ ["a", "b"] in idx
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_map.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ import pandas as pd
5
+ from pandas import (
6
+ CategoricalIndex,
7
+ Index,
8
+ Series,
9
+ )
10
+ import pandas._testing as tm
11
+
12
+
13
+ @pytest.mark.parametrize(
14
+ "data, categories",
15
+ [
16
+ (list("abcbca"), list("cab")),
17
+ (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)),
18
+ ],
19
+ ids=["string", "interval"],
20
+ )
21
+ def test_map_str(data, categories, ordered):
22
+ # GH 31202 - override base class since we want to maintain categorical/ordered
23
+ index = CategoricalIndex(data, categories=categories, ordered=ordered)
24
+ result = index.map(str)
25
+ expected = CategoricalIndex(
26
+ map(str, data), categories=map(str, categories), ordered=ordered
27
+ )
28
+ tm.assert_index_equal(result, expected)
29
+
30
+
31
+ def test_map():
32
+ ci = CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True)
33
+ result = ci.map(lambda x: x.lower())
34
+ exp = CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True)
35
+ tm.assert_index_equal(result, exp)
36
+
37
+ ci = CategoricalIndex(
38
+ list("ABABC"), categories=list("BAC"), ordered=False, name="XXX"
39
+ )
40
+ result = ci.map(lambda x: x.lower())
41
+ exp = CategoricalIndex(
42
+ list("ababc"), categories=list("bac"), ordered=False, name="XXX"
43
+ )
44
+ tm.assert_index_equal(result, exp)
45
+
46
+ # GH 12766: Return an index not an array
47
+ tm.assert_index_equal(
48
+ ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX")
49
+ )
50
+
51
+ # change categories dtype
52
+ ci = CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False)
53
+
54
+ def f(x):
55
+ return {"A": 10, "B": 20, "C": 30}.get(x)
56
+
57
+ result = ci.map(f)
58
+ exp = CategoricalIndex([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False)
59
+ tm.assert_index_equal(result, exp)
60
+
61
+ result = ci.map(Series([10, 20, 30], index=["A", "B", "C"]))
62
+ tm.assert_index_equal(result, exp)
63
+
64
+ result = ci.map({"A": 10, "B": 20, "C": 30})
65
+ tm.assert_index_equal(result, exp)
66
+
67
+
68
+ def test_map_with_categorical_series():
69
+ # GH 12756
70
+ a = Index([1, 2, 3, 4])
71
+ b = Series(["even", "odd", "even", "odd"], dtype="category")
72
+ c = Series(["even", "odd", "even", "odd"])
73
+
74
+ exp = CategoricalIndex(["odd", "even", "odd", np.nan])
75
+ tm.assert_index_equal(a.map(b), exp)
76
+ exp = Index(["odd", "even", "odd", np.nan])
77
+ tm.assert_index_equal(a.map(c), exp)
78
+
79
+
80
+ @pytest.mark.parametrize(
81
+ ("data", "f", "expected"),
82
+ (
83
+ ([1, 1, np.nan], pd.isna, CategoricalIndex([False, False, np.nan])),
84
+ ([1, 2, np.nan], pd.isna, Index([False, False, np.nan])),
85
+ ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
86
+ ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
87
+ (
88
+ [1, 1, np.nan],
89
+ Series([False, False]),
90
+ CategoricalIndex([False, False, np.nan]),
91
+ ),
92
+ (
93
+ [1, 2, np.nan],
94
+ Series([False, False, False]),
95
+ Index([False, False, np.nan]),
96
+ ),
97
+ ),
98
+ )
99
+ def test_map_with_nan_ignore(data, f, expected): # GH 24241
100
+ values = CategoricalIndex(data)
101
+ result = values.map(f, na_action="ignore")
102
+ tm.assert_index_equal(result, expected)
103
+
104
+
105
+ @pytest.mark.parametrize(
106
+ ("data", "f", "expected"),
107
+ (
108
+ ([1, 1, np.nan], pd.isna, Index([False, False, True])),
109
+ ([1, 2, np.nan], pd.isna, Index([False, False, True])),
110
+ ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
111
+ ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
112
+ (
113
+ [1, 1, np.nan],
114
+ Series([False, False]),
115
+ CategoricalIndex([False, False, np.nan]),
116
+ ),
117
+ (
118
+ [1, 2, np.nan],
119
+ Series([False, False, False]),
120
+ Index([False, False, np.nan]),
121
+ ),
122
+ ),
123
+ )
124
+ def test_map_with_nan_none(data, f, expected): # GH 24241
125
+ values = CategoricalIndex(data)
126
+ result = values.map(f, na_action=None)
127
+ tm.assert_index_equal(result, expected)
128
+
129
+
130
+ def test_map_with_dict_or_series():
131
+ orig_values = ["a", "B", 1, "a"]
132
+ new_values = ["one", 2, 3.0, "one"]
133
+ cur_index = CategoricalIndex(orig_values, name="XXX")
134
+ expected = CategoricalIndex(new_values, name="XXX", categories=[3.0, 2, "one"])
135
+
136
+ mapper = Series(new_values[:-1], index=orig_values[:-1])
137
+ result = cur_index.map(mapper)
138
+ # Order of categories in result can be different
139
+ tm.assert_index_equal(result, expected)
140
+
141
+ mapper = dict(zip(orig_values[:-1], new_values[:-1]))
142
+ result = cur_index.map(mapper)
143
+ # Order of categories in result can be different
144
+ tm.assert_index_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_reindex.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas import (
5
+ Categorical,
6
+ CategoricalIndex,
7
+ Index,
8
+ Interval,
9
+ )
10
+ import pandas._testing as tm
11
+
12
+
13
+ class TestReindex:
14
+ def test_reindex_list_non_unique(self):
15
+ # GH#11586
16
+ msg = "cannot reindex on an axis with duplicate labels"
17
+ ci = CategoricalIndex(["a", "b", "c", "a"])
18
+ with pytest.raises(ValueError, match=msg):
19
+ ci.reindex(["a", "c"])
20
+
21
+ def test_reindex_categorical_non_unique(self):
22
+ msg = "cannot reindex on an axis with duplicate labels"
23
+ ci = CategoricalIndex(["a", "b", "c", "a"])
24
+ with pytest.raises(ValueError, match=msg):
25
+ ci.reindex(Categorical(["a", "c"]))
26
+
27
+ def test_reindex_list_non_unique_unused_category(self):
28
+ msg = "cannot reindex on an axis with duplicate labels"
29
+ ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
30
+ with pytest.raises(ValueError, match=msg):
31
+ ci.reindex(["a", "c"])
32
+
33
+ def test_reindex_categorical_non_unique_unused_category(self):
34
+ msg = "cannot reindex on an axis with duplicate labels"
35
+ ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
36
+ with pytest.raises(ValueError, match=msg):
37
+ ci.reindex(Categorical(["a", "c"]))
38
+
39
+ def test_reindex_duplicate_target(self):
40
+ # See GH25459
41
+ cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
42
+ res, indexer = cat.reindex(["a", "c", "c"])
43
+ exp = Index(["a", "c", "c"])
44
+ tm.assert_index_equal(res, exp, exact=True)
45
+ tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))
46
+
47
+ res, indexer = cat.reindex(
48
+ CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
49
+ )
50
+ exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
51
+ tm.assert_index_equal(res, exp, exact=True)
52
+ tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))
53
+
54
+ def test_reindex_empty_index(self):
55
+ # See GH16770
56
+ c = CategoricalIndex([])
57
+ res, indexer = c.reindex(["a", "b"])
58
+ tm.assert_index_equal(res, Index(["a", "b"]), exact=True)
59
+ tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp))
60
+
61
+ def test_reindex_categorical_added_category(self):
62
+ # GH 42424
63
+ ci = CategoricalIndex(
64
+ [Interval(0, 1, closed="right"), Interval(1, 2, closed="right")],
65
+ ordered=True,
66
+ )
67
+ ci_add = CategoricalIndex(
68
+ [
69
+ Interval(0, 1, closed="right"),
70
+ Interval(1, 2, closed="right"),
71
+ Interval(2, 3, closed="right"),
72
+ Interval(3, 4, closed="right"),
73
+ ],
74
+ ordered=True,
75
+ )
76
+ result, _ = ci.reindex(ci_add)
77
+ expected = ci_add
78
+ tm.assert_index_equal(expected, result)
py311/lib/python3.11/site-packages/pandas/tests/indexes/categorical/test_setops.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas import (
5
+ CategoricalIndex,
6
+ Index,
7
+ )
8
+ import pandas._testing as tm
9
+
10
+
11
+ @pytest.mark.parametrize("na_value", [None, np.nan])
12
+ def test_difference_with_na(na_value):
13
+ # GH 57318
14
+ ci = CategoricalIndex(["a", "b", "c", None])
15
+ other = Index(["c", na_value])
16
+ result = ci.difference(other)
17
+ expected = CategoricalIndex(["a", "b"], categories=["a", "b", "c"])
18
+ tm.assert_index_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/__init__.py ADDED
File without changes
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_arithmetic.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Arithmetic tests specific to DatetimeIndex are generally about `freq`
2
+ # retention or inference. Other arithmetic tests belong in
3
+ # tests/arithmetic/test_datetime64.py
4
+ import pytest
5
+
6
+ from pandas import (
7
+ Timedelta,
8
+ TimedeltaIndex,
9
+ Timestamp,
10
+ date_range,
11
+ timedelta_range,
12
+ )
13
+ import pandas._testing as tm
14
+
15
+
16
+ class TestDatetimeIndexArithmetic:
17
+ def test_add_timedelta_preserves_freq(self):
18
+ # GH#37295 should hold for any DTI with freq=None or Tick freq
19
+ tz = "Canada/Eastern"
20
+ dti = date_range(
21
+ start=Timestamp("2019-03-26 00:00:00-0400", tz=tz),
22
+ end=Timestamp("2020-10-17 00:00:00-0400", tz=tz),
23
+ freq="D",
24
+ )
25
+ result = dti + Timedelta(days=1)
26
+ assert result.freq == dti.freq
27
+
28
+ def test_sub_datetime_preserves_freq(self, tz_naive_fixture):
29
+ # GH#48818
30
+ dti = date_range("2016-01-01", periods=12, tz=tz_naive_fixture)
31
+
32
+ res = dti - dti[0]
33
+ expected = timedelta_range("0 Days", "11 Days")
34
+ tm.assert_index_equal(res, expected)
35
+ assert res.freq == expected.freq
36
+
37
+ @pytest.mark.xfail(
38
+ reason="The inherited freq is incorrect bc dti.freq is incorrect "
39
+ "https://github.com/pandas-dev/pandas/pull/48818/files#r982793461"
40
+ )
41
+ def test_sub_datetime_preserves_freq_across_dst(self):
42
+ # GH#48818
43
+ ts = Timestamp("2016-03-11", tz="US/Pacific")
44
+ dti = date_range(ts, periods=4)
45
+
46
+ res = dti - dti[0]
47
+ expected = TimedeltaIndex(
48
+ [
49
+ Timedelta(days=0),
50
+ Timedelta(days=1),
51
+ Timedelta(days=2),
52
+ Timedelta(days=2, hours=23),
53
+ ]
54
+ )
55
+ tm.assert_index_equal(res, expected)
56
+ assert res.freq == expected.freq
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_constructors.py ADDED
@@ -0,0 +1,1204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from datetime import (
4
+ datetime,
5
+ timedelta,
6
+ timezone,
7
+ )
8
+ from functools import partial
9
+ from operator import attrgetter
10
+
11
+ import dateutil
12
+ import dateutil.tz
13
+ from dateutil.tz import gettz
14
+ import numpy as np
15
+ import pytest
16
+ import pytz
17
+
18
+ from pandas._libs.tslibs import (
19
+ OutOfBoundsDatetime,
20
+ astype_overflowsafe,
21
+ timezones,
22
+ )
23
+
24
+ import pandas as pd
25
+ from pandas import (
26
+ DatetimeIndex,
27
+ Index,
28
+ Timestamp,
29
+ date_range,
30
+ offsets,
31
+ to_datetime,
32
+ )
33
+ import pandas._testing as tm
34
+ from pandas.core.arrays import period_array
35
+
36
+
37
+ class TestDatetimeIndex:
38
+ def test_closed_deprecated(self):
39
+ # GH#52628
40
+ msg = "The 'closed' keyword"
41
+ with tm.assert_produces_warning(FutureWarning, match=msg):
42
+ DatetimeIndex([], closed=True)
43
+
44
+ def test_normalize_deprecated(self):
45
+ # GH#52628
46
+ msg = "The 'normalize' keyword"
47
+ with tm.assert_produces_warning(FutureWarning, match=msg):
48
+ DatetimeIndex([], normalize=True)
49
+
50
+ def test_from_dt64_unsupported_unit(self):
51
+ # GH#49292
52
+ val = np.datetime64(1, "D")
53
+ result = DatetimeIndex([val], tz="US/Pacific")
54
+
55
+ expected = DatetimeIndex([val.astype("M8[s]")], tz="US/Pacific")
56
+ tm.assert_index_equal(result, expected)
57
+
58
+ def test_explicit_tz_none(self):
59
+ # GH#48659
60
+ dti = date_range("2016-01-01", periods=10, tz="UTC")
61
+
62
+ msg = "Passed data is timezone-aware, incompatible with 'tz=None'"
63
+ with pytest.raises(ValueError, match=msg):
64
+ DatetimeIndex(dti, tz=None)
65
+
66
+ with pytest.raises(ValueError, match=msg):
67
+ DatetimeIndex(np.array(dti), tz=None)
68
+
69
+ msg = "Cannot pass both a timezone-aware dtype and tz=None"
70
+ with pytest.raises(ValueError, match=msg):
71
+ DatetimeIndex([], dtype="M8[ns, UTC]", tz=None)
72
+
73
+ def test_freq_validation_with_nat(self):
74
+ # GH#11587 make sure we get a useful error message when generate_range
75
+ # raises
76
+ msg = (
77
+ "Inferred frequency None from passed values does not conform "
78
+ "to passed frequency D"
79
+ )
80
+ with pytest.raises(ValueError, match=msg):
81
+ DatetimeIndex([pd.NaT, Timestamp("2011-01-01")], freq="D")
82
+ with pytest.raises(ValueError, match=msg):
83
+ DatetimeIndex([pd.NaT, Timestamp("2011-01-01")._value], freq="D")
84
+
85
+ # TODO: better place for tests shared by DTI/TDI?
86
+ @pytest.mark.parametrize(
87
+ "index",
88
+ [
89
+ date_range("2016-01-01", periods=5, tz="US/Pacific"),
90
+ pd.timedelta_range("1 Day", periods=5),
91
+ ],
92
+ )
93
+ def test_shallow_copy_inherits_array_freq(self, index):
94
+ # If we pass a DTA/TDA to shallow_copy and don't specify a freq,
95
+ # we should inherit the array's freq, not our own.
96
+ array = index._data
97
+
98
+ arr = array[[0, 3, 2, 4, 1]]
99
+ assert arr.freq is None
100
+
101
+ result = index._shallow_copy(arr)
102
+ assert result.freq is None
103
+
104
+ def test_categorical_preserves_tz(self):
105
+ # GH#18664 retain tz when going DTI-->Categorical-->DTI
106
+ dti = DatetimeIndex(
107
+ [pd.NaT, "2015-01-01", "1999-04-06 15:14:13", "2015-01-01"], tz="US/Eastern"
108
+ )
109
+
110
+ for dtobj in [dti, dti._data]:
111
+ # works for DatetimeIndex or DatetimeArray
112
+
113
+ ci = pd.CategoricalIndex(dtobj)
114
+ carr = pd.Categorical(dtobj)
115
+ cser = pd.Series(ci)
116
+
117
+ for obj in [ci, carr, cser]:
118
+ result = DatetimeIndex(obj)
119
+ tm.assert_index_equal(result, dti)
120
+
121
def test_dti_with_period_data_raises(self):
    """Period data is rejected by ``DatetimeIndex`` and ``to_datetime`` (GH#23675)."""
    periods = pd.PeriodIndex(["2016Q1", "2016Q2"], freq="Q")
    msg = "PeriodDtype data is invalid"

    # Both the PeriodIndex and the equivalent period array must be refused.
    for data in (periods, period_array(periods)):
        for constructor in (DatetimeIndex, to_datetime):
            with pytest.raises(TypeError, match=msg):
                constructor(data)
136
+
137
def test_dti_with_timedelta64_data_raises(self):
    """timedelta64 data cannot be converted to datetime64.

    Deprecated in GH#23675, enforced in GH#29794.
    """
    td_values = np.array([0], dtype="m8[ns]")
    msg = r"timedelta64\[ns\] cannot be converted to datetime64"

    # Try the raw ndarray first, then the same data wrapped in a TimedeltaIndex.
    for data in (td_values, pd.TimedeltaIndex(td_values)):
        for constructor in (DatetimeIndex, to_datetime):
            with pytest.raises(TypeError, match=msg):
                constructor(data)
152
+
153
+ def test_constructor_from_sparse_array(self):
154
+ # https://github.com/pandas-dev/pandas/issues/35843
155
+ values = [
156
+ Timestamp("2012-05-01T01:00:00.000000"),
157
+ Timestamp("2016-05-01T01:00:00.000000"),
158
+ ]
159
+ arr = pd.arrays.SparseArray(values)
160
+ result = Index(arr)
161
+ assert type(result) is Index
162
+ assert result.dtype == arr.dtype
163
+
164
+ def test_construction_caching(self):
165
+ df = pd.DataFrame(
166
+ {
167
+ "dt": date_range("20130101", periods=3),
168
+ "dttz": date_range("20130101", periods=3, tz="US/Eastern"),
169
+ "dt_with_null": [
170
+ Timestamp("20130101"),
171
+ pd.NaT,
172
+ Timestamp("20130103"),
173
+ ],
174
+ "dtns": date_range("20130101", periods=3, freq="ns"),
175
+ }
176
+ )
177
+ assert df.dttz.dtype.tz.zone == "US/Eastern"
178
+
179
@pytest.mark.parametrize(
    "kwargs",
    [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
)
def test_construction_with_alt(self, kwargs, tz_aware_fixture):
    """Re-wrapping an aware index with its own tz and/or dtype returns an equal index."""
    index = date_range("20130101", periods=5, freq="h", tz=tz_aware_fixture)
    # Each parametrized value is an attribute path resolved against the index.
    resolved = {}
    for key, attr_path in kwargs.items():
        resolved[key] = attrgetter(attr_path)(index)
    tm.assert_index_equal(index, DatetimeIndex(index, **resolved))
189
+
190
@pytest.mark.parametrize(
    "kwargs",
    [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
)
def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
    """i8 input with a tz, and rejection of a tz given together with a tz-aware dtype."""
    index = date_range("20130101", periods=5, freq="h", tz=tz_aware_fixture)
    index = index._with_freq(None)
    # Each parametrized value is an attribute path resolved against the index.
    resolved = {key: attrgetter(path)(index) for key, path in kwargs.items()}

    if "tz" in resolved:
        from_i8 = DatetimeIndex(index.asi8, tz="UTC").tz_convert(resolved["tz"])
        tm.assert_index_equal(from_i8, DatetimeIndex(index, **resolved))

    # localize into the provided tz
    naive = index.tz_localize(None)
    tm.assert_index_equal(
        DatetimeIndex(naive.asi8, tz="UTC"), naive.tz_localize("UTC")
    )

    # incompat tz/dtype
    msg = "cannot supply both a tz and a dtype with a tz"
    with pytest.raises(ValueError, match=msg):
        DatetimeIndex(naive.asi8, dtype=index.dtype, tz="US/Pacific")
215
+
216
def test_construction_index_with_mixed_timezones(self):
    """``Index`` infers a DatetimeIndex only when all Timestamps agree on tz (gh-11488)."""

    def check_naive(stamps):
        # Naive input: DatetimeIndex without a tz.
        result = Index(stamps, name="idx")
        exp = DatetimeIndex(stamps, name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

    def check_same_tz(wall_times, tz):
        # Aware input sharing one tz: DatetimeIndex carrying that tz.
        result = Index([Timestamp(w, tz=tz) for w in wall_times], name="idx")
        exp = DatetimeIndex([Timestamp(w) for w in wall_times], tz=tz, name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

    def check_object(stamps):
        # Disagreeing tz (or awareness): plain object-dtype Index.
        result = Index(stamps, name="idx")
        exp = Index(stamps, dtype="object", name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

    # no tz results in DatetimeIndex
    check_naive([Timestamp("2011-01-01"), Timestamp("2011-01-02")])

    # same tz results in DatetimeIndex
    check_same_tz(["2011-01-01 10:00", "2011-01-02 10:00"], "Asia/Tokyo")

    # same tz results in DatetimeIndex (DST)
    check_same_tz(["2011-01-01 10:00", "2011-08-01 10:00"], "US/Eastern")

    # Different tz results in Index(dtype=object)
    check_object(
        [
            Timestamp("2011-01-01 10:00"),
            Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        ]
    )
    check_object(
        [
            Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
            Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        ]
    )

    # Strings with differing offsets are refused by DatetimeIndex itself.
    msg = "DatetimeIndex has mixed timezones"
    msg_depr = "parsing datetimes with mixed time zones will raise an error"
    with pytest.raises(TypeError, match=msg):
        with tm.assert_produces_warning(FutureWarning, match=msg_depr):
            DatetimeIndex(["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"])

    # length = 1
    check_naive([Timestamp("2011-01-01")])

    # length = 1 with tz
    check_same_tz(["2011-01-01 10:00"], "Asia/Tokyo")
321
+
322
+ def test_construction_index_with_mixed_timezones_with_NaT(self):
323
+ # see gh-11488
324
+ result = Index(
325
+ [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
326
+ name="idx",
327
+ )
328
+ exp = DatetimeIndex(
329
+ [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
330
+ name="idx",
331
+ )
332
+ tm.assert_index_equal(result, exp, exact=True)
333
+ assert isinstance(result, DatetimeIndex)
334
+ assert result.tz is None
335
+
336
+ # Same tz results in DatetimeIndex
337
+ result = Index(
338
+ [
339
+ pd.NaT,
340
+ Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
341
+ pd.NaT,
342
+ Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
343
+ ],
344
+ name="idx",
345
+ )
346
+ exp = DatetimeIndex(
347
+ [
348
+ pd.NaT,
349
+ Timestamp("2011-01-01 10:00"),
350
+ pd.NaT,
351
+ Timestamp("2011-01-02 10:00"),
352
+ ],
353
+ tz="Asia/Tokyo",
354
+ name="idx",
355
+ )
356
+ tm.assert_index_equal(result, exp, exact=True)
357
+ assert isinstance(result, DatetimeIndex)
358
+ assert result.tz is not None
359
+ assert result.tz == exp.tz
360
+
361
+ # same tz results in DatetimeIndex (DST)
362
+ result = Index(
363
+ [
364
+ Timestamp("2011-01-01 10:00", tz="US/Eastern"),
365
+ pd.NaT,
366
+ Timestamp("2011-08-01 10:00", tz="US/Eastern"),
367
+ ],
368
+ name="idx",
369
+ )
370
+ exp = DatetimeIndex(
371
+ [Timestamp("2011-01-01 10:00"), pd.NaT, Timestamp("2011-08-01 10:00")],
372
+ tz="US/Eastern",
373
+ name="idx",
374
+ )
375
+ tm.assert_index_equal(result, exp, exact=True)
376
+ assert isinstance(result, DatetimeIndex)
377
+ assert result.tz is not None
378
+ assert result.tz == exp.tz
379
+
380
+ # different tz results in Index(dtype=object)
381
+ result = Index(
382
+ [
383
+ pd.NaT,
384
+ Timestamp("2011-01-01 10:00"),
385
+ pd.NaT,
386
+ Timestamp("2011-01-02 10:00", tz="US/Eastern"),
387
+ ],
388
+ name="idx",
389
+ )
390
+ exp = Index(
391
+ [
392
+ pd.NaT,
393
+ Timestamp("2011-01-01 10:00"),
394
+ pd.NaT,
395
+ Timestamp("2011-01-02 10:00", tz="US/Eastern"),
396
+ ],
397
+ dtype="object",
398
+ name="idx",
399
+ )
400
+ tm.assert_index_equal(result, exp, exact=True)
401
+ assert not isinstance(result, DatetimeIndex)
402
+
403
+ result = Index(
404
+ [
405
+ pd.NaT,
406
+ Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
407
+ pd.NaT,
408
+ Timestamp("2011-01-02 10:00", tz="US/Eastern"),
409
+ ],
410
+ name="idx",
411
+ )
412
+ exp = Index(
413
+ [
414
+ pd.NaT,
415
+ Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
416
+ pd.NaT,
417
+ Timestamp("2011-01-02 10:00", tz="US/Eastern"),
418
+ ],
419
+ dtype="object",
420
+ name="idx",
421
+ )
422
+ tm.assert_index_equal(result, exp, exact=True)
423
+ assert not isinstance(result, DatetimeIndex)
424
+
425
+ # all NaT
426
+ result = Index([pd.NaT, pd.NaT], name="idx")
427
+ exp = DatetimeIndex([pd.NaT, pd.NaT], name="idx")
428
+ tm.assert_index_equal(result, exp, exact=True)
429
+ assert isinstance(result, DatetimeIndex)
430
+ assert result.tz is None
431
+
432
def test_construction_dti_with_mixed_timezones(self):
    """``DatetimeIndex`` with same-tz, mixed-tz and explicit tz/dtype inputs.

    GH 11488 (not changed, added explicit tests).
    """
    tokyo, eastern = "Asia/Tokyo", "US/Eastern"

    # no tz results in DatetimeIndex
    naive = [Timestamp("2011-01-01"), Timestamp("2011-01-02")]
    result = DatetimeIndex(naive, name="idx")
    tm.assert_index_equal(result, DatetimeIndex(naive, name="idx"), exact=True)
    assert isinstance(result, DatetimeIndex)

    # same tz results in DatetimeIndex (second case: DST)
    same_tz_cases = [
        (tokyo, ("2011-01-01 10:00", "2011-01-02 10:00")),
        (eastern, ("2011-01-01 10:00", "2011-08-01 10:00")),
    ]
    for tz, walls in same_tz_cases:
        result = DatetimeIndex([Timestamp(w, tz=tz) for w in walls], name="idx")
        exp = DatetimeIndex([Timestamp(w) for w in walls], tz=tz, name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

    first_tokyo = Timestamp("2011-01-01 10:00", tz=tokyo)
    second_eastern = Timestamp("2011-01-02 10:00", tz=eastern)

    # tz mismatch with no tz/dtype requested raises
    msg = "cannot be converted to datetime64"
    with pytest.raises(ValueError, match=msg):
        DatetimeIndex([first_tokyo, second_eastern], name="idx")

    # pre-2.0 this raised because of the awareness mismatch. As of 2.0, with
    # a tz specified, this behaves as if called pointwise, so the naive
    # Timestamp is treated as a wall time.
    dti = DatetimeIndex(
        [Timestamp("2011-01-01 10:00"), second_eastern], tz=tokyo, name="idx"
    )
    expected = DatetimeIndex(
        [first_tokyo, second_eastern.tz_convert(tokyo)], tz=tokyo, name="idx"
    )
    tm.assert_index_equal(dti, expected)

    # pre-2.0 mixed-tz scalars raised even if a tz/dtype was specified.
    # As of 2.0 the requested tz/dtype is returned, whether it is given
    # through ``tz`` or through ``dtype``.
    expected = DatetimeIndex(
        [first_tokyo.tz_convert(eastern), second_eastern], tz=eastern, name="idx"
    )
    for kwargs in ({"tz": "US/Eastern"}, {"dtype": "M8[ns, US/Eastern]"}):
        dti = DatetimeIndex([first_tokyo, second_eastern], name="idx", **kwargs)
        tm.assert_index_equal(dti, expected)
540
+
541
+ def test_construction_base_constructor(self):
542
+ arr = [Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-03")]
543
+ tm.assert_index_equal(Index(arr), DatetimeIndex(arr))
544
+ tm.assert_index_equal(Index(np.array(arr)), DatetimeIndex(np.array(arr)))
545
+
546
+ arr = [np.nan, pd.NaT, Timestamp("2011-01-03")]
547
+ tm.assert_index_equal(Index(arr), DatetimeIndex(arr))
548
+ tm.assert_index_equal(Index(np.array(arr)), DatetimeIndex(np.array(arr)))
549
+
550
def test_construction_outofbounds(self):
    """Far-future datetimes give an object ``Index`` but fail for ``DatetimeIndex`` (GH 13663)."""
    dates = [datetime(year, 1, 1) for year in (3000, 4000, 5000, 6000)]

    # coerces to object
    tm.assert_index_equal(Index(dates), Index(dates, dtype=object))

    msg = "^Out of bounds nanosecond timestamp: 3000-01-01 00:00:00, at position 0$"
    # can't create DatetimeIndex
    with pytest.raises(OutOfBoundsDatetime, match=msg):
        DatetimeIndex(dates)
566
+
567
@pytest.mark.parametrize("data", [["1400-01-01"], [datetime(1400, 1, 1)]])
def test_dti_date_out_of_range(self, data):
    """Year 1400 raises out-of-bounds, given as a string or as a datetime (GH#1475)."""
    prefix = "^Out of bounds nanosecond timestamp: "
    # The time part is optional in the reported value.
    suffix = "1400-01-01( 00:00:00)?, at position 0$"
    with pytest.raises(OutOfBoundsDatetime, match=prefix + suffix):
        DatetimeIndex(data)
576
+
577
+ def test_construction_with_ndarray(self):
578
+ # GH 5152
579
+ dates = [datetime(2013, 10, 7), datetime(2013, 10, 8), datetime(2013, 10, 9)]
580
+ data = DatetimeIndex(dates, freq=offsets.BDay()).values
581
+ result = DatetimeIndex(data, freq=offsets.BDay())
582
+ expected = DatetimeIndex(["2013-10-07", "2013-10-08", "2013-10-09"], freq="B")
583
+ tm.assert_index_equal(result, expected)
584
+
585
+ def test_integer_values_and_tz_interpreted_as_utc(self):
586
+ # GH-24559
587
+ val = np.datetime64("2000-01-01 00:00:00", "ns")
588
+ values = np.array([val.view("i8")])
589
+
590
+ result = DatetimeIndex(values).tz_localize("US/Central")
591
+
592
+ expected = DatetimeIndex(["2000-01-01T00:00:00"], dtype="M8[ns, US/Central]")
593
+ tm.assert_index_equal(result, expected)
594
+
595
+ # but UTC is *not* deprecated.
596
+ with tm.assert_produces_warning(None):
597
+ result = DatetimeIndex(values, tz="UTC")
598
+ expected = DatetimeIndex(["2000-01-01T00:00:00"], dtype="M8[ns, UTC]")
599
+ tm.assert_index_equal(result, expected)
600
+
601
def test_constructor_coverage(self):
    """Assorted inputs: scalar string, generator, NumPy string arrays, bad freq."""
    msg = r"DatetimeIndex\(\.\.\.\) must be called with a collection"
    with pytest.raises(TypeError, match=msg):
        DatetimeIndex("1/1/2000")

    # generator expression
    start = datetime(2000, 1, 1)
    from_gen = DatetimeIndex(start + timedelta(i) for i in range(10))
    from_list = DatetimeIndex([start + timedelta(i) for i in range(10)])
    tm.assert_index_equal(from_gen, from_list)

    # NumPy string array, first plain and then with a NaT entry; in both
    # cases the integer values must round-trip as well.
    for last in ("2000-01-03", "NaT"):
        strings = np.array(["2000-01-01", "2000-01-02", last])
        expected = DatetimeIndex(strings.astype("O"))
        tm.assert_index_equal(DatetimeIndex(strings), expected)
        tm.assert_index_equal(DatetimeIndex(expected.asi8), expected)

    # non-conforming
    msg = (
        "Inferred frequency None from passed values does not conform "
        "to passed frequency D"
    )
    with pytest.raises(ValueError, match=msg):
        DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"], freq="D")
639
+
640
@pytest.mark.parametrize("freq", ["YS", "W-SUN"])
def test_constructor_datetime64_tzformat(self, freq):
    """Endpoints written with a UTC offset produce a fixed-offset range (GH#6572)."""
    naive_start, naive_end = "2013-01-01T00:00:00", "2016-01-01T23:59:59"
    # (start, end, offset in minutes, named zone expected to share the i8 values)
    cases = [
        # ISO 8601 format
        ("2013-01-01T00:00:00-05:00", "2016-01-01T23:59:59-05:00", -300, "America/Lima"),
        ("2013-01-01T00:00:00+09:00", "2016-01-01T23:59:59+09:00", 540, "Asia/Tokyo"),
        # Non ISO 8601 format
        ("2013/1/1 0:00:00-5:00", "2016/1/1 23:59:59-5:00", -300, "America/Lima"),
        ("2013/1/1 0:00:00+9:00", "2016/1/1 23:59:59+09:00", 540, "Asia/Tokyo"),
    ]
    for start, end, minutes, zone in cases:
        idx = date_range(start, end, freq=freq)
        fixed_offset = timezone(timedelta(minutes=minutes))
        expected = date_range(naive_start, naive_end, freq=freq, tz=fixed_offset)
        tm.assert_index_equal(idx, expected)

        # ``US/Eastern`` cannot stand in for -05:00 because of DST, hence
        # ``America/Lima``.
        expected_i8 = date_range(naive_start, naive_end, freq=freq, tz=zone)
        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
701
+
702
+ def test_constructor_dtype(self):
703
+ # passing a dtype with a tz should localize
704
+ idx = DatetimeIndex(
705
+ ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]"
706
+ )
707
+ expected = (
708
+ DatetimeIndex(["2013-01-01", "2013-01-02"])
709
+ .as_unit("ns")
710
+ .tz_localize("US/Eastern")
711
+ )
712
+ tm.assert_index_equal(idx, expected)
713
+
714
+ idx = DatetimeIndex(["2013-01-01", "2013-01-02"], tz="US/Eastern").as_unit("ns")
715
+ tm.assert_index_equal(idx, expected)
716
+
717
def test_constructor_dtype_tz_mismatch_raises(self):
    """An index that already has a tz cannot be re-built with a conflicting dtype/tz."""
    eastern_dtype = "datetime64[ns, US/Eastern]"
    idx = DatetimeIndex(["2013-01-01", "2013-01-02"], dtype=eastern_dtype)

    naive_msg = (
        "cannot supply both a tz and a timezone-naive dtype "
        r"\(i\.e\. datetime64\[ns\]\)"
    )
    failures = [
        (ValueError, naive_msg, {"dtype": "datetime64[ns]"}),
        # this is effectively trying to convert tz's
        (
            TypeError,
            "data is already tz-aware US/Eastern, unable to set specified tz: CET",
            {"dtype": "datetime64[ns, CET]"},
        ),
        (
            ValueError,
            "cannot supply both a tz and a dtype with a tz",
            {"tz": "CET", "dtype": eastern_dtype},
        ),
    ]
    for exc_type, msg, kwargs in failures:
        with pytest.raises(exc_type, match=msg):
            DatetimeIndex(idx, **kwargs)

    # The matching dtype is accepted and gives back an equal index.
    tm.assert_index_equal(idx, DatetimeIndex(idx, dtype=eastern_dtype))
740
+
741
@pytest.mark.parametrize("dtype", [object, np.int32, np.int64])
def test_constructor_invalid_dtype_raises(self, dtype):
    """Object and integer dtypes are rejected (GH 23986)."""
    int_data = [1, 2]
    with pytest.raises(ValueError, match="Unexpected value for 'dtype'"):
        DatetimeIndex(int_data, dtype=dtype)
747
+
748
+ def test_000constructor_resolution(self):
749
+ # 2252
750
+ t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1)
751
+ idx = DatetimeIndex([t1])
752
+
753
+ assert idx.nanosecond[0] == t1.nanosecond
754
+
755
def test_disallow_setting_tz(self):
    """Assigning to ``.tz`` directly raises AttributeError (GH 3746)."""
    utc_index = DatetimeIndex(["2010"], tz="UTC")
    with pytest.raises(AttributeError, match="Cannot directly set timezone"):
        utc_index.tz = pytz.timezone("US/Pacific")
761
+
762
@pytest.mark.parametrize(
    "tz",
    [
        None,
        "America/Los_Angeles",
        pytz.timezone("America/Los_Angeles"),
        Timestamp("2000", tz="America/Los_Angeles").tz,
    ],
)
def test_constructor_start_end_with_tz(self, tz):
    """``date_range`` with aware endpoints, however the tz argument is spelled (GH 18595)."""
    zone = "America/Los_Angeles"
    first = Timestamp("2013-01-01 06:00:00", tz=zone)
    last = Timestamp("2013-01-02 06:00:00", tz=zone)

    result = date_range(freq="D", start=first, end=last, tz=tz)
    expected = DatetimeIndex(
        ["2013-01-01 06:00:00", "2013-01-02 06:00:00"],
        dtype="M8[ns, America/Los_Angeles]",
        freq="D",
    )
    tm.assert_index_equal(result, expected)
    # Especially assert that the timezone is consistent for pytz
    assert pytz.timezone("America/Los_Angeles") is result.tz
784
+
785
@pytest.mark.parametrize("tz", ["US/Pacific", "US/Eastern", "Asia/Tokyo"])
def test_constructor_with_non_normalized_pytz(self, tz):
    """A tz taken from a Timestamp ends up as the ``pytz.timezone`` object (GH 18595)."""
    tz_from_stamp = Timestamp("2010", tz=tz).tz
    built = DatetimeIndex(["2010"], tz=tz_from_stamp)
    assert pytz.timezone(tz) is built.tz
791
+
792
+ def test_constructor_timestamp_near_dst(self):
793
+ # GH 20854
794
+ ts = [
795
+ Timestamp("2016-10-30 03:00:00+0300", tz="Europe/Helsinki"),
796
+ Timestamp("2016-10-30 03:00:00+0200", tz="Europe/Helsinki"),
797
+ ]
798
+ result = DatetimeIndex(ts)
799
+ expected = DatetimeIndex([ts[0].to_pydatetime(), ts[1].to_pydatetime()])
800
+ tm.assert_index_equal(result, expected)
801
+
802
@pytest.mark.parametrize("klass", [Index, DatetimeIndex])
@pytest.mark.parametrize("box", [np.array, partial(np.array, dtype=object), list])
@pytest.mark.parametrize(
    "tz, dtype",
    [("US/Pacific", "datetime64[ns, US/Pacific]"), (None, "datetime64[ns]")],
)
def test_constructor_with_int_tz(self, klass, box, tz, dtype):
    """An integer value plus a datetime dtype equals building from the Timestamp.

    GH 20997, 20964. ``box`` controls the container that carries the
    integer (int64 ndarray, object ndarray or list); ``klass`` is the
    constructor under test.
    """
    ts = Timestamp("2018-01-01", tz=tz).as_unit("ns")
    result = klass(box([ts._value]), dtype=dtype)
    expected = klass([ts])
    # Compare as indexes so that class, dtype and tz are verified too; a
    # bare ``result == expected`` only checks the single element's truthiness.
    tm.assert_index_equal(result, expected)
814
+
815
+ def test_construction_int_rountrip(self, tz_naive_fixture):
816
+ # GH 12619, GH#24559
817
+ tz = tz_naive_fixture
818
+
819
+ result = 1293858000000000000
820
+ expected = DatetimeIndex([result], tz=tz).asi8[0]
821
+ assert result == expected
822
+
823
+ def test_construction_from_replaced_timestamps_with_dst(self):
824
+ # GH 18785
825
+ index = date_range(
826
+ Timestamp(2000, 12, 31),
827
+ Timestamp(2005, 12, 31),
828
+ freq="YE-DEC",
829
+ tz="Australia/Melbourne",
830
+ )
831
+ result = DatetimeIndex([x.replace(month=6, day=1) for x in index])
832
+ expected = DatetimeIndex(
833
+ [
834
+ "2000-06-01 00:00:00",
835
+ "2001-06-01 00:00:00",
836
+ "2002-06-01 00:00:00",
837
+ "2003-06-01 00:00:00",
838
+ "2004-06-01 00:00:00",
839
+ "2005-06-01 00:00:00",
840
+ ],
841
+ tz="Australia/Melbourne",
842
+ )
843
+ tm.assert_index_equal(result, expected)
844
+
845
def test_construction_with_tz_and_tz_aware_dti(self):
    """An already-aware index cannot be given a different ``tz`` (GH 23579)."""
    central = date_range("2016-01-01", periods=3, tz="US/Central")
    with pytest.raises(
        TypeError,
        match="data is already tz-aware US/Central, unable to set specified tz",
    ):
        DatetimeIndex(central, tz="Asia/Tokyo")
851
+
852
def test_construction_with_nat_and_tzlocal(self):
    """A ``"NaT"`` string is accepted together with a dateutil ``tzlocal`` tz."""
    local_tz = dateutil.tz.tzlocal()
    expected = DatetimeIndex([Timestamp("2018", tz=local_tz), pd.NaT])
    tm.assert_index_equal(DatetimeIndex(["2018", "NaT"], tz=local_tz), expected)
857
+
858
+ def test_constructor_with_ambiguous_keyword_arg(self):
859
+ # GH 35297
860
+
861
+ expected = DatetimeIndex(
862
+ ["2020-11-01 01:00:00", "2020-11-02 01:00:00"],
863
+ dtype="datetime64[ns, America/New_York]",
864
+ freq="D",
865
+ ambiguous=False,
866
+ )
867
+
868
+ # ambiguous keyword in start
869
+ timezone = "America/New_York"
870
+ start = Timestamp(year=2020, month=11, day=1, hour=1).tz_localize(
871
+ timezone, ambiguous=False
872
+ )
873
+ result = date_range(start=start, periods=2, ambiguous=False)
874
+ tm.assert_index_equal(result, expected)
875
+
876
+ # ambiguous keyword in end
877
+ timezone = "America/New_York"
878
+ end = Timestamp(year=2020, month=11, day=2, hour=1).tz_localize(
879
+ timezone, ambiguous=False
880
+ )
881
+ result = date_range(end=end, periods=2, ambiguous=False)
882
+ tm.assert_index_equal(result, expected)
883
+
884
+ def test_constructor_with_nonexistent_keyword_arg(self, warsaw):
885
+ # GH 35297
886
+ timezone = warsaw
887
+
888
+ # nonexistent keyword in start
889
+ start = Timestamp("2015-03-29 02:30:00").tz_localize(
890
+ timezone, nonexistent="shift_forward"
891
+ )
892
+ result = date_range(start=start, periods=2, freq="h")
893
+ expected = DatetimeIndex(
894
+ [
895
+ Timestamp("2015-03-29 03:00:00+02:00", tz=timezone),
896
+ Timestamp("2015-03-29 04:00:00+02:00", tz=timezone),
897
+ ]
898
+ )
899
+
900
+ tm.assert_index_equal(result, expected)
901
+
902
+ # nonexistent keyword in end
903
+ end = start
904
+ result = date_range(end=end, periods=2, freq="h")
905
+ expected = DatetimeIndex(
906
+ [
907
+ Timestamp("2015-03-29 01:00:00+01:00", tz=timezone),
908
+ Timestamp("2015-03-29 03:00:00+02:00", tz=timezone),
909
+ ]
910
+ )
911
+
912
+ tm.assert_index_equal(result, expected)
913
+
914
def test_constructor_no_precision_raises(self):
    """A unit-less ``"datetime64"`` dtype is rejected (GH-24753, GH-24739)."""
    # Each constructor reports the problem with its own message.
    attempts = (
        (DatetimeIndex, "with no precision is not allowed"),
        (Index, "The 'datetime64' dtype has no unit. Please pass in"),
    )
    for constructor, msg in attempts:
        with pytest.raises(ValueError, match=msg):
            constructor(["2000"], dtype="datetime64")
924
+
925
+ def test_constructor_wrong_precision_raises(self):
926
+ dti = DatetimeIndex(["2000"], dtype="datetime64[us]")
927
+ assert dti.dtype == "M8[us]"
928
+ assert dti[0] == Timestamp(2000, 1, 1)
929
+
930
+ def test_index_constructor_with_numpy_object_array_and_timestamp_tz_with_nan(self):
931
+ # GH 27011
932
+ result = Index(np.array([Timestamp("2019", tz="UTC"), np.nan], dtype=object))
933
+ expected = DatetimeIndex([Timestamp("2019", tz="UTC"), pd.NaT])
934
+ tm.assert_index_equal(result, expected)
935
+
936
@pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")])
def test_dti_from_tzaware_datetime(self, tz):
    """The index tz compares equal to the tzinfo of the aware datetime it was built from."""
    aware = datetime(2012, 8, 19, tzinfo=tz)
    built = DatetimeIndex([aware])
    assert timezones.tz_compare(built.tz, tz)
942
+
943
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_dti_tz_constructors(self, tzstr):
    """Different tz-aware DatetimeIndex constructions give equal results.

    Follow-up of GH#4229
    """
    arr = ["11/10/2005 08:00:00", "11/10/2005 09:00:00"]
    reference = to_datetime(arr).tz_localize(tzstr)

    from_range = date_range(
        start="2005-11-10 08:00:00", freq="h", periods=2, tz=tzstr
    )
    candidates = [
        from_range._with_freq(None),  # the others all have freq=None
        DatetimeIndex(arr, tz=tzstr),
        DatetimeIndex(np.array(arr), tz=tzstr),
    ]
    for candidate in candidates:
        tm.assert_index_equal(reference, candidate)
958
+
959
+ def test_dti_construction_idempotent(self, unit):
960
+ rng = date_range(
961
+ "03/12/2012 00:00", periods=10, freq="W-FRI", tz="US/Eastern", unit=unit
962
+ )
963
+ rng2 = DatetimeIndex(data=rng, tz="US/Eastern")
964
+ tm.assert_index_equal(rng, rng2)
965
+
966
@pytest.mark.parametrize("prefix", ["", "dateutil/"])
def test_dti_constructor_static_tzinfo(self, prefix):
    """Smoke test: an ``EST`` index can be built, read for hours and indexed."""
    index = DatetimeIndex([datetime(2012, 1, 1)], tz=f"{prefix}EST")
    # Only checking that these accesses do not raise.
    _ = index.hour
    _ = index[0]
972
+
973
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_dti_convert_datetime_list(self, tzstr):
    """An aware range survives a round trip through a plain list of its elements."""
    daily = date_range("2012-06-02", periods=10, tz=tzstr, name="foo")
    rebuilt = DatetimeIndex(list(daily), name="foo", freq="D")
    tm.assert_index_equal(daily, rebuilt)
978
+
979
@pytest.mark.parametrize(
    "tz",
    [
        pytz.timezone("US/Eastern"),
        gettz("US/Eastern"),
    ],
)
@pytest.mark.parametrize("use_str", [True, False])
@pytest.mark.parametrize("box_cls", [Timestamp, DatetimeIndex])
def test_dti_ambiguous_matches_timestamp(self, tz, use_str, box_cls, request):
    """The DTI and Timestamp constructors raise alike on an ambiguous wall time (GH#47471)."""
    dtstr = "2013-11-03 01:59:59.999999"
    item = dtstr if use_str else Timestamp(dtstr).to_pydatetime()
    if box_cls is not Timestamp:
        # DatetimeIndex needs a collection rather than a scalar.
        item = [item]

    if not use_str and isinstance(tz, dateutil.tz.tzfile):
        # FIXME: The Timestamp constructor here behaves differently than all
        #  the other cases bc with dateutil/zoneinfo tzinfos we implicitly
        #  get fold=0. Having this raise is not important, but having the
        #  behavior be consistent across cases is.
        request.applymarker(pytest.mark.xfail(reason="We implicitly get fold=0."))

    with pytest.raises(pytz.AmbiguousTimeError, match=dtstr):
        box_cls(item, tz=tz)
1008
+
1009
@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
def test_dti_constructor_with_non_nano_dtype(self, tz):
    """Mixed Timestamp/string/int input with a microsecond dtype (GH#55756, GH#54620)."""
    dtype = "M8[us]" if tz is None else f"M8[us, {tz}]"
    vals = [Timestamp("2999-01-01"), "2999-01-02 03:04:05.678910", 2500]
    result = DatetimeIndex(vals, dtype=dtype)

    # The 2500 is interpreted as microseconds, consistent with what we
    # would get if we created DatetimeIndexes from vals[:2] and vals[2:]
    # and concatenated the results.
    pointwise = [
        vals[0].tz_localize(tz),
        Timestamp(vals[1], tz=tz),
        to_datetime(vals[2], unit="us", utc=True).tz_convert(tz),
    ]
    exp_arr = np.array([x.as_unit("us").asm8 for x in pointwise], dtype="M8[us]")
    expected = DatetimeIndex(exp_arr, dtype="M8[us]")
    if tz is not None:
        expected = expected.tz_localize("UTC").tz_convert(tz)
    tm.assert_index_equal(result, expected)

    # Same values passed as an object ndarray.
    from_object = DatetimeIndex(np.array(vals, dtype=object), dtype=dtype)
    tm.assert_index_equal(from_object, expected)
1035
+
1036
+ def test_dti_constructor_with_non_nano_now_today(self):
1037
+ # GH#55756
1038
+ now = Timestamp.now()
1039
+ today = Timestamp.today()
1040
+ result = DatetimeIndex(["now", "today"], dtype="M8[s]")
1041
+ assert result.dtype == "M8[s]"
1042
+
1043
+ # result may not exactly match [now, today] so we'll test it up to a tolerance.
1044
+ # (it *may* match exactly due to rounding)
1045
+ tolerance = pd.Timedelta(microseconds=1)
1046
+
1047
+ diff0 = result[0] - now.as_unit("s")
1048
+ assert diff0 >= pd.Timedelta(0)
1049
+ assert diff0 < tolerance
1050
+
1051
+ diff1 = result[1] - today.as_unit("s")
1052
+ assert diff1 >= pd.Timedelta(0)
1053
+ assert diff1 < tolerance
1054
+
1055
+ def test_dti_constructor_object_float_matches_float_dtype(self):
1056
+ # GH#55780
1057
+ arr = np.array([0, np.nan], dtype=np.float64)
1058
+ arr2 = arr.astype(object)
1059
+
1060
+ dti1 = DatetimeIndex(arr, tz="CET")
1061
+ dti2 = DatetimeIndex(arr2, tz="CET")
1062
+ tm.assert_index_equal(dti1, dti2)
1063
+
1064
+ @pytest.mark.parametrize("dtype", ["M8[us]", "M8[us, US/Pacific]"])
1065
+ def test_dti_constructor_with_dtype_object_int_matches_int_dtype(self, dtype):
1066
+ # Going through the object path should match the non-object path
1067
+
1068
+ vals1 = np.arange(5, dtype="i8") * 1000
1069
+ vals1[0] = pd.NaT.value
1070
+
1071
+ vals2 = vals1.astype(np.float64)
1072
+ vals2[0] = np.nan
1073
+
1074
+ vals3 = vals1.astype(object)
1075
+ # change lib.infer_dtype(vals3) from "integer" so we go through
1076
+ # array_to_datetime in _sequence_to_dt64
1077
+ vals3[0] = pd.NaT
1078
+
1079
+ vals4 = vals2.astype(object)
1080
+
1081
+ res1 = DatetimeIndex(vals1, dtype=dtype)
1082
+ res2 = DatetimeIndex(vals2, dtype=dtype)
1083
+ res3 = DatetimeIndex(vals3, dtype=dtype)
1084
+ res4 = DatetimeIndex(vals4, dtype=dtype)
1085
+
1086
+ expected = DatetimeIndex(vals1.view("M8[us]"))
1087
+ if res1.tz is not None:
1088
+ expected = expected.tz_localize("UTC").tz_convert(res1.tz)
1089
+ tm.assert_index_equal(res1, expected)
1090
+ tm.assert_index_equal(res2, expected)
1091
+ tm.assert_index_equal(res3, expected)
1092
+ tm.assert_index_equal(res4, expected)
1093
+
1094
+
1095
class TestTimeSeries:
    # DatetimeIndex constructor checks: freq handling, integer input,
    # copy semantics and string parsing.

    def test_dti_constructor_preserve_dti_freq(self):
        source = date_range("1/1/2000", "1/2/2000", freq="5min")

        rebuilt = DatetimeIndex(source)
        assert source.freq == rebuilt.freq

    def test_explicit_none_freq(self):
        # Explicitly passing freq=None is respected
        source = date_range("1/1/2000", "1/2/2000", freq="5min")

        # Once from the index itself, once from its backing array.
        for data in (source, source._data):
            rebuilt = DatetimeIndex(data, freq=None)
            assert rebuilt.freq is None

    def test_dti_constructor_small_int(self, any_int_numpy_dtype):
        # see gh-13721
        expected = DatetimeIndex(
            [
                "1970-01-01 00:00:00.00000000",
                "1970-01-01 00:00:00.00000001",
                "1970-01-01 00:00:00.00000002",
            ]
        )

        small_ints = np.array([0, 10, 20], dtype=any_int_numpy_dtype)
        tm.assert_index_equal(DatetimeIndex(small_ints), expected)

    def test_ctor_str_intraday(self):
        parsed = DatetimeIndex(["1-1-2000 00:00:01"])
        assert parsed[0].second == 1

    def test_index_cast_datetime64_other_units(self):
        day_values = np.arange(0, 100, 10, dtype=np.int64).view("M8[D]")
        idx = Index(day_values)

        as_nanos = astype_overflowsafe(day_values, dtype=np.dtype("M8[ns]"))
        assert (idx.values == as_nanos).all()

    def test_constructor_int64_nocopy(self):
        # GH#1624
        # Without copy=True, writes to the source array show up in the index.
        shared = np.arange(1000, dtype=np.int64)
        sharing_index = DatetimeIndex(shared)

        shared[50:100] = -1
        assert (sharing_index.asi8[50:100] == -1).all()

        # With copy=True, the index is insulated from later writes.
        source = np.arange(1000, dtype=np.int64)
        copied_index = DatetimeIndex(source, copy=True)

        source[50:100] = -1
        assert (copied_index.asi8[50:100] != -1).all()

    @pytest.mark.parametrize(
        "freq",
        ["ME", "QE", "YE", "D", "B", "bh", "min", "s", "ms", "us", "h", "ns", "C"],
    )
    def test_from_freq_recreate_from_data(self, freq):
        # tz-naive round trip
        naive = date_range(start="2001/02/01 09:00", freq=freq, periods=1)
        tm.assert_index_equal(DatetimeIndex(naive, freq=freq), naive)

        # tz-aware round trip
        aware = date_range(
            start="2001/02/01 09:00", freq=freq, tz="US/Pacific", periods=1
        )
        tm.assert_index_equal(
            DatetimeIndex(aware, freq=freq, tz="US/Pacific"), aware
        )

    def test_datetimeindex_constructor_misc(self):
        # "Jn 3, 2005" is not a parseable date string
        unparseable = ["1/1/2005", "1/2/2005", "Jn 3, 2005", "2005-01-04"]
        msg = r"(\(')?Unknown datetime string format(:', 'Jn 3, 2005'\))?"
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(unparseable)

        from_strings = DatetimeIndex(
            ["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"]
        )
        with_datetime = DatetimeIndex(
            [datetime(2005, 1, 1), "1/2/2005", "1/3/2005", "2005-01-04"]
        )
        with_timestamp = DatetimeIndex(
            [Timestamp(datetime(2005, 1, 1)), "1/2/2005", "1/3/2005", "2005-01-04"]
        )
        from_object_array = DatetimeIndex(
            np.array(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"], dtype="O")
        )

        day_first = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True)
        year_first = DatetimeIndex(
            ["2007/05/12", "2008/01/25"], dayfirst=False, yearfirst=True
        )
        tm.assert_index_equal(day_first, year_first)

        # Every mixed-input variant must hold the same values as the all-string one.
        for other in (with_datetime, with_timestamp, from_object_array):
            assert (from_strings.values == other.values).all()

    def test_dti_constructor_object_dtype_dayfirst_yearfirst_with_tz(self):
        # GH#55813
        val = "5/10/16"

        day_first_stamp = Timestamp(2016, 10, 5, tz="US/Pacific")
        year_first_stamp = Timestamp(2005, 10, 16, tz="US/Pacific")

        parsed_day_first = DatetimeIndex([val], tz="US/Pacific", dayfirst=True)
        tm.assert_index_equal(parsed_day_first, DatetimeIndex([day_first_stamp]))

        parsed_year_first = DatetimeIndex([val], tz="US/Pacific", yearfirst=True)
        tm.assert_index_equal(parsed_year_first, DatetimeIndex([year_first_stamp]))
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_date_range.py ADDED
@@ -0,0 +1,1721 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ test date_range, bdate_range construction from the convenience range functions
3
+ """
4
+
5
+ from datetime import (
6
+ datetime,
7
+ time,
8
+ timedelta,
9
+ )
10
+ import re
11
+
12
+ import numpy as np
13
+ import pytest
14
+ import pytz
15
+ from pytz import timezone
16
+
17
+ from pandas._libs.tslibs import timezones
18
+ from pandas._libs.tslibs.offsets import (
19
+ BDay,
20
+ CDay,
21
+ DateOffset,
22
+ MonthEnd,
23
+ prefix_mapping,
24
+ )
25
+ from pandas.errors import OutOfBoundsDatetime
26
+ import pandas.util._test_decorators as td
27
+
28
+ import pandas as pd
29
+ from pandas import (
30
+ DataFrame,
31
+ DatetimeIndex,
32
+ Series,
33
+ Timedelta,
34
+ Timestamp,
35
+ bdate_range,
36
+ date_range,
37
+ offsets,
38
+ )
39
+ import pandas._testing as tm
40
+ from pandas.core.arrays.datetimes import _generate_range as generate_range
41
+ from pandas.tests.indexes.datetimes.test_timezones import (
42
+ FixedOffset,
43
+ fixed_off_no_name,
44
+ )
45
+
46
+ from pandas.tseries.holiday import USFederalHolidayCalendar
47
+
48
# Module-level start/end datetimes (2009-01-01 and 2010-01-01).
START = datetime(2009, 1, 1)
END = datetime(2010, 1, 1)
49
+
50
+
51
+ def _get_expected_range(
52
+ begin_to_match,
53
+ end_to_match,
54
+ both_range,
55
+ inclusive_endpoints,
56
+ ):
57
+ """Helper to get expected range from a both inclusive range"""
58
+ left_match = begin_to_match == both_range[0]
59
+ right_match = end_to_match == both_range[-1]
60
+
61
+ if inclusive_endpoints == "left" and right_match:
62
+ expected_range = both_range[:-1]
63
+ elif inclusive_endpoints == "right" and left_match:
64
+ expected_range = both_range[1:]
65
+ elif inclusive_endpoints == "neither" and left_match and right_match:
66
+ expected_range = both_range[1:-1]
67
+ elif inclusive_endpoints == "neither" and right_match:
68
+ expected_range = both_range[:-1]
69
+ elif inclusive_endpoints == "neither" and left_match:
70
+ expected_range = both_range[1:]
71
+ elif inclusive_endpoints == "both":
72
+ expected_range = both_range[:]
73
+ else:
74
+ expected_range = both_range[:]
75
+
76
+ return expected_range
77
+
78
+
79
class TestTimestampEquivDateRange:
    # Older tests in TestTimeSeries constructed their `stamp` objects
    # using `date_range` instead of the `Timestamp` constructor.
    # TestTimestampEquivDateRange checks that these are equivalent in the
    # pertinent cases.

    def test_date_range_timestamp_equiv(self):
        first = date_range("20090415", "20090519", tz="US/Eastern")[0]

        direct = Timestamp("20090415", tz="US/Eastern")
        assert direct == first

    def test_date_range_timestamp_equiv_dateutil(self):
        first = date_range("20090415", "20090519", tz="dateutil/US/Eastern")[0]

        direct = Timestamp("20090415", tz="dateutil/US/Eastern")
        assert direct == first

    def test_date_range_timestamp_equiv_explicit_pytz(self):
        first = date_range(
            "20090415", "20090519", tz=pytz.timezone("US/Eastern")
        )[0]

        direct = Timestamp("20090415", tz=pytz.timezone("US/Eastern"))
        assert direct == first

    @td.skip_if_windows
    def test_date_range_timestamp_equiv_explicit_dateutil(self):
        from pandas._libs.tslibs.timezones import dateutil_gettz as gettz

        first = date_range("20090415", "20090519", tz=gettz("US/Eastern"))[0]

        direct = Timestamp("20090415", tz=gettz("US/Eastern"))
        assert direct == first

    def test_date_range_timestamp_equiv_from_datetime_instance(self):
        datetime_instance = datetime(2014, 3, 4)
        # build a timestamp with a frequency, since then it supports
        # addition/subtraction of integers
        from_range = date_range(datetime_instance, periods=1, freq="D")[0]

        direct = Timestamp(datetime_instance)
        assert direct == from_range

    def test_date_range_timestamp_equiv_preserve_frequency(self):
        from_range = date_range("2014-03-05", periods=1, freq="D")[0]
        direct = Timestamp("2014-03-05")

        assert from_range == direct
130
+
131
+
132
+ class TestDateRanges:
133
+ def test_date_range_name(self):
134
+ idx = date_range(start="2000-01-01", periods=1, freq="YE", name="TEST")
135
+ assert idx.name == "TEST"
136
+
137
+ def test_date_range_invalid_periods(self):
138
+ msg = "periods must be a number, got foo"
139
+ with pytest.raises(TypeError, match=msg):
140
+ date_range(start="1/1/2000", periods="foo", freq="D")
141
+
142
+ def test_date_range_fractional_period(self):
143
+ msg = "Non-integer 'periods' in pd.date_range, pd.timedelta_range"
144
+ with tm.assert_produces_warning(FutureWarning, match=msg):
145
+ rng = date_range("1/1/2000", periods=10.5)
146
+ exp = date_range("1/1/2000", periods=10)
147
+ tm.assert_index_equal(rng, exp)
148
+
149
+ @pytest.mark.parametrize(
150
+ "freq,freq_depr",
151
+ [
152
+ ("2ME", "2M"),
153
+ ("2SME", "2SM"),
154
+ ("2BQE", "2BQ"),
155
+ ("2BYE", "2BY"),
156
+ ],
157
+ )
158
+ def test_date_range_frequency_M_SM_BQ_BY_deprecated(self, freq, freq_depr):
159
+ # GH#52064
160
+ depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed "
161
+ f"in a future version, please use '{freq[1:]}' instead."
162
+
163
+ expected = date_range("1/1/2000", periods=4, freq=freq)
164
+ with tm.assert_produces_warning(FutureWarning, match=depr_msg):
165
+ result = date_range("1/1/2000", periods=4, freq=freq_depr)
166
+ tm.assert_index_equal(result, expected)
167
+
168
+ def test_date_range_tuple_freq_raises(self):
169
+ # GH#34703
170
+ edate = datetime(2000, 1, 1)
171
+ with pytest.raises(TypeError, match="pass as a string instead"):
172
+ date_range(end=edate, freq=("D", 5), periods=20)
173
+
174
+ @pytest.mark.parametrize("freq", ["ns", "us", "ms", "min", "s", "h", "D"])
175
+ def test_date_range_edges(self, freq):
176
+ # GH#13672
177
+ td = Timedelta(f"1{freq}")
178
+ ts = Timestamp("1970-01-01")
179
+
180
+ idx = date_range(
181
+ start=ts + td,
182
+ end=ts + 4 * td,
183
+ freq=freq,
184
+ )
185
+ exp = DatetimeIndex(
186
+ [ts + n * td for n in range(1, 5)],
187
+ dtype="M8[ns]",
188
+ freq=freq,
189
+ )
190
+ tm.assert_index_equal(idx, exp)
191
+
192
+ # start after end
193
+ idx = date_range(
194
+ start=ts + 4 * td,
195
+ end=ts + td,
196
+ freq=freq,
197
+ )
198
+ exp = DatetimeIndex([], dtype="M8[ns]", freq=freq)
199
+ tm.assert_index_equal(idx, exp)
200
+
201
+ # start matches end
202
+ idx = date_range(
203
+ start=ts + td,
204
+ end=ts + td,
205
+ freq=freq,
206
+ )
207
+ exp = DatetimeIndex([ts + td], dtype="M8[ns]", freq=freq)
208
+ tm.assert_index_equal(idx, exp)
209
+
210
+ def test_date_range_near_implementation_bound(self):
211
+ # GH#???
212
+ freq = Timedelta(1)
213
+
214
+ with pytest.raises(OutOfBoundsDatetime, match="Cannot generate range with"):
215
+ date_range(end=Timestamp.min, periods=2, freq=freq)
216
+
217
+ def test_date_range_nat(self):
218
+ # GH#11587
219
+ msg = "Neither `start` nor `end` can be NaT"
220
+ with pytest.raises(ValueError, match=msg):
221
+ date_range(start="2016-01-01", end=pd.NaT, freq="D")
222
+ with pytest.raises(ValueError, match=msg):
223
+ date_range(start=pd.NaT, end="2016-01-01", freq="D")
224
+
225
+ def test_date_range_multiplication_overflow(self):
226
+ # GH#24255
227
+ # check that overflows in calculating `addend = periods * stride`
228
+ # are caught
229
+ with tm.assert_produces_warning(None):
230
+ # we should _not_ be seeing a overflow RuntimeWarning
231
+ dti = date_range(start="1677-09-22", periods=213503, freq="D")
232
+
233
+ assert dti[0] == Timestamp("1677-09-22")
234
+ assert len(dti) == 213503
235
+
236
+ msg = "Cannot generate range with"
237
+ with pytest.raises(OutOfBoundsDatetime, match=msg):
238
+ date_range("1969-05-04", periods=200000000, freq="30000D")
239
+
240
+ def test_date_range_unsigned_overflow_handling(self):
241
+ # GH#24255
242
+ # case where `addend = periods * stride` overflows int64 bounds
243
+ # but not uint64 bounds
244
+ dti = date_range(start="1677-09-22", end="2262-04-11", freq="D")
245
+
246
+ dti2 = date_range(start=dti[0], periods=len(dti), freq="D")
247
+ assert dti2.equals(dti)
248
+
249
+ dti3 = date_range(end=dti[-1], periods=len(dti), freq="D")
250
+ assert dti3.equals(dti)
251
+
252
+ def test_date_range_int64_overflow_non_recoverable(self):
253
+ # GH#24255
254
+ # case with start later than 1970-01-01, overflow int64 but not uint64
255
+ msg = "Cannot generate range with"
256
+ with pytest.raises(OutOfBoundsDatetime, match=msg):
257
+ date_range(start="1970-02-01", periods=106752 * 24, freq="h")
258
+
259
+ # case with end before 1970-01-01, overflow int64 but not uint64
260
+ with pytest.raises(OutOfBoundsDatetime, match=msg):
261
+ date_range(end="1969-11-14", periods=106752 * 24, freq="h")
262
+
263
+ @pytest.mark.slow
264
+ @pytest.mark.parametrize(
265
+ "s_ts, e_ts", [("2262-02-23", "1969-11-14"), ("1970-02-01", "1677-10-22")]
266
+ )
267
+ def test_date_range_int64_overflow_stride_endpoint_different_signs(
268
+ self, s_ts, e_ts
269
+ ):
270
+ # cases where stride * periods overflow int64 and stride/endpoint
271
+ # have different signs
272
+ start = Timestamp(s_ts)
273
+ end = Timestamp(e_ts)
274
+
275
+ expected = date_range(start=start, end=end, freq="-1h")
276
+ assert expected[0] == start
277
+ assert expected[-1] == end
278
+
279
+ dti = date_range(end=end, periods=len(expected), freq="-1h")
280
+ tm.assert_index_equal(dti, expected)
281
+
282
+ def test_date_range_out_of_bounds(self):
283
+ # GH#14187
284
+ msg = "Cannot generate range"
285
+ with pytest.raises(OutOfBoundsDatetime, match=msg):
286
+ date_range("2016-01-01", periods=100000, freq="D")
287
+ with pytest.raises(OutOfBoundsDatetime, match=msg):
288
+ date_range(end="1763-10-12", periods=100000, freq="D")
289
+
290
+ def test_date_range_gen_error(self):
291
+ rng = date_range("1/1/2000 00:00", "1/1/2000 00:18", freq="5min")
292
+ assert len(rng) == 4
293
+
294
+ def test_date_range_normalize(self):
295
+ snap = datetime.today()
296
+ n = 50
297
+
298
+ rng = date_range(snap, periods=n, normalize=False, freq="2D")
299
+
300
+ offset = timedelta(2)
301
+ expected = DatetimeIndex(
302
+ [snap + i * offset for i in range(n)], dtype="M8[ns]", freq=offset
303
+ )
304
+
305
+ tm.assert_index_equal(rng, expected)
306
+
307
+ rng = date_range("1/1/2000 08:15", periods=n, normalize=False, freq="B")
308
+ the_time = time(8, 15)
309
+ for val in rng:
310
+ assert val.time() == the_time
311
+
312
+ def test_date_range_ambiguous_arguments(self):
313
+ # #2538
314
+ start = datetime(2011, 1, 1, 5, 3, 40)
315
+ end = datetime(2011, 1, 1, 8, 9, 40)
316
+
317
+ msg = (
318
+ "Of the four parameters: start, end, periods, and "
319
+ "freq, exactly three must be specified"
320
+ )
321
+ with pytest.raises(ValueError, match=msg):
322
+ date_range(start, end, periods=10, freq="s")
323
+
324
+ def test_date_range_convenience_periods(self, unit):
325
+ # GH 20808
326
+ result = date_range("2018-04-24", "2018-04-27", periods=3, unit=unit)
327
+ expected = DatetimeIndex(
328
+ ["2018-04-24 00:00:00", "2018-04-25 12:00:00", "2018-04-27 00:00:00"],
329
+ dtype=f"M8[{unit}]",
330
+ freq=None,
331
+ )
332
+
333
+ tm.assert_index_equal(result, expected)
334
+
335
+ # Test if spacing remains linear if tz changes to dst in range
336
+ result = date_range(
337
+ "2018-04-01 01:00:00",
338
+ "2018-04-01 04:00:00",
339
+ tz="Australia/Sydney",
340
+ periods=3,
341
+ unit=unit,
342
+ )
343
+ expected = DatetimeIndex(
344
+ [
345
+ Timestamp("2018-04-01 01:00:00+1100", tz="Australia/Sydney"),
346
+ Timestamp("2018-04-01 02:00:00+1000", tz="Australia/Sydney"),
347
+ Timestamp("2018-04-01 04:00:00+1000", tz="Australia/Sydney"),
348
+ ]
349
+ ).as_unit(unit)
350
+ tm.assert_index_equal(result, expected)
351
+
352
+ def test_date_range_index_comparison(self):
353
+ rng = date_range("2011-01-01", periods=3, tz="US/Eastern")
354
+ df = Series(rng).to_frame()
355
+ arr = np.array([rng.to_list()]).T
356
+ arr2 = np.array([rng]).T
357
+
358
+ with pytest.raises(ValueError, match="Unable to coerce to Series"):
359
+ rng == df
360
+
361
+ with pytest.raises(ValueError, match="Unable to coerce to Series"):
362
+ df == rng
363
+
364
+ expected = DataFrame([True, True, True])
365
+
366
+ results = df == arr2
367
+ tm.assert_frame_equal(results, expected)
368
+
369
+ expected = Series([True, True, True], name=0)
370
+
371
+ results = df[0] == arr2[:, 0]
372
+ tm.assert_series_equal(results, expected)
373
+
374
+ expected = np.array(
375
+ [[True, False, False], [False, True, False], [False, False, True]]
376
+ )
377
+ results = rng == arr
378
+ tm.assert_numpy_array_equal(results, expected)
379
+
380
+ @pytest.mark.parametrize(
381
+ "start,end,result_tz",
382
+ [
383
+ ["20180101", "20180103", "US/Eastern"],
384
+ [datetime(2018, 1, 1), datetime(2018, 1, 3), "US/Eastern"],
385
+ [Timestamp("20180101"), Timestamp("20180103"), "US/Eastern"],
386
+ [
387
+ Timestamp("20180101", tz="US/Eastern"),
388
+ Timestamp("20180103", tz="US/Eastern"),
389
+ "US/Eastern",
390
+ ],
391
+ [
392
+ Timestamp("20180101", tz="US/Eastern"),
393
+ Timestamp("20180103", tz="US/Eastern"),
394
+ None,
395
+ ],
396
+ ],
397
+ )
398
+ def test_date_range_linspacing_tz(self, start, end, result_tz):
399
+ # GH 20983
400
+ result = date_range(start, end, periods=3, tz=result_tz)
401
+ expected = date_range("20180101", periods=3, freq="D", tz="US/Eastern")
402
+ tm.assert_index_equal(result, expected)
403
+
404
+ def test_date_range_timedelta(self):
405
+ start = "2020-01-01"
406
+ end = "2020-01-11"
407
+ rng1 = date_range(start, end, freq="3D")
408
+ rng2 = date_range(start, end, freq=timedelta(days=3))
409
+ tm.assert_index_equal(rng1, rng2)
410
+
411
+ def test_range_misspecified(self):
412
+ # GH #1095
413
+ msg = (
414
+ "Of the four parameters: start, end, periods, and "
415
+ "freq, exactly three must be specified"
416
+ )
417
+
418
+ with pytest.raises(ValueError, match=msg):
419
+ date_range(start="1/1/2000")
420
+
421
+ with pytest.raises(ValueError, match=msg):
422
+ date_range(end="1/1/2000")
423
+
424
+ with pytest.raises(ValueError, match=msg):
425
+ date_range(periods=10)
426
+
427
+ with pytest.raises(ValueError, match=msg):
428
+ date_range(start="1/1/2000", freq="h")
429
+
430
+ with pytest.raises(ValueError, match=msg):
431
+ date_range(end="1/1/2000", freq="h")
432
+
433
+ with pytest.raises(ValueError, match=msg):
434
+ date_range(periods=10, freq="h")
435
+
436
+ with pytest.raises(ValueError, match=msg):
437
+ date_range()
438
+
439
+ def test_compat_replace(self):
440
+ # https://github.com/statsmodels/statsmodels/issues/3349
441
+ # replace should take ints/longs for compat
442
+ result = date_range(Timestamp("1960-04-01 00:00:00"), periods=76, freq="QS-JAN")
443
+ assert len(result) == 76
444
+
445
+ def test_catch_infinite_loop(self):
446
+ offset = offsets.DateOffset(minute=5)
447
+ # blow up, don't loop forever
448
+ msg = "Offset <DateOffset: minute=5> did not increment date"
449
+ with pytest.raises(ValueError, match=msg):
450
+ date_range(datetime(2011, 11, 11), datetime(2011, 11, 12), freq=offset)
451
+
452
+ def test_construct_over_dst(self, unit):
453
+ # GH 20854
454
+ pre_dst = Timestamp("2010-11-07 01:00:00").tz_localize(
455
+ "US/Pacific", ambiguous=True
456
+ )
457
+ pst_dst = Timestamp("2010-11-07 01:00:00").tz_localize(
458
+ "US/Pacific", ambiguous=False
459
+ )
460
+ expect_data = [
461
+ Timestamp("2010-11-07 00:00:00", tz="US/Pacific"),
462
+ pre_dst,
463
+ pst_dst,
464
+ ]
465
+ expected = DatetimeIndex(expect_data, freq="h").as_unit(unit)
466
+ result = date_range(
467
+ start="2010-11-7", periods=3, freq="h", tz="US/Pacific", unit=unit
468
+ )
469
+ tm.assert_index_equal(result, expected)
470
+
471
+ def test_construct_with_different_start_end_string_format(self, unit):
472
+ # GH 12064
473
+ result = date_range(
474
+ "2013-01-01 00:00:00+09:00",
475
+ "2013/01/01 02:00:00+09:00",
476
+ freq="h",
477
+ unit=unit,
478
+ )
479
+ expected = DatetimeIndex(
480
+ [
481
+ Timestamp("2013-01-01 00:00:00+09:00"),
482
+ Timestamp("2013-01-01 01:00:00+09:00"),
483
+ Timestamp("2013-01-01 02:00:00+09:00"),
484
+ ],
485
+ freq="h",
486
+ ).as_unit(unit)
487
+ tm.assert_index_equal(result, expected)
488
+
489
+ def test_error_with_zero_monthends(self):
490
+ msg = r"Offset <0 \* MonthEnds> did not increment date"
491
+ with pytest.raises(ValueError, match=msg):
492
+ date_range("1/1/2000", "1/1/2001", freq=MonthEnd(0))
493
+
494
+ def test_range_bug(self, unit):
495
+ # GH #770
496
+ offset = DateOffset(months=3)
497
+ result = date_range("2011-1-1", "2012-1-31", freq=offset, unit=unit)
498
+
499
+ start = datetime(2011, 1, 1)
500
+ expected = DatetimeIndex(
501
+ [start + i * offset for i in range(5)], dtype=f"M8[{unit}]", freq=offset
502
+ )
503
+ tm.assert_index_equal(result, expected)
504
+
505
+ def test_range_tz_pytz(self):
506
+ # see gh-2906
507
+ tz = timezone("US/Eastern")
508
+ start = tz.localize(datetime(2011, 1, 1))
509
+ end = tz.localize(datetime(2011, 1, 3))
510
+
511
+ dr = date_range(start=start, periods=3)
512
+ assert dr.tz.zone == tz.zone
513
+ assert dr[0] == start
514
+ assert dr[2] == end
515
+
516
+ dr = date_range(end=end, periods=3)
517
+ assert dr.tz.zone == tz.zone
518
+ assert dr[0] == start
519
+ assert dr[2] == end
520
+
521
+ dr = date_range(start=start, end=end)
522
+ assert dr.tz.zone == tz.zone
523
+ assert dr[0] == start
524
+ assert dr[2] == end
525
+
526
+ @pytest.mark.parametrize(
527
+ "start, end",
528
+ [
529
+ [
530
+ Timestamp(datetime(2014, 3, 6), tz="US/Eastern"),
531
+ Timestamp(datetime(2014, 3, 12), tz="US/Eastern"),
532
+ ],
533
+ [
534
+ Timestamp(datetime(2013, 11, 1), tz="US/Eastern"),
535
+ Timestamp(datetime(2013, 11, 6), tz="US/Eastern"),
536
+ ],
537
+ ],
538
+ )
539
+ def test_range_tz_dst_straddle_pytz(self, start, end):
540
+ dr = date_range(start, end, freq="D")
541
+ assert dr[0] == start
542
+ assert dr[-1] == end
543
+ assert np.all(dr.hour == 0)
544
+
545
+ dr = date_range(start, end, freq="D", tz="US/Eastern")
546
+ assert dr[0] == start
547
+ assert dr[-1] == end
548
+ assert np.all(dr.hour == 0)
549
+
550
+ dr = date_range(
551
+ start.replace(tzinfo=None),
552
+ end.replace(tzinfo=None),
553
+ freq="D",
554
+ tz="US/Eastern",
555
+ )
556
+ assert dr[0] == start
557
+ assert dr[-1] == end
558
+ assert np.all(dr.hour == 0)
559
+
560
+ def test_range_tz_dateutil(self):
561
+ # see gh-2906
562
+
563
+ # Use maybe_get_tz to fix filename in tz under dateutil.
564
+ from pandas._libs.tslibs.timezones import maybe_get_tz
565
+
566
+ tz = lambda x: maybe_get_tz("dateutil/" + x)
567
+
568
+ start = datetime(2011, 1, 1, tzinfo=tz("US/Eastern"))
569
+ end = datetime(2011, 1, 3, tzinfo=tz("US/Eastern"))
570
+
571
+ dr = date_range(start=start, periods=3)
572
+ assert dr.tz == tz("US/Eastern")
573
+ assert dr[0] == start
574
+ assert dr[2] == end
575
+
576
+ dr = date_range(end=end, periods=3)
577
+ assert dr.tz == tz("US/Eastern")
578
+ assert dr[0] == start
579
+ assert dr[2] == end
580
+
581
+ dr = date_range(start=start, end=end)
582
+ assert dr.tz == tz("US/Eastern")
583
+ assert dr[0] == start
584
+ assert dr[2] == end
585
+
586
+ @pytest.mark.parametrize("freq", ["1D", "3D", "2ME", "7W", "3h", "YE"])
587
+ @pytest.mark.parametrize("tz", [None, "US/Eastern"])
588
+ def test_range_closed(self, freq, tz, inclusive_endpoints_fixture):
589
+ # GH#12409, GH#12684
590
+
591
+ begin = Timestamp("2011/1/1", tz=tz)
592
+ end = Timestamp("2014/1/1", tz=tz)
593
+
594
+ result_range = date_range(
595
+ begin, end, inclusive=inclusive_endpoints_fixture, freq=freq
596
+ )
597
+ both_range = date_range(begin, end, inclusive="both", freq=freq)
598
+ expected_range = _get_expected_range(
599
+ begin, end, both_range, inclusive_endpoints_fixture
600
+ )
601
+
602
+ tm.assert_index_equal(expected_range, result_range)
603
+
604
+ @pytest.mark.parametrize("freq", ["1D", "3D", "2ME", "7W", "3h", "YE"])
605
+ def test_range_with_tz_closed_with_tz_aware_start_end(
606
+ self, freq, inclusive_endpoints_fixture
607
+ ):
608
+ begin = Timestamp("2011/1/1")
609
+ end = Timestamp("2014/1/1")
610
+ begintz = Timestamp("2011/1/1", tz="US/Eastern")
611
+ endtz = Timestamp("2014/1/1", tz="US/Eastern")
612
+
613
+ result_range = date_range(
614
+ begin,
615
+ end,
616
+ inclusive=inclusive_endpoints_fixture,
617
+ freq=freq,
618
+ tz="US/Eastern",
619
+ )
620
+ both_range = date_range(
621
+ begin, end, inclusive="both", freq=freq, tz="US/Eastern"
622
+ )
623
+ expected_range = _get_expected_range(
624
+ begintz,
625
+ endtz,
626
+ both_range,
627
+ inclusive_endpoints_fixture,
628
+ )
629
+
630
+ tm.assert_index_equal(expected_range, result_range)
631
+
632
def test_range_closed_boundary(self, inclusive_endpoints_fixture):
    """Check ``inclusive`` when an endpoint falls exactly on a ``QS-MAR`` date."""
    # GH#11804
    mode = inclusive_endpoints_fixture

    def quarter_starts(first, last):
        # Every range in this test shares the frequency and the inclusive mode.
        return date_range(first, last, freq="QS-MAR", inclusive=mode)

    right_boundary = quarter_starts("2015-09-12", "2015-12-01")
    left_boundary = quarter_starts("2015-09-01", "2015-09-12")
    both_boundary = quarter_starts("2015-09-01", "2015-12-01")
    neither_boundary = quarter_starts("2015-09-11", "2015-09-12")

    # Default: compare against the two-endpoint range as-is, then trim it
    # depending on the inclusive mode under test.
    expected_right = expected_left = expected_both = both_boundary
    if mode == "both":
        expected_right, expected_left = both_boundary[1:], both_boundary[:-1]
    elif mode == "left":
        expected_right = both_boundary[:-1]
    elif mode == "right":
        expected_left = both_boundary[1:]

    expected_neither = both_boundary[1:-1]

    comparisons = (
        (right_boundary, expected_right),
        (left_boundary, expected_left),
        (both_boundary, expected_both),
        (neither_boundary, expected_neither),
    )
    for actual, wanted in comparisons:
        tm.assert_index_equal(actual, wanted)
677
+
678
def test_date_range_years_only(self, tz_naive_fixture):
    """Year-only endpoint strings should behave like explicit full dates."""
    # GH#6961
    tz = tz_naive_fixture
    # (freq, year-only end, explicit start, explicit end)
    scenarios = (
        ("ME", "2015", "2014-01-31", "2014-12-31"),
        ("MS", "2015", "2014-01-01", "2015-01-01"),
        ("YE", "2020", "2014-12-31", "2019-12-31"),
        ("YS", "2020", "2014-01-01", "2020-01-01"),
    )
    for freq, year_stop, first, last in scenarios:
        from_years = date_range("2014", year_stop, freq=freq, tz=tz)
        from_dates = date_range(first, last, freq=freq, tz=tz)
        tm.assert_index_equal(from_years, from_dates)
696
+
697
def test_freq_divides_end_in_nanos(self):
    """A 345-minute step over a six-hour window should yield two points."""
    # GH 10885
    scenarios = (
        (
            ("2005-01-12 10:00", "2005-01-12 16:00"),
            ["2005-01-12 10:00:00", "2005-01-12 15:45:00"],
        ),
        (
            ("2005-01-13 10:00", "2005-01-13 16:00"),
            ["2005-01-13 10:00:00", "2005-01-13 15:45:00"],
        ),
    )
    for (begin, finish), stamps in scenarios:
        result = date_range(begin, finish, freq="345min")
        expected = DatetimeIndex(
            stamps, dtype="datetime64[ns]", freq="345min", tz=None
        )
        tm.assert_index_equal(result, expected)
715
+
716
def test_cached_range_bug(self):
    """A ``DateOffset(hours=6)`` range keeps its length and first element."""
    six_hours = DateOffset(hours=6)
    rng = date_range("2010-09-01 05:00:00", periods=50, freq=six_hours)

    first_point = rng[0]
    assert len(rng) == 50
    assert first_point == datetime(2010, 9, 1, 5)
720
+
721
def test_timezone_comparison_bug(self):
    """Smoke test: tz-aware start plus the same ``tz`` keyword is accepted."""
    # smoke test
    eastern = "US/Eastern"
    aware_start = Timestamp("20130220 10:00", tz=eastern)
    produced = date_range(aware_start, periods=2, tz=eastern)
    assert len(produced) == 2
726
+
727
def test_timezone_comparison_assert(self):
    """A ``tz`` keyword that disagrees with the start's timezone must raise."""
    aware_start = Timestamp("20130220 10:00", tz="US/Eastern")
    expected_error = pytest.raises(
        AssertionError, match="Inferred time zone not equal to passed time zone"
    )
    with expected_error:
        date_range(aware_start, periods=2, tz="Europe/Berlin")
732
+
733
def test_negative_non_tick_frequency_descending_dates(self, tz_aware_fixture):
    """A ``-1MS`` range should equal the reversed ascending ``1MS`` range."""
    # GH 23270
    tz = tz_aware_fixture
    ascending = date_range(end="2011-06-01", start="2011-01-01", freq="1MS", tz=tz)
    descending = date_range(start="2011-06-01", end="2011-01-01", freq="-1MS", tz=tz)
    tm.assert_index_equal(descending, ascending[::-1])
741
+
742
def test_range_where_start_equal_end(self, inclusive_endpoints_fixture):
    """Identical start and end: only ``inclusive="neither"`` drops the point."""
    # GH 43394
    start = end = "2021-09-02"
    mode = inclusive_endpoints_fixture
    result = date_range(start=start, end=end, freq="D", inclusive=mode)

    closed = date_range(start=start, end=end, freq="D", inclusive="both")
    if mode in ("left", "right", "both"):
        expected = closed[:]
    elif mode == "neither":
        expected = closed[1:-1]

    tm.assert_index_equal(result, expected)
757
+
758
def test_freq_dateoffset_with_relateivedelta_nanos(self):
    """A ``DateOffset`` mixing days, hours and nanoseconds steps back from ``end``."""
    # GH 46877
    stamps = [
        "1968-08-02T05:59:59.999999973",
        "1968-09-28T15:59:59.999999976",
        "1968-11-25T01:59:59.999999979",
        "1969-01-21T11:59:59.999999982",
        "1969-03-19T21:59:59.999999985",
        "1969-05-16T07:59:59.999999988",
        "1969-07-12T17:59:59.999999991",
        "1969-09-08T03:59:59.999999994",
        "1969-11-04T13:59:59.999999997",
        "1970-01-01T00:00:00.000000000",
    ]
    step = DateOffset(hours=10, days=57, nanoseconds=3)

    result = date_range(end="1970-01-01 00:00:00", periods=10, freq=step, name="a")
    expected = DatetimeIndex(stamps, name="a")
    tm.assert_index_equal(result, expected)
778
+
779
@pytest.mark.parametrize(
    "freq,freq_depr",
    [
        ("h", "H"),
        ("2min", "2T"),
        ("1s", "1S"),
        ("2ms", "2L"),
        ("1us", "1U"),
        ("2ns", "2N"),
    ],
)
def test_frequencies_H_T_S_L_U_N_deprecated(self, freq, freq_depr):
    """Deprecated upper-case tick aliases warn and name their replacement.

    Parameters
    ----------
    freq : str
        Supported alias, optionally with a numeric multiplier.
    freq_depr : str
        Deprecated spelling of the same alias.
    """
    # GH#52536
    # Strip the leading multiplier so only the alias itself is matched.
    freq_msg = re.split("[0-9]*", freq, maxsplit=1)[1]
    freq_depr_msg = re.split("[0-9]*", freq_depr, maxsplit=1)[1]
    # Both fragments must sit inside the parentheses so they concatenate;
    # otherwise the "please use ..." half is a discarded expression and the
    # suggested replacement is never checked.
    msg = (
        f"'{freq_depr_msg}' is deprecated and will be removed in a future version, "
        f"please use '{freq_msg}' instead"
    )

    expected = date_range("1/1/2000", periods=2, freq=freq)
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = date_range("1/1/2000", periods=2, freq=freq_depr)
    tm.assert_index_equal(result, expected)
803
+
804
@pytest.mark.parametrize(
    "freq,freq_depr",
    [
        ("200YE", "200A"),
        ("YE", "Y"),
        ("2YE-MAY", "2A-MAY"),
        ("YE-MAY", "Y-MAY"),
    ],
)
def test_frequencies_A_deprecated_Y_renamed(self, freq, freq_depr):
    """Deprecated ``A``/``Y`` year-end aliases warn and point to ``YE``.

    Parameters
    ----------
    freq : str
        Supported alias, optionally with a numeric multiplier.
    freq_depr : str
        Deprecated spelling of the same alias.
    """
    # GH#9586, GH#54275
    # Strip the leading multiplier so only the alias itself is matched.
    freq_msg = re.split("[0-9]*", freq, maxsplit=1)[1]
    freq_depr_msg = re.split("[0-9]*", freq_depr, maxsplit=1)[1]
    # Parenthesised so the two f-strings concatenate; without the parentheses
    # the second literal is a separate no-op statement and the suggested
    # replacement is never checked.
    msg = (
        f"'{freq_depr_msg}' is deprecated and will be removed "
        f"in a future version, please use '{freq_msg}' instead."
    )

    expected = date_range("1/1/2000", periods=2, freq=freq)
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = date_range("1/1/2000", periods=2, freq=freq_depr)
    tm.assert_index_equal(result, expected)
824
+
825
def test_to_offset_with_lowercase_deprecated_freq(self) -> None:
    """Lower-case ``"m"`` warns and still yields a month-end range."""
    # https://github.com/pandas-dev/pandas/issues/56847
    msg = (
        "'m' is deprecated and will be removed in a future version, "
        "please use 'ME' instead."
    )
    month_ends = ["2010-01-31", "2010-02-28"]

    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = date_range("2010-01-01", periods=2, freq="m")

    tm.assert_index_equal(result, DatetimeIndex(month_ends, freq="ME"))
835
+
836
def test_date_range_bday(self):
    """A ``"1B"`` range has the requested length, rolled start and ``B`` freq."""
    sdate = datetime(1999, 12, 25)
    idx = date_range(start=sdate, freq="1B", periods=20)

    rolled_start = sdate + 0 * offsets.BDay()
    assert len(idx) == 20
    assert idx[0] == rolled_start
    assert idx.freq == "B"
842
+
843
+
844
class TestDateRangeTZ:
    """Tests for date_range with timezones"""

    def test_hongkong_tz_convert(self):
        """Smoke test: ``.hour`` on a Hongkong-localized daily range works."""
        # GH#1673 smoke test
        dr = date_range("2012-01-01", "2012-01-10", freq="D", tz="Hongkong")

        # it works!
        dr.hour

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_date_range_span_dst_transition(self, tzstr):
        """Ranges spanning DST transitions stay at local midnight."""
        # GH#1778

        # Standard -> Daylight Savings Time
        # Use the parametrized zone here as well; a hard-coded "US/Eastern"
        # would leave the dateutil variant untested for the weekly range.
        dr = date_range("03/06/2012 00:00", periods=200, freq="W-FRI", tz=tzstr)

        assert (dr.hour == 0).all()

        dr = date_range("2012-11-02", periods=10, tz=tzstr)
        result = dr.hour
        expected = pd.Index([0] * 10, dtype="int32")
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_date_range_timezone_str_argument(self, tzstr):
        """A tz string and the tz object it resolves to give equal ranges."""
        tz = timezones.maybe_get_tz(tzstr)
        result = date_range("1/1/2000", periods=10, tz=tzstr)
        expected = date_range("1/1/2000", periods=10, tz=tz)

        tm.assert_index_equal(result, expected)

    def test_date_range_with_fixed_tz(self):
        """Fixed-offset endpoints carry their offset into the resulting range."""
        off = FixedOffset(420, "+07:00")
        start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off)
        end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off)
        rng = date_range(start=start, end=end)
        assert off == rng.tz

        # Same range expressed through ``periods`` and an explicit ``tz``.
        rng2 = date_range(start, periods=len(rng), tz=off)
        tm.assert_index_equal(rng, rng2)

        # Same range expressed through offset-qualified strings.
        rng3 = date_range("3/11/2012 05:00:00+07:00", "6/11/2012 05:00:00+07:00")
        assert (rng.values == rng3.values).all()

    def test_date_range_with_fixedoffset_noname(self):
        """The module-level unnamed fixed offset is preserved as ``tz``."""
        off = fixed_off_no_name
        start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off)
        end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off)
        rng = date_range(start=start, end=end)
        assert off == rng.tz

        idx = pd.Index([start, end])
        assert off == idx.tz

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_date_range_with_tz(self, tzstr):
        """The second hourly point from 04:00 equals the 05:00 aware stamp."""
        stamp = Timestamp("3/11/2012 05:00", tz=tzstr)
        assert stamp.hour == 5

        rng = date_range("3/11/2012 04:00", periods=10, freq="h", tz=tzstr)

        assert stamp == rng[1]

    @pytest.mark.parametrize("tz", ["Europe/London", "dateutil/Europe/London"])
    def test_date_range_ambiguous_endpoint(self, tz):
        """An ambiguous end-point raises unless ``ambiguous`` is supplied."""
        # construction with an ambiguous end-point
        # GH#11626

        with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"):
            date_range(
                "2013-10-26 23:00", "2013-10-27 01:00", tz="Europe/London", freq="h"
            )

        times = date_range(
            "2013-10-26 23:00", "2013-10-27 01:00", freq="h", tz=tz, ambiguous="infer"
        )
        assert times[0] == Timestamp("2013-10-26 23:00", tz=tz)
        assert times[-1] == Timestamp("2013-10-27 01:00:00+0000", tz=tz)

    @pytest.mark.parametrize(
        "tz, option, expected",
        [
            ["US/Pacific", "shift_forward", "2019-03-10 03:00"],
            ["dateutil/US/Pacific", "shift_forward", "2019-03-10 03:00"],
            ["US/Pacific", "shift_backward", "2019-03-10 01:00"],
            ["dateutil/US/Pacific", "shift_backward", "2019-03-10 01:00"],
            ["US/Pacific", timedelta(hours=1), "2019-03-10 03:00"],
        ],
    )
    def test_date_range_nonexistent_endpoint(self, tz, option, expected):
        """A nonexistent end-point raises unless ``nonexistent`` is supplied."""
        # construction with an nonexistent end-point

        with pytest.raises(pytz.NonExistentTimeError, match="2019-03-10 02:00:00"):
            date_range(
                "2019-03-10 00:00", "2019-03-10 02:00", tz="US/Pacific", freq="h"
            )

        times = date_range(
            "2019-03-10 00:00", "2019-03-10 02:00", freq="h", tz=tz, nonexistent=option
        )
        assert times[-1] == Timestamp(expected, tz=tz)
946
+
947
+
948
class TestGenRangeGeneration:
    """Tests for ``generate_range`` plus a few related ``date_range`` checks."""

    @pytest.mark.parametrize("freqstr,offset", [("B", BDay()), ("C", CDay())])
    def test_generate(self, freqstr, offset):
        """Passing the offset object or its alias yields the same sequence."""
        rng1 = list(generate_range(START, END, periods=None, offset=offset, unit="ns"))
        rng2 = list(generate_range(START, END, periods=None, offset=freqstr, unit="ns"))
        assert rng1 == rng2

    def test_1(self):
        """``start`` plus ``periods=2`` gives two consecutive business days."""
        produced = generate_range(
            start=datetime(2009, 3, 25),
            end=None,
            periods=2,
            offset=BDay(),
            unit="ns",
        )
        assert list(produced) == [datetime(2009, 3, 25), datetime(2009, 3, 26)]

    def test_2(self):
        """``start``/``end`` three days apart yields all three days."""
        produced = generate_range(
            start=datetime(2008, 1, 1),
            end=datetime(2008, 1, 3),
            periods=None,
            offset=BDay(),
            unit="ns",
        )
        wanted = [datetime(2008, 1, day) for day in (1, 2, 3)]
        assert list(produced) == wanted

    def test_3(self):
        """The 2008-01-05..2008-01-06 window produces no business days."""
        produced = generate_range(
            start=datetime(2008, 1, 5),
            end=datetime(2008, 1, 6),
            periods=None,
            offset=BDay(),
            unit="ns",
        )
        assert list(produced) == []

    def test_precision_finer_than_offset(self):
        """The start's sub-day component is kept with coarse frequencies."""
        # GH#9907
        quarterly_stamps = [
            "2015-06-30 00:00:03",
            "2015-09-30 00:00:03",
            "2015-12-31 00:00:03",
            "2016-03-31 00:00:03",
        ]
        weekly_stamps = [
            "2015-04-19 00:00:03",
            "2015-04-26 00:00:03",
            "2015-05-03 00:00:03",
            "2015-05-10 00:00:03",
            "2015-05-17 00:00:03",
            "2015-05-24 00:00:03",
            "2015-05-31 00:00:03",
            "2015-06-07 00:00:03",
            "2015-06-14 00:00:03",
            "2015-06-21 00:00:03",
        ]

        quarterly = date_range(
            start="2015-04-15 00:00:03", end="2016-04-22 00:00:00", freq="QE"
        )
        weekly = date_range(
            start="2015-04-15 00:00:03", end="2015-06-22 00:00:04", freq="W"
        )

        expected_quarterly = DatetimeIndex(
            quarterly_stamps, dtype="datetime64[ns]", freq="QE-DEC", tz=None
        )
        expected_weekly = DatetimeIndex(
            weekly_stamps, dtype="datetime64[ns]", freq="W-SUN", tz=None
        )
        tm.assert_index_equal(quarterly, expected_quarterly)
        tm.assert_index_equal(weekly, expected_weekly)

    # Class-level inputs consumed by the parametrization below.
    dt1, dt2 = "2017-01-01", "2017-01-01"
    tz1, tz2 = "US/Eastern", "Europe/London"

    @pytest.mark.parametrize(
        "start,end",
        [
            (Timestamp(dt1, tz=tz1), Timestamp(dt2)),
            (Timestamp(dt1), Timestamp(dt2, tz=tz2)),
            (Timestamp(dt1, tz=tz1), Timestamp(dt2, tz=tz2)),
            (Timestamp(dt1, tz=tz2), Timestamp(dt2, tz=tz1)),
        ],
    )
    def test_mismatching_tz_raises_err(self, start, end):
        """Endpoints with mismatching timezones raise with or without ``freq``."""
        # issue 18488
        msg = "Start and end cannot both be tz-aware with different timezones"
        for extra in ({}, {"freq": BDay()}):
            with pytest.raises(TypeError, match=msg):
                date_range(start, end, **extra)
1054
+
1055
+
1056
class TestBusinessDateRange:
    """Tests for business-day ranges built via ``bdate_range``/``date_range``."""

    def test_constructor(self):
        """Valid argument combinations work; misplaced or missing freq raises."""
        bdate_range(START, END, freq=BDay())
        bdate_range(START, periods=20, freq=BDay())
        bdate_range(end=START, periods=20, freq=BDay())

        # A frequency passed positionally lands in the ``periods`` slot.
        msg = "periods must be a number, got B"
        for factory in (date_range, bdate_range):
            with pytest.raises(TypeError, match=msg):
                factory("2011-1-1", "2012-1-1", "B")

        msg = "freq must be specified for bdate_range; use date_range instead"
        with pytest.raises(TypeError, match=msg):
            bdate_range(START, END, periods=10, freq=None)

    def test_misc(self):
        """``end`` plus ``periods`` spans exactly 19 business days backwards."""
        end = datetime(2009, 5, 13)
        dr = bdate_range(end=end, periods=20)
        first_date = end - 19 * BDay()

        assert len(dr) == 20
        assert dr[0] == first_date
        assert dr[-1] == end

    def test_date_parse_failure(self):
        """An unparsable date string raises the same error everywhere."""
        badly_formed_date = "2007/100/1"
        msg = "Unknown datetime string format, unable to parse: 2007/100/1"

        attempts = (
            lambda: Timestamp(badly_formed_date),
            lambda: bdate_range(start=badly_formed_date, periods=10),
            lambda: bdate_range(end=badly_formed_date, periods=10),
            lambda: bdate_range(badly_formed_date, badly_formed_date),
        )
        for attempt in attempts:
            with pytest.raises(ValueError, match=msg):
                attempt()

    def test_daterange_bug_456(self):
        """The union of two business-day ranges is still a ``DatetimeIndex``."""
        # GH #456
        single_day = bdate_range("12/5/2011", "12/5/2011")
        several_days = bdate_range("12/2/2011", "12/5/2011")
        assert several_days._data.freq == BDay()

        combined = single_day.union(several_days)
        assert isinstance(combined, DatetimeIndex)

    @pytest.mark.parametrize("inclusive", ["left", "right", "neither", "both"])
    def test_bdays_and_open_boundaries(self, inclusive):
        """Weekend endpoints leave the business days unchanged for any mode."""
        # GH 6673
        start = "2018-07-21"  # Saturday
        end = "2018-07-29"  # Sunday
        bday_start = "2018-07-23"  # Monday
        bday_end = "2018-07-27"  # Friday

        result = date_range(start, end, freq="B", inclusive=inclusive)
        expected = date_range(bday_start, bday_end, freq="D")
        tm.assert_index_equal(result, expected)
        # Note: we do _not_ expect the freqs to match here

    def test_bday_near_overflow(self):
        """A single period at the last representable day does not overflow."""
        # GH#24252 avoid doing unnecessary addition that _would_ overflow
        start = Timestamp.max.floor("D").to_pydatetime()
        expected = DatetimeIndex([start], freq="B").as_unit("ns")
        rng = date_range(start, end=None, periods=1, freq="B")
        tm.assert_index_equal(rng, expected)

    def test_bday_overflow_error(self):
        """Two periods from the last representable day raise out-of-bounds."""
        # GH#24252 check that we get OutOfBoundsDatetime and not OverflowError
        start = Timestamp.max.floor("D").to_pydatetime()
        with pytest.raises(
            OutOfBoundsDatetime, match="Out of bounds nanosecond timestamp"
        ):
            date_range(start, periods=2, freq="B")
1133
+
1134
+
1135
class TestCustomDateRange:
    """Tests for custom-business-day (``"C"``) ranges."""

    def test_constructor(self):
        """Valid ``CDay`` combinations work; a positional freq string raises."""
        bdate_range(START, END, freq=CDay())
        bdate_range(START, periods=20, freq=CDay())
        bdate_range(end=START, periods=20, freq=CDay())

        # A frequency passed positionally lands in the ``periods`` slot.
        msg = "periods must be a number, got C"
        for factory in (date_range, bdate_range):
            with pytest.raises(TypeError, match=msg):
                factory("2011-1-1", "2012-1-1", "C")

    def test_misc(self):
        """``end`` plus ``periods`` spans exactly 19 custom days backwards."""
        end = datetime(2009, 5, 13)
        dr = bdate_range(end=end, periods=20, freq="C")
        first_date = end - 19 * CDay()

        assert len(dr) == 20
        assert dr[0] == first_date
        assert dr[-1] == end

    def test_daterange_bug_456(self):
        """The union of two custom-day ranges is still a ``DatetimeIndex``."""
        # GH #456
        single_day = bdate_range("12/5/2011", "12/5/2011", freq="C")
        several_days = bdate_range("12/2/2011", "12/5/2011", freq="C")
        assert several_days._data.freq == CDay()

        combined = single_day.union(several_days)
        assert isinstance(combined, DatetimeIndex)

    def test_cdaterange(self, unit):
        """Plain ``"C"`` frequency yields three consecutive days from 2013-05-01."""
        days = ["2013-05-01", "2013-05-02", "2013-05-03"]
        result = bdate_range("2013-05-01", periods=3, freq="C", unit=unit)
        expected = DatetimeIndex(days, dtype=f"M8[{unit}]", freq="C")
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

    def test_cdaterange_weekmask(self, unit):
        """A weekmask is honoured and requires a custom frequency string."""
        weekmask = "Sun Mon Tue Wed Thu"
        result = bdate_range(
            "2013-05-01", periods=3, freq="C", weekmask=weekmask, unit=unit
        )
        expected = DatetimeIndex(
            ["2013-05-01", "2013-05-02", "2013-05-05"],
            dtype=f"M8[{unit}]",
            freq=result.freq,
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

        # raise with non-custom freq
        msg = (
            "a custom frequency string is required when holidays or "
            "weekmask are passed, got frequency B"
        )
        with pytest.raises(ValueError, match=msg):
            bdate_range("2013-05-01", periods=3, weekmask=weekmask)

    def test_cdaterange_holidays(self, unit):
        """Holidays are skipped and require a custom frequency string."""
        holidays = ["2013-05-01"]
        result = bdate_range(
            "2013-05-01", periods=3, freq="C", holidays=holidays, unit=unit
        )
        expected = DatetimeIndex(
            ["2013-05-02", "2013-05-03", "2013-05-06"],
            dtype=f"M8[{unit}]",
            freq=result.freq,
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

        # raise with non-custom freq
        msg = (
            "a custom frequency string is required when holidays or "
            "weekmask are passed, got frequency B"
        )
        with pytest.raises(ValueError, match=msg):
            bdate_range("2013-05-01", periods=3, holidays=holidays)

    def test_cdaterange_weekmask_and_holidays(self, unit):
        """Weekmask and holidays can be combined with a ``"C"`` frequency."""
        result = bdate_range(
            "2013-05-01",
            periods=3,
            freq="C",
            weekmask="Sun Mon Tue Wed Thu",
            holidays=["2013-05-01"],
            unit=unit,
        )
        days = ["2013-05-02", "2013-05-05", "2013-05-06"]
        expected = DatetimeIndex(days, dtype=f"M8[{unit}]", freq=result.freq)
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

    def test_cdaterange_holidays_weekmask_requires_freqstr(self):
        """Weekmask plus holidays without a custom frequency raises."""
        # raise with non-custom freq
        msg = (
            "a custom frequency string is required when holidays or "
            "weekmask are passed, got frequency B"
        )
        custom_kwargs = {
            "weekmask": "Sun Mon Tue Wed Thu",
            "holidays": ["2013-05-01"],
        }
        with pytest.raises(ValueError, match=msg):
            bdate_range("2013-05-01", periods=3, **custom_kwargs)

    @pytest.mark.parametrize(
        "freq", [freq for freq in prefix_mapping if freq.startswith("C")]
    )
    def test_all_custom_freq(self, freq):
        """Every ``C``-prefixed alias is accepted; a mangled alias is rejected."""
        # should not raise
        bdate_range(
            START, END, freq=freq, weekmask="Mon Wed Fri", holidays=["2009-03-14"]
        )

        bad_freq = freq + "FOO"
        with pytest.raises(
            ValueError, match=f"invalid custom frequency string: {bad_freq}"
        ):
            bdate_range(START, END, freq=bad_freq)

    @pytest.mark.parametrize(
        "start_end",
        [
            ("2018-01-01T00:00:01.000Z", "2018-01-03T00:00:01.000Z"),
            ("2018-01-01T00:00:00.010Z", "2018-01-03T00:00:00.010Z"),
            ("2001-01-01T00:00:00.010Z", "2001-01-03T00:00:00.010Z"),
        ],
    )
    def test_range_with_millisecond_resolution(self, start_end):
        """Two periods with ``inclusive="left"`` keep only the start point."""
        # https://github.com/pandas-dev/pandas/issues/24110
        start, end = start_end
        expected = DatetimeIndex([start], dtype="M8[ns, UTC]")
        result = date_range(start=start, end=end, periods=2, inclusive="left")
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "start,period,expected",
        [
            ("2022-07-23 00:00:00+02:00", 1, ["2022-07-25 00:00:00+02:00"]),
            ("2022-07-22 00:00:00+02:00", 1, ["2022-07-22 00:00:00+02:00"]),
            (
                "2022-07-22 00:00:00+02:00",
                2,
                ["2022-07-22 00:00:00+02:00", "2022-07-25 00:00:00+02:00"],
            ),
        ],
    )
    def test_range_with_timezone_and_custombusinessday(self, start, period, expected):
        """Offset-qualified starts combine with the ``"C"`` frequency."""
        # GH49441
        result = date_range(start=start, periods=period, freq="C")
        wanted = DatetimeIndex(expected).as_unit("ns")
        tm.assert_index_equal(result, wanted)
1291
+
1292
+
1293
class TestDateRangeNonNano:
    """Tests for the ``unit`` keyword of ``date_range``."""

    def test_date_range_reso_validation(self):
        """An unsupported ``unit`` value is rejected."""
        with pytest.raises(
            ValueError, match="'unit' must be one of 's', 'ms', 'us', 'ns'"
        ):
            date_range("2016-01-01", "2016-03-04", periods=3, unit="h")

    def test_date_range_freq_higher_than_reso(self):
        """A ``freq`` finer than ``unit`` raises."""
        # freq being higher-resolution than reso is a problem
        msg = "Use a lower freq or a higher unit instead"
        with pytest.raises(ValueError, match=msg):
            # # TODO give a more useful or informative message?
            date_range("2016-01-01", "2016-01-02", freq="ns", unit="ms")

    def test_date_range_freq_matches_reso(self):
        """``freq`` equal to ``unit`` yields one point per integer tick."""
        # GH#49106 matching reso is OK
        # (end, unit, first integer value, exclusive stop for np.arange)
        scenarios = (
            ("2016-01-01 00:00:01", "ms", 1_451_606_400_000, 1_451_606_401_001),
            (
                "2016-01-01 00:00:01",
                "us",
                1_451_606_400_000_000,
                1_451_606_401_000_001,
            ),
            (
                "2016-01-01 00:00:00.001",
                "ns",
                1_451_606_400_000_000_000,
                1_451_606_400_001_000_001,
            ),
        )
        for end, unit, first, stop in scenarios:
            dti = date_range("2016-01-01", end, freq=unit, unit=unit)
            rng = np.arange(first, stop, dtype=np.int64)
            expected = DatetimeIndex(rng.view(f"M8[{unit}]"), freq=unit)
            tm.assert_index_equal(dti, expected)

    def test_date_range_freq_lower_than_endpoints(self):
        """Endpoints finer than ``unit`` raise; a lossless ``unit`` works."""
        start = Timestamp("2022-10-19 11:50:44.719781")
        end = Timestamp("2022-10-19 11:50:47.066458")

        # start and end cannot be cast to "s" unit without lossy rounding,
        # so we do not allow this in date_range
        with pytest.raises(ValueError, match="Cannot losslessly convert units"):
            date_range(start, end, periods=3, unit="s")

        # but we can losslessly cast to "us"
        dti = date_range(start, end, periods=2, unit="us")
        endpoint_values = [stamp.as_unit("us")._value for stamp in (start, end)]
        rng = np.array(endpoint_values, dtype=np.int64)
        expected = DatetimeIndex(rng.view("M8[us]"))
        tm.assert_index_equal(dti, expected)

    def test_date_range_non_nano(self):
        """A daily range with ``unit="s"`` covers the requested span in seconds."""
        start = np.datetime64("1066-10-14")  # Battle of Hastings
        end = np.datetime64("2305-07-13")  # Jean-Luc Picard's birthday

        dti = date_range(start, end, freq="D", unit="s")
        assert dti.freq == "D"
        assert dti.dtype == "M8[s]"

        # One value per day, stepping in seconds; ``end + 1`` keeps the last day.
        first_second = start.astype("M8[s]").view("i8")
        stop_second = (end + 1).astype("M8[s]").view("i8")
        exp = np.arange(first_second, stop_second, 24 * 3600).view("M8[s]")

        tm.assert_numpy_array_equal(dti.to_numpy(), exp)
1357
+
1358
+
1359
+ class TestDateRangeNonTickFreq:
1360
+ # Tests revolving around less-common (non-Tick) `freq` keywords.
1361
+
1362
def test_date_range_custom_business_month_begin(self, unit):
    """``CBMonthBegin`` with the US federal calendar gives the listed dates."""
    month_starts = [
        "2012-01-03",
        "2012-02-01",
        "2012-03-01",
        "2012-04-02",
        "2012-05-01",
        "2012-06-01",
        "2012-07-02",
        "2012-08-01",
        "2012-09-04",
        "2012-10-01",
        "2012-11-01",
        "2012-12-03",
    ]
    freq = offsets.CBMonthBegin(calendar=USFederalHolidayCalendar())
    dti = date_range(start="20120101", end="20130101", freq=freq, unit=unit)
    # Every generated point must lie on the offset.
    assert all(freq.is_on_offset(point) for point in dti)

    expected = DatetimeIndex(month_starts, dtype=f"M8[{unit}]", freq=freq)
    tm.assert_index_equal(dti, expected)
1387
+
1388
def test_date_range_custom_business_month_end(self, unit):
    """``CBMonthEnd`` with the US federal calendar gives the listed dates."""
    month_ends = [
        "2012-01-31",
        "2012-02-29",
        "2012-03-30",
        "2012-04-30",
        "2012-05-31",
        "2012-06-29",
        "2012-07-31",
        "2012-08-31",
        "2012-09-28",
        "2012-10-31",
        "2012-11-30",
        "2012-12-31",
    ]
    freq = offsets.CBMonthEnd(calendar=USFederalHolidayCalendar())
    dti = date_range(start="20120101", end="20130101", freq=freq, unit=unit)
    # Every generated point must lie on the offset.
    assert all(freq.is_on_offset(point) for point in dti)

    expected = DatetimeIndex(month_ends, dtype=f"M8[{unit}]", freq=freq)
    tm.assert_index_equal(dti, expected)
1413
+
1414
def test_date_range_with_custom_holidays(self, unit):
    """``CustomBusinessHour`` skips the configured holiday."""
    # GH#30593
    business_hours = [
        "2020-11-25 15:00:00",
        "2020-11-25 16:00:00",
        "2020-11-27 15:00:00",
        "2020-11-27 16:00:00",
    ]
    freq = offsets.CustomBusinessHour(start="15:00", holidays=["2020-11-26"])

    result = date_range(start="2020-11-25 15:00", periods=4, freq=freq, unit=unit)
    expected = DatetimeIndex(business_hours, dtype=f"M8[{unit}]", freq=freq)
    tm.assert_index_equal(result, expected)
1429
+
1430
def test_date_range_businesshour(self, unit):
    """``"bh"`` ranges over one day, over a weekend and over several days."""
    dtype = f"M8[{unit}]"

    def hourly(day):
        # The 09:00-16:00 hourly marks of one day, e.g. "2014-07-04 09:00".
        return [f"{day} {hour:02d}:00" for hour in range(9, 17)]

    # A single business day.
    idx = DatetimeIndex(hourly("2014-07-04"), dtype=dtype, freq="bh")
    rng = date_range("2014-07-04 09:00", "2014-07-04 16:00", freq="bh", unit=unit)
    tm.assert_index_equal(idx, rng)

    # Friday close to Monday open.
    idx = DatetimeIndex(
        ["2014-07-04 16:00", "2014-07-07 09:00"], dtype=dtype, freq="bh"
    )
    rng = date_range("2014-07-04 16:00", "2014-07-07 09:00", freq="bh", unit=unit)
    tm.assert_index_equal(idx, rng)

    # Three business days in a row (Friday, Monday, Tuesday).
    stamps = hourly("2014-07-04") + hourly("2014-07-07") + hourly("2014-07-08")
    idx = DatetimeIndex(stamps, dtype=dtype, freq="bh")
    rng = date_range("2014-07-04 09:00", "2014-07-08 16:00", freq="bh", unit=unit)
    tm.assert_index_equal(idx, rng)
1486
+
1487
def test_date_range_business_hour2(self, unit):
    """start/end, start/periods and end/periods agree for ``"bh"`` ranges."""
    on_the_hour = [
        date_range(
            start="2014-07-04 15:00", end="2014-07-08 10:00", freq="bh", unit=unit
        ),
        date_range(start="2014-07-04 15:00", periods=12, freq="bh", unit=unit),
        date_range(end="2014-07-08 10:00", periods=12, freq="bh", unit=unit),
    ]
    expected = DatetimeIndex(
        [
            "2014-07-04 15:00",
            "2014-07-04 16:00",
            "2014-07-07 09:00",
            "2014-07-07 10:00",
            "2014-07-07 11:00",
            "2014-07-07 12:00",
            "2014-07-07 13:00",
            "2014-07-07 14:00",
            "2014-07-07 15:00",
            "2014-07-07 16:00",
            "2014-07-08 09:00",
            "2014-07-08 10:00",
        ],
        dtype=f"M8[{unit}]",
        freq="bh",
    )
    for idx in on_the_hour:
        tm.assert_index_equal(idx, expected)

    # Same three constructions, shifted 45 minutes past the hour.
    off_the_hour = [
        date_range(
            start="2014-07-04 15:45", end="2014-07-08 10:45", freq="bh", unit=unit
        ),
        date_range(start="2014-07-04 15:45", periods=12, freq="bh", unit=unit),
        date_range(end="2014-07-08 10:45", periods=12, freq="bh", unit=unit),
    ]

    expected2 = expected + Timedelta(minutes=45).as_unit(unit)
    expected2.freq = "bh"
    for idx in off_the_hour:
        tm.assert_index_equal(idx, expected2)
1526
+
1527
def test_date_range_business_hour_short(self, unit):
    """A single-period ``"bh"`` range contains just the start."""
    # GH#49835
    result = date_range(start="2014-07-01 10:00", freq="bh", periods=1, unit=unit)
    wanted = DatetimeIndex(["2014-07-01 10:00"], dtype=f"M8[{unit}]", freq="bh")
    tm.assert_index_equal(result, wanted)
1532
+
1533
def test_date_range_year_start(self, unit):
    """``"YS"`` yields January 1st of 2013 through 2017."""
    # see GH#9313
    year_starts = [f"{year}-01-01" for year in range(2013, 2018)]
    rng = date_range("1/1/2013", "7/1/2017", freq="YS", unit=unit)
    exp = DatetimeIndex(year_starts, dtype=f"M8[{unit}]", freq="YS")
    tm.assert_index_equal(rng, exp)
1542
+
1543
def test_date_range_year_end(self, unit):
    """``"YE"`` yields December 31st of 2013 through 2016."""
    # see GH#9313
    year_ends = [f"{year}-12-31" for year in range(2013, 2017)]
    rng = date_range("1/1/2013", "7/1/2017", freq="YE", unit=unit)
    exp = DatetimeIndex(year_ends, dtype=f"M8[{unit}]", freq="YE")
    tm.assert_index_equal(rng, exp)
1552
+
1553
def test_date_range_negative_freq_year_end(self, unit):
    """``"-2YE"`` steps backwards two year-ends at a time."""
    # GH#11018
    year_ends = ["2011-12-31", "2009-12-31", "2007-12-31"]
    rng = date_range("2011-12-31", freq="-2YE", periods=3, unit=unit)
    exp = DatetimeIndex(year_ends, dtype=f"M8[{unit}]", freq="-2YE")
    tm.assert_index_equal(rng, exp)
    assert rng.freq == "-2YE"
1561
+
1562
def test_date_range_business_year_end_year(self, unit):
    """``"BYE"`` yields the listed business year-ends for 2013 through 2016."""
    # see GH#9313
    business_year_ends = ["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-30"]
    rng = date_range("1/1/2013", "7/1/2017", freq="BYE", unit=unit)
    exp = DatetimeIndex(business_year_ends, dtype=f"M8[{unit}]", freq="BYE")
    tm.assert_index_equal(rng, exp)
1571
+
1572
def test_date_range_bms(self, unit):
    """``"BMS"`` yields the first business day of each of ten months."""
    # GH#1645
    month_starts = [
        "2000-01-03",
        "2000-02-01",
        "2000-03-01",
        "2000-04-03",
        "2000-05-01",
        "2000-06-01",
        "2000-07-03",
        "2000-08-01",
        "2000-09-01",
        "2000-10-02",
    ]
    result = date_range("1/1/2000", periods=10, freq="BMS", unit=unit)
    expected = DatetimeIndex(month_starts, dtype=f"M8[{unit}]", freq="BMS")
    tm.assert_index_equal(result, expected)
1593
+
1594
+ def test_date_range_semi_month_begin(self, unit):
1595
+ dates = [
1596
+ datetime(2007, 12, 15),
1597
+ datetime(2008, 1, 1),
1598
+ datetime(2008, 1, 15),
1599
+ datetime(2008, 2, 1),
1600
+ datetime(2008, 2, 15),
1601
+ datetime(2008, 3, 1),
1602
+ datetime(2008, 3, 15),
1603
+ datetime(2008, 4, 1),
1604
+ datetime(2008, 4, 15),
1605
+ datetime(2008, 5, 1),
1606
+ datetime(2008, 5, 15),
1607
+ datetime(2008, 6, 1),
1608
+ datetime(2008, 6, 15),
1609
+ datetime(2008, 7, 1),
1610
+ datetime(2008, 7, 15),
1611
+ datetime(2008, 8, 1),
1612
+ datetime(2008, 8, 15),
1613
+ datetime(2008, 9, 1),
1614
+ datetime(2008, 9, 15),
1615
+ datetime(2008, 10, 1),
1616
+ datetime(2008, 10, 15),
1617
+ datetime(2008, 11, 1),
1618
+ datetime(2008, 11, 15),
1619
+ datetime(2008, 12, 1),
1620
+ datetime(2008, 12, 15),
1621
+ ]
1622
+ # ensure generating a range with DatetimeIndex gives same result
1623
+ result = date_range(start=dates[0], end=dates[-1], freq="SMS", unit=unit)
1624
+ exp = DatetimeIndex(dates, dtype=f"M8[{unit}]", freq="SMS")
1625
+ tm.assert_index_equal(result, exp)
1626
+
1627
+ def test_date_range_semi_month_end(self, unit):
1628
+ dates = [
1629
+ datetime(2007, 12, 31),
1630
+ datetime(2008, 1, 15),
1631
+ datetime(2008, 1, 31),
1632
+ datetime(2008, 2, 15),
1633
+ datetime(2008, 2, 29),
1634
+ datetime(2008, 3, 15),
1635
+ datetime(2008, 3, 31),
1636
+ datetime(2008, 4, 15),
1637
+ datetime(2008, 4, 30),
1638
+ datetime(2008, 5, 15),
1639
+ datetime(2008, 5, 31),
1640
+ datetime(2008, 6, 15),
1641
+ datetime(2008, 6, 30),
1642
+ datetime(2008, 7, 15),
1643
+ datetime(2008, 7, 31),
1644
+ datetime(2008, 8, 15),
1645
+ datetime(2008, 8, 31),
1646
+ datetime(2008, 9, 15),
1647
+ datetime(2008, 9, 30),
1648
+ datetime(2008, 10, 15),
1649
+ datetime(2008, 10, 31),
1650
+ datetime(2008, 11, 15),
1651
+ datetime(2008, 11, 30),
1652
+ datetime(2008, 12, 15),
1653
+ datetime(2008, 12, 31),
1654
+ ]
1655
+ # ensure generating a range with DatetimeIndex gives same result
1656
+ result = date_range(start=dates[0], end=dates[-1], freq="SME", unit=unit)
1657
+ exp = DatetimeIndex(dates, dtype=f"M8[{unit}]", freq="SME")
1658
+ tm.assert_index_equal(result, exp)
1659
+
1660
+ def test_date_range_week_of_month(self, unit):
1661
+ # GH#20517
1662
+ # Note the start here is not on_offset for this freq
1663
+ result = date_range(start="20110101", periods=1, freq="WOM-1MON", unit=unit)
1664
+ expected = DatetimeIndex(["2011-01-03"], dtype=f"M8[{unit}]", freq="WOM-1MON")
1665
+ tm.assert_index_equal(result, expected)
1666
+
1667
+ result2 = date_range(start="20110101", periods=2, freq="WOM-1MON", unit=unit)
1668
+ expected2 = DatetimeIndex(
1669
+ ["2011-01-03", "2011-02-07"], dtype=f"M8[{unit}]", freq="WOM-1MON"
1670
+ )
1671
+ tm.assert_index_equal(result2, expected2)
1672
+
1673
+ def test_date_range_week_of_month2(self, unit):
1674
+ # GH#5115, GH#5348
1675
+ result = date_range("2013-1-1", periods=4, freq="WOM-1SAT", unit=unit)
1676
+ expected = DatetimeIndex(
1677
+ ["2013-01-05", "2013-02-02", "2013-03-02", "2013-04-06"],
1678
+ dtype=f"M8[{unit}]",
1679
+ freq="WOM-1SAT",
1680
+ )
1681
+ tm.assert_index_equal(result, expected)
1682
+
1683
+ def test_date_range_negative_freq_month_end(self, unit):
1684
+ # GH#11018
1685
+ rng = date_range("2011-01-31", freq="-2ME", periods=3, unit=unit)
1686
+ exp = DatetimeIndex(
1687
+ ["2011-01-31", "2010-11-30", "2010-09-30"], dtype=f"M8[{unit}]", freq="-2ME"
1688
+ )
1689
+ tm.assert_index_equal(rng, exp)
1690
+ assert rng.freq == "-2ME"
1691
+
1692
+ def test_date_range_fy5253(self, unit):
1693
+ freq = offsets.FY5253(startingMonth=1, weekday=3, variation="nearest")
1694
+ dti = date_range(
1695
+ start="2013-01-01",
1696
+ periods=2,
1697
+ freq=freq,
1698
+ unit=unit,
1699
+ )
1700
+ expected = DatetimeIndex(
1701
+ ["2013-01-31", "2014-01-30"], dtype=f"M8[{unit}]", freq=freq
1702
+ )
1703
+
1704
+ tm.assert_index_equal(dti, expected)
1705
+
1706
@pytest.mark.parametrize(
    "freqstr,offset",
    [
        ("QS", offsets.QuarterBegin(startingMonth=1)),
        ("BQE", offsets.BQuarterEnd(startingMonth=12)),
        ("W-SUN", offsets.Week(weekday=6)),
    ],
)
def test_date_range_freqstr_matches_offset(self, freqstr, offset):
    """A frequency alias and its offset object give the same length and freq."""
    bounds = {"start": datetime(1999, 12, 25), "end": datetime(2000, 1, 1)}

    from_alias = date_range(freq=freqstr, **bounds)
    from_offset = date_range(freq=offset, **bounds)
    assert len(from_alias) == len(from_offset)
    assert from_alias.freq == from_offset.freq
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_datetime.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime as dt
2
+ from datetime import date
3
+ import re
4
+
5
+ import numpy as np
6
+ import pytest
7
+
8
+ from pandas.compat.numpy import np_long
9
+
10
+ import pandas as pd
11
+ from pandas import (
12
+ DataFrame,
13
+ DatetimeIndex,
14
+ Index,
15
+ Timestamp,
16
+ date_range,
17
+ offsets,
18
+ )
19
+ import pandas._testing as tm
20
+
21
+
22
+ class TestDatetimeIndex:
23
+ def test_is_(self):
24
+ dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME")
25
+ assert dti.is_(dti)
26
+ assert dti.is_(dti.view())
27
+ assert not dti.is_(dti.copy())
28
+
29
+ def test_time_overflow_for_32bit_machines(self):
30
+ # GH8943. On some machines NumPy defaults to np.int32 (for example,
31
+ # 32-bit Linux machines). In the function _generate_regular_range
32
+ # found in tseries/index.py, `periods` gets multiplied by `strides`
33
+ # (which has value 1e9) and since the max value for np.int32 is ~2e9,
34
+ # and since those machines won't promote np.int32 to np.int64, we get
35
+ # overflow.
36
+ periods = np_long(1000)
37
+
38
+ idx1 = date_range(start="2000", periods=periods, freq="s")
39
+ assert len(idx1) == periods
40
+
41
+ idx2 = date_range(end="2000", periods=periods, freq="s")
42
+ assert len(idx2) == periods
43
+
44
+ def test_nat(self):
45
+ assert DatetimeIndex([np.nan])[0] is pd.NaT
46
+
47
+ def test_week_of_month_frequency(self):
48
+ # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise
49
+ d1 = date(2002, 9, 1)
50
+ d2 = date(2013, 10, 27)
51
+ d3 = date(2012, 9, 30)
52
+ idx1 = DatetimeIndex([d1, d2])
53
+ idx2 = DatetimeIndex([d3])
54
+ result_append = idx1.append(idx2)
55
+ expected = DatetimeIndex([d1, d2, d3])
56
+ tm.assert_index_equal(result_append, expected)
57
+ result_union = idx1.union(idx2)
58
+ expected = DatetimeIndex([d1, d3, d2])
59
+ tm.assert_index_equal(result_union, expected)
60
+
61
+ def test_append_nondatetimeindex(self):
62
+ rng = date_range("1/1/2000", periods=10)
63
+ idx = Index(["a", "b", "c", "d"])
64
+
65
+ result = rng.append(idx)
66
+ assert isinstance(result[0], Timestamp)
67
+
68
+ def test_misc_coverage(self):
69
+ rng = date_range("1/1/2000", periods=5)
70
+ result = rng.groupby(rng.day)
71
+ assert isinstance(next(iter(result.values()))[0], Timestamp)
72
+
73
+ # TODO: belongs in frame groupby tests?
74
+ def test_groupby_function_tuple_1677(self):
75
+ df = DataFrame(
76
+ np.random.default_rng(2).random(100),
77
+ index=date_range("1/1/2000", periods=100),
78
+ )
79
+ monthly_group = df.groupby(lambda x: (x.year, x.month))
80
+
81
+ result = monthly_group.mean()
82
+ assert isinstance(result.index[0], tuple)
83
+
84
+ def assert_index_parameters(self, index):
85
+ assert index.freq == "40960ns"
86
+ assert index.inferred_freq == "40960ns"
87
+
88
def test_ns_index(self):
    """A nanosecond-multiple frequency survives construction and ``date_range``."""
    nsamples = 400
    step_ns = int(1e9 / 24414)
    origin = np.datetime64("2012-09-20T00:00:00")

    stamps = origin + np.arange(nsamples) * np.timedelta64(step_ns, "ns")
    index = DatetimeIndex(stamps, freq=step_ns * offsets.Nano(), name="time")
    self.assert_index_parameters(index)

    # Rebuilding the range from the end points and freq gives the same parameters.
    rebuilt = date_range(start=index[0], end=index[-1], freq=index.freq)
    self.assert_index_parameters(rebuilt)
100
+
101
+ def test_asarray_tz_naive(self):
102
+ # This shouldn't produce a warning.
103
+ idx = date_range("2000", periods=2)
104
+ # M8[ns] by default
105
+ result = np.asarray(idx)
106
+
107
+ expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")
108
+ tm.assert_numpy_array_equal(result, expected)
109
+
110
+ # optionally, object
111
+ result = np.asarray(idx, dtype=object)
112
+
113
+ expected = np.array([Timestamp("2000-01-01"), Timestamp("2000-01-02")])
114
+ tm.assert_numpy_array_equal(result, expected)
115
+
116
+ def test_asarray_tz_aware(self):
117
+ tz = "US/Central"
118
+ idx = date_range("2000", periods=2, tz=tz)
119
+ expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]")
120
+ result = np.asarray(idx, dtype="datetime64[ns]")
121
+
122
+ tm.assert_numpy_array_equal(result, expected)
123
+
124
+ # Old behavior with no warning
125
+ result = np.asarray(idx, dtype="M8[ns]")
126
+
127
+ tm.assert_numpy_array_equal(result, expected)
128
+
129
+ # Future behavior with no warning
130
+ expected = np.array(
131
+ [Timestamp("2000-01-01", tz=tz), Timestamp("2000-01-02", tz=tz)]
132
+ )
133
+ result = np.asarray(idx, dtype=object)
134
+
135
+ tm.assert_numpy_array_equal(result, expected)
136
+
137
+ def test_CBH_deprecated(self):
138
+ msg = "'CBH' is deprecated and will be removed in a future version."
139
+
140
+ with tm.assert_produces_warning(FutureWarning, match=msg):
141
+ expected = date_range(
142
+ dt.datetime(2022, 12, 11), dt.datetime(2022, 12, 13), freq="CBH"
143
+ )
144
+ result = DatetimeIndex(
145
+ [
146
+ "2022-12-12 09:00:00",
147
+ "2022-12-12 10:00:00",
148
+ "2022-12-12 11:00:00",
149
+ "2022-12-12 12:00:00",
150
+ "2022-12-12 13:00:00",
151
+ "2022-12-12 14:00:00",
152
+ "2022-12-12 15:00:00",
153
+ "2022-12-12 16:00:00",
154
+ ],
155
+ dtype="datetime64[ns]",
156
+ freq="cbh",
157
+ )
158
+
159
+ tm.assert_index_equal(result, expected)
160
+
161
@pytest.mark.parametrize(
    "freq_depr, expected_values, expected_freq",
    [
        (
            "AS-AUG",
            ["2021-08-01", "2022-08-01", "2023-08-01"],
            "YS-AUG",
        ),
        (
            "1BAS-MAY",
            ["2021-05-03", "2022-05-02", "2023-05-01"],
            "1BYS-MAY",
        ),
    ],
)
def test_AS_BAS_deprecated(self, freq_depr, expected_values, expected_freq):
    """Deprecated ``AS``/``BAS`` aliases warn and match their ``YS``/``BYS`` forms."""
    # GH#55479
    # Strip any leading multiplier digits to get the alias named in the warning.
    alias = re.split("[0-9]*", freq_depr, maxsplit=1)[1]
    msg = f"'{alias}' is deprecated and will be removed in a future version."

    start, end = dt.datetime(2020, 12, 1), dt.datetime(2023, 12, 1)
    with tm.assert_produces_warning(FutureWarning, match=msg):
        from_deprecated = date_range(start, end, freq=freq_depr)

    from_literal = DatetimeIndex(
        expected_values, dtype="datetime64[ns]", freq=expected_freq
    )
    tm.assert_index_equal(from_literal, from_deprecated)
192
+
193
@pytest.mark.parametrize(
    "freq, expected_values, freq_depr",
    [
        ("2BYE-MAR", ["2016-03-31"], "2BA-MAR"),
        ("2BYE-JUN", ["2016-06-30"], "2BY-JUN"),
        ("2BME", ["2016-02-29", "2016-04-29", "2016-06-30"], "2BM"),
        ("2BQE", ["2016-03-31"], "2BQ"),
        ("1BQE-MAR", ["2016-03-31", "2016-06-30"], "1BQ-MAR"),
    ],
)
def test_BM_BQ_BY_deprecated(self, freq, expected_values, freq_depr):
    """Deprecated ``BM``/``BQ``/``BY``/``BA`` aliases warn and name the replacement.

    ``freq_depr`` is the deprecated spelling passed to ``date_range``; ``freq``
    is the current spelling used to build the expected index. Both carry a
    one-character multiplier prefix that is stripped for the warning message.
    """
    # GH#52064
    # The two f-strings must be parenthesized to form ONE message. Written as
    # two separate statements, the second line is a discarded expression and
    # the "please use ... instead" half would never be matched.
    msg = (
        f"'{freq_depr[1:]}' is deprecated and will be removed "
        f"in a future version, please use '{freq[1:]}' instead."
    )

    with tm.assert_produces_warning(FutureWarning, match=msg):
        expected = date_range(start="2016-02-21", end="2016-08-21", freq=freq_depr)
    result = DatetimeIndex(
        data=expected_values,
        dtype="datetime64[ns]",
        freq=freq,
    )

    tm.assert_index_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_formats.py ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+
3
+ import dateutil.tz
4
+ import numpy as np
5
+ import pytest
6
+ import pytz
7
+
8
+ import pandas as pd
9
+ from pandas import (
10
+ DatetimeIndex,
11
+ NaT,
12
+ Series,
13
+ )
14
+ import pandas._testing as tm
15
+
16
+
17
@pytest.fixture(params=["s", "ms", "us", "ns"])
def unit(request):
    """Fixture yielding each resolution string listed in ``params``."""
    resolution = request.param
    return resolution
20
+
21
+
22
+ def test_get_values_for_csv():
23
+ index = pd.date_range(freq="1D", periods=3, start="2017-01-01")
24
+
25
+ # First, with no arguments.
26
+ expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object)
27
+
28
+ result = index._get_values_for_csv()
29
+ tm.assert_numpy_array_equal(result, expected)
30
+
31
+ # No NaN values, so na_rep has no effect
32
+ result = index._get_values_for_csv(na_rep="pandas")
33
+ tm.assert_numpy_array_equal(result, expected)
34
+
35
+ # Make sure date formatting works
36
+ expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype=object)
37
+
38
+ result = index._get_values_for_csv(date_format="%m-%Y-%d")
39
+ tm.assert_numpy_array_equal(result, expected)
40
+
41
+ # NULL object handling should work
42
+ index = DatetimeIndex(["2017-01-01", NaT, "2017-01-03"])
43
+ expected = np.array(["2017-01-01", "NaT", "2017-01-03"], dtype=object)
44
+
45
+ result = index._get_values_for_csv(na_rep="NaT")
46
+ tm.assert_numpy_array_equal(result, expected)
47
+
48
+ expected = np.array(["2017-01-01", "pandas", "2017-01-03"], dtype=object)
49
+
50
+ result = index._get_values_for_csv(na_rep="pandas")
51
+ tm.assert_numpy_array_equal(result, expected)
52
+
53
+ result = index._get_values_for_csv(na_rep="NaT", date_format="%Y-%m-%d %H:%M:%S.%f")
54
+ expected = np.array(
55
+ ["2017-01-01 00:00:00.000000", "NaT", "2017-01-03 00:00:00.000000"],
56
+ dtype=object,
57
+ )
58
+ tm.assert_numpy_array_equal(result, expected)
59
+
60
+ # invalid format
61
+ result = index._get_values_for_csv(na_rep="NaT", date_format="foo")
62
+ expected = np.array(["foo", "NaT", "foo"], dtype=object)
63
+ tm.assert_numpy_array_equal(result, expected)
64
+
65
+
66
+ class TestDatetimeIndexRendering:
67
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_dti_with_timezone_repr(self, tzstr):
    """The repr of a localized index shows the local wall time of its start."""
    naive = pd.date_range("4/13/2010", "5/6/2010")
    localized = naive.tz_localize(tzstr)

    assert "2010-04-13 00:00:00" in repr(localized)
75
+
76
+ def test_dti_repr_dates(self):
77
+ text = str(pd.to_datetime([datetime(2013, 1, 1), datetime(2014, 1, 1)]))
78
+ assert "['2013-01-01'," in text
79
+ assert ", '2014-01-01']" in text
80
+
81
+ def test_dti_repr_mixed(self):
82
+ text = str(
83
+ pd.to_datetime(
84
+ [datetime(2013, 1, 1), datetime(2014, 1, 1, 12), datetime(2014, 1, 1)]
85
+ )
86
+ )
87
+ assert "'2013-01-01 00:00:00'," in text
88
+ assert "'2014-01-01 00:00:00']" in text
89
+
90
+ def test_dti_repr_short(self):
91
+ dr = pd.date_range(start="1/1/2012", periods=1)
92
+ repr(dr)
93
+
94
+ dr = pd.date_range(start="1/1/2012", periods=2)
95
+ repr(dr)
96
+
97
+ dr = pd.date_range(start="1/1/2012", periods=3)
98
+ repr(dr)
99
+
100
@pytest.mark.parametrize(
    "dates, freq, expected_repr",
    [
        (
            ["2012-01-01 00:00:00"],
            "60min",
            (
                "DatetimeIndex(['2012-01-01 00:00:00'], "
                "dtype='datetime64[ns]', freq='60min')"
            ),
        ),
        (
            ["2012-01-01 00:00:00", "2012-01-01 01:00:00"],
            "60min",
            "DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 01:00:00'], "
            "dtype='datetime64[ns]', freq='60min')",
        ),
        (
            ["2012-01-01"],
            "24h",
            "DatetimeIndex(['2012-01-01'], dtype='datetime64[ns]', freq='24h')",
        ),
    ],
)
def test_dti_repr_time_midnight(self, dates, freq, expected_repr, unit):
    """Check the repr of midnight values for each freq and unit (GH53634)."""
    index = DatetimeIndex(dates, freq).as_unit(unit)
    # The parametrized reprs are written for "ns"; swap in the unit under test.
    expected = expected_repr.replace("[ns]", f"[{unit}]")
    assert repr(index) == expected
129
+
130
+ def test_dti_representation(self, unit):
131
+ idxs = []
132
+ idxs.append(DatetimeIndex([], freq="D"))
133
+ idxs.append(DatetimeIndex(["2011-01-01"], freq="D"))
134
+ idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D"))
135
+ idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D"))
136
+ idxs.append(
137
+ DatetimeIndex(
138
+ ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
139
+ freq="h",
140
+ tz="Asia/Tokyo",
141
+ )
142
+ )
143
+ idxs.append(
144
+ DatetimeIndex(
145
+ ["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
146
+ )
147
+ )
148
+ idxs.append(
149
+ DatetimeIndex(["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="UTC")
150
+ )
151
+
152
+ exp = []
153
+ exp.append("DatetimeIndex([], dtype='datetime64[ns]', freq='D')")
154
+ exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', freq='D')")
155
+ exp.append(
156
+ "DatetimeIndex(['2011-01-01', '2011-01-02'], "
157
+ "dtype='datetime64[ns]', freq='D')"
158
+ )
159
+ exp.append(
160
+ "DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], "
161
+ "dtype='datetime64[ns]', freq='D')"
162
+ )
163
+ exp.append(
164
+ "DatetimeIndex(['2011-01-01 09:00:00+09:00', "
165
+ "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']"
166
+ ", dtype='datetime64[ns, Asia/Tokyo]', freq='h')"
167
+ )
168
+ exp.append(
169
+ "DatetimeIndex(['2011-01-01 09:00:00-05:00', "
170
+ "'2011-01-01 10:00:00-05:00', 'NaT'], "
171
+ "dtype='datetime64[ns, US/Eastern]', freq=None)"
172
+ )
173
+ exp.append(
174
+ "DatetimeIndex(['2011-01-01 09:00:00+00:00', "
175
+ "'2011-01-01 10:00:00+00:00', 'NaT'], "
176
+ "dtype='datetime64[ns, UTC]', freq=None)"
177
+ ""
178
+ )
179
+
180
+ with pd.option_context("display.width", 300):
181
+ for index, expected in zip(idxs, exp):
182
+ index = index.as_unit(unit)
183
+ expected = expected.replace("[ns", f"[{unit}")
184
+ result = repr(index)
185
+ assert result == expected
186
+ result = str(index)
187
+ assert result == expected
188
+
189
+ # TODO: this is a Series.__repr__ test
190
+ def test_dti_representation_to_series(self, unit):
191
+ idx1 = DatetimeIndex([], freq="D")
192
+ idx2 = DatetimeIndex(["2011-01-01"], freq="D")
193
+ idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D")
194
+ idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
195
+ idx5 = DatetimeIndex(
196
+ ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
197
+ freq="h",
198
+ tz="Asia/Tokyo",
199
+ )
200
+ idx6 = DatetimeIndex(
201
+ ["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
202
+ )
203
+ idx7 = DatetimeIndex(["2011-01-01 09:00", "2011-01-02 10:15"])
204
+
205
+ exp1 = """Series([], dtype: datetime64[ns])"""
206
+
207
+ exp2 = "0 2011-01-01\ndtype: datetime64[ns]"
208
+
209
+ exp3 = "0 2011-01-01\n1 2011-01-02\ndtype: datetime64[ns]"
210
+
211
+ exp4 = (
212
+ "0 2011-01-01\n"
213
+ "1 2011-01-02\n"
214
+ "2 2011-01-03\n"
215
+ "dtype: datetime64[ns]"
216
+ )
217
+
218
+ exp5 = (
219
+ "0 2011-01-01 09:00:00+09:00\n"
220
+ "1 2011-01-01 10:00:00+09:00\n"
221
+ "2 2011-01-01 11:00:00+09:00\n"
222
+ "dtype: datetime64[ns, Asia/Tokyo]"
223
+ )
224
+
225
+ exp6 = (
226
+ "0 2011-01-01 09:00:00-05:00\n"
227
+ "1 2011-01-01 10:00:00-05:00\n"
228
+ "2 NaT\n"
229
+ "dtype: datetime64[ns, US/Eastern]"
230
+ )
231
+
232
+ exp7 = (
233
+ "0 2011-01-01 09:00:00\n"
234
+ "1 2011-01-02 10:15:00\n"
235
+ "dtype: datetime64[ns]"
236
+ )
237
+
238
+ with pd.option_context("display.width", 300):
239
+ for idx, expected in zip(
240
+ [idx1, idx2, idx3, idx4, idx5, idx6, idx7],
241
+ [exp1, exp2, exp3, exp4, exp5, exp6, exp7],
242
+ ):
243
+ ser = Series(idx.as_unit(unit))
244
+ result = repr(ser)
245
+ assert result == expected.replace("[ns", f"[{unit}")
246
+
247
+ def test_dti_summary(self):
248
+ # GH#9116
249
+ idx1 = DatetimeIndex([], freq="D")
250
+ idx2 = DatetimeIndex(["2011-01-01"], freq="D")
251
+ idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D")
252
+ idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
253
+ idx5 = DatetimeIndex(
254
+ ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
255
+ freq="h",
256
+ tz="Asia/Tokyo",
257
+ )
258
+ idx6 = DatetimeIndex(
259
+ ["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
260
+ )
261
+
262
+ exp1 = "DatetimeIndex: 0 entries\nFreq: D"
263
+
264
+ exp2 = "DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01\nFreq: D"
265
+
266
+ exp3 = "DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02\nFreq: D"
267
+
268
+ exp4 = "DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03\nFreq: D"
269
+
270
+ exp5 = (
271
+ "DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 "
272
+ "to 2011-01-01 11:00:00+09:00\n"
273
+ "Freq: h"
274
+ )
275
+
276
+ exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT"""
277
+
278
+ for idx, expected in zip(
279
+ [idx1, idx2, idx3, idx4, idx5, idx6], [exp1, exp2, exp3, exp4, exp5, exp6]
280
+ ):
281
+ result = idx._summary()
282
+ assert result == expected
283
+
284
@pytest.mark.parametrize("tz", [None, pytz.utc, dateutil.tz.tzutc()])
@pytest.mark.parametrize("freq", ["B", "C"])
def test_dti_business_repr_etc_smoke(self, tz, freq):
    """Smoke test: repr and ``_summary`` work on business-day ranges."""
    # only really care that it works
    start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
    business_days = pd.bdate_range(start, end, tz=tz, freq=freq)
    repr(business_days)
    business_days._summary()
    # an empty slice must be summarizable too
    business_days[2:2]._summary()
294
+
295
+
296
+ class TestFormat:
297
+ def test_format(self):
298
+ # GH#35439
299
+ idx = pd.date_range("20130101", periods=5)
300
+ expected = [f"{x:%Y-%m-%d}" for x in idx]
301
+ msg = r"DatetimeIndex\.format is deprecated"
302
+ with tm.assert_produces_warning(FutureWarning, match=msg):
303
+ assert idx.format() == expected
304
+
305
+ def test_format_with_name_time_info(self):
306
+ # bug I fixed 12/20/2011
307
+ dates = pd.date_range("2011-01-01 04:00:00", periods=10, name="something")
308
+
309
+ msg = "DatetimeIndex.format is deprecated"
310
+ with tm.assert_produces_warning(FutureWarning, match=msg):
311
+ formatted = dates.format(name=True)
312
+ assert formatted[0] == "something"
313
+
314
+ def test_format_datetime_with_time(self):
315
+ dti = DatetimeIndex([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)])
316
+
317
+ msg = "DatetimeIndex.format is deprecated"
318
+ with tm.assert_produces_warning(FutureWarning, match=msg):
319
+ result = dti.format()
320
+ expected = ["2012-02-07 00:00:00", "2012-02-07 23:00:00"]
321
+ assert len(result) == 2
322
+ assert result == expected
323
+
324
+ def test_format_datetime(self):
325
+ msg = "DatetimeIndex.format is deprecated"
326
+ with tm.assert_produces_warning(FutureWarning, match=msg):
327
+ formatted = pd.to_datetime([datetime(2003, 1, 1, 12), NaT]).format()
328
+ assert formatted[0] == "2003-01-01 12:00:00"
329
+ assert formatted[1] == "NaT"
330
+
331
+ def test_format_date(self):
332
+ msg = "DatetimeIndex.format is deprecated"
333
+ with tm.assert_produces_warning(FutureWarning, match=msg):
334
+ formatted = pd.to_datetime([datetime(2003, 1, 1), NaT]).format()
335
+ assert formatted[0] == "2003-01-01"
336
+ assert formatted[1] == "NaT"
337
+
338
+ def test_format_date_tz(self):
339
+ dti = pd.to_datetime([datetime(2013, 1, 1)], utc=True)
340
+ msg = "DatetimeIndex.format is deprecated"
341
+ with tm.assert_produces_warning(FutureWarning, match=msg):
342
+ formatted = dti.format()
343
+ assert formatted[0] == "2013-01-01 00:00:00+00:00"
344
+
345
+ dti = pd.to_datetime([datetime(2013, 1, 1), NaT], utc=True)
346
+ with tm.assert_produces_warning(FutureWarning, match=msg):
347
+ formatted = dti.format()
348
+ assert formatted[0] == "2013-01-01 00:00:00+00:00"
349
+
350
+ def test_format_date_explicit_date_format(self):
351
+ dti = pd.to_datetime([datetime(2003, 2, 1), NaT])
352
+ msg = "DatetimeIndex.format is deprecated"
353
+ with tm.assert_produces_warning(FutureWarning, match=msg):
354
+ formatted = dti.format(date_format="%m-%d-%Y", na_rep="UT")
355
+ assert formatted[0] == "02-01-2003"
356
+ assert formatted[1] == "UT"
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_freq_attr.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ from pandas import (
4
+ DatetimeIndex,
5
+ date_range,
6
+ )
7
+
8
+ from pandas.tseries.offsets import (
9
+ BDay,
10
+ DateOffset,
11
+ Day,
12
+ Hour,
13
+ )
14
+
15
+
16
+ class TestFreq:
17
def test_freq_setter_errors(self):
    """Setting a non-conforming or unparseable freq raises ValueError (GH#20678)."""
    idx = DatetimeIndex(["20180101", "20180103", "20180105"])

    # setting with an incompatible freq
    mismatch_msg = (
        "Inferred frequency 2D from passed values does not conform to "
        "passed frequency 5D"
    )
    with pytest.raises(ValueError, match=mismatch_msg):
        idx._data.freq = "5D"

    # setting with non-freq string
    with pytest.raises(ValueError, match="Invalid frequency"):
        idx._data.freq = "foo"
32
+
33
@pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
@pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48h", Hour(48)])
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
def test_freq_setter(self, values, freq, tz):
    """The array's freq can be set from a string or offset and reset (GH#20678)."""
    idx = DatetimeIndex(values, tz=tz)
    array = idx._data

    # can set to an offset, converting from string if necessary
    array.freq = freq
    assert idx.freq == freq
    assert isinstance(idx.freq, DateOffset)

    # can reset to None
    array.freq = None
    assert idx.freq is None
48
+
49
+ def test_freq_view_safe(self):
50
+ # Setting the freq for one DatetimeIndex shouldn't alter the freq
51
+ # for another that views the same data
52
+
53
+ dti = date_range("2016-01-01", periods=5)
54
+ dta = dti._data
55
+
56
+ dti2 = DatetimeIndex(dta)._with_freq(None)
57
+ assert dti2.freq is None
58
+
59
+ # Original was not altered
60
+ assert dti.freq == "D"
61
+ assert dta.freq == "D"
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_indexing.py ADDED
@@ -0,0 +1,717 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import (
2
+ date,
3
+ datetime,
4
+ time,
5
+ timedelta,
6
+ )
7
+
8
+ import numpy as np
9
+ import pytest
10
+
11
+ from pandas._libs import index as libindex
12
+ from pandas.compat.numpy import np_long
13
+
14
+ import pandas as pd
15
+ from pandas import (
16
+ DatetimeIndex,
17
+ Index,
18
+ Timestamp,
19
+ bdate_range,
20
+ date_range,
21
+ notna,
22
+ )
23
+ import pandas._testing as tm
24
+
25
+ from pandas.tseries.frequencies import to_offset
26
+
27
+ START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
28
+
29
+
30
+ class TestGetItem:
31
+ def test_getitem_slice_keeps_name(self):
32
+ # GH4226
33
+ st = Timestamp("2013-07-01 00:00:00", tz="America/Los_Angeles")
34
+ et = Timestamp("2013-07-02 00:00:00", tz="America/Los_Angeles")
35
+ dr = date_range(st, et, freq="h", name="timebucket")
36
+ assert dr[1:].name == dr.name
37
+
38
@pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
def test_getitem(self, tz):
    """Scalar access returns a Timestamp; slices keep name, tz and a scaled freq."""
    idx = date_range("2011-01-01", "2011-01-31", freq="D", tz=tz, name="idx")

    assert idx[0] == Timestamp("2011-01-01", tz=idx.tz)

    def expected_range(first, last, freq):
        # Range with the same tz and name as ``idx``.
        return date_range(first, last, freq=freq, tz=idx.tz, name="idx")

    # (slice applied to idx, expected result)
    cases = [
        (slice(0, 5), expected_range("2011-01-01", "2011-01-05", "D")),
        (slice(0, 10, 2), expected_range("2011-01-01", "2011-01-09", "2D")),
        (slice(-20, -5, 3), expected_range("2011-01-12", "2011-01-24", "3D")),
        (
            slice(4, None, -1),
            DatetimeIndex(
                ["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"],
                dtype=idx.dtype,
                freq="-1D",
                name="idx",
            ),
        ),
    ]
    for key, expected in cases:
        result = idx[key]
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq
75
+
76
@pytest.mark.parametrize("freq", ["B", "C"])
def test_dti_business_getitem(self, freq):
    """Slices of a business range keep or scale freq; fancy indexing drops it."""
    rng = bdate_range(START, END, freq=freq)

    head = rng[:5]
    expected_head = DatetimeIndex(rng.view(np.ndarray)[:5], freq=freq)
    tm.assert_index_equal(head, expected_head)
    assert head.freq == expected_head.freq
    assert head.freq == rng.freq

    every_fifth = rng[::5]
    assert every_fifth.freq == to_offset(freq) * 5

    reordered = rng[[4, 3, 2, 1, 0]]
    assert len(reordered) == 5
    assert isinstance(reordered, DatetimeIndex)
    assert reordered.freq is None

    # 32-bit vs. 64-bit platforms
    assert rng[4] == rng[np_long(4)]
95
+
96
@pytest.mark.parametrize("freq", ["B", "C"])
def test_dti_business_getitem_matplotlib_hackaround(self, freq):
    """Indexing with ``[:, None]`` raises ValueError."""
    business_days = bdate_range(START, END, freq=freq)
    # GH#30588 multi-dimensional indexing deprecated
    with pytest.raises(ValueError, match="Multi-dimensional indexing"):
        business_days[:, None]
102
+
103
+ def test_getitem_int_list(self):
104
+ dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME")
105
+ dti2 = dti[[1, 3, 5]]
106
+
107
+ v1 = dti2[0]
108
+ v2 = dti2[1]
109
+ v3 = dti2[2]
110
+
111
+ assert v1 == Timestamp("2/28/2005")
112
+ assert v2 == Timestamp("4/30/2005")
113
+ assert v3 == Timestamp("6/30/2005")
114
+
115
+ # getitem with non-slice drops freq
116
+ assert dti2.freq is None
117
+
118
+
119
+ class TestWhere:
120
+ def test_where_doesnt_retain_freq(self):
121
+ dti = date_range("20130101", periods=3, freq="D", name="idx")
122
+ cond = [True, True, False]
123
+ expected = DatetimeIndex([dti[0], dti[1], dti[0]], freq=None, name="idx")
124
+
125
+ result = dti.where(cond, dti[::-1])
126
+ tm.assert_index_equal(result, expected)
127
+
128
+ def test_where_other(self):
129
+ # other is ndarray or Index
130
+ i = date_range("20130101", periods=3, tz="US/Eastern")
131
+
132
+ for arr in [np.nan, pd.NaT]:
133
+ result = i.where(notna(i), other=arr)
134
+ expected = i
135
+ tm.assert_index_equal(result, expected)
136
+
137
+ i2 = i.copy()
138
+ i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
139
+ result = i.where(notna(i2), i2)
140
+ tm.assert_index_equal(result, i2)
141
+
142
+ i2 = i.copy()
143
+ i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
144
+ result = i.where(notna(i2), i2._values)
145
+ tm.assert_index_equal(result, i2)
146
+
147
+ def test_where_invalid_dtypes(self):
148
+ dti = date_range("20130101", periods=3, tz="US/Eastern")
149
+
150
+ tail = dti[2:].tolist()
151
+ i2 = Index([pd.NaT, pd.NaT] + tail)
152
+
153
+ mask = notna(i2)
154
+
155
+ # passing tz-naive ndarray to tzaware DTI
156
+ result = dti.where(mask, i2.values)
157
+ expected = Index([pd.NaT.asm8, pd.NaT.asm8] + tail, dtype=object)
158
+ tm.assert_index_equal(result, expected)
159
+
160
+ # passing tz-aware DTI to tznaive DTI
161
+ naive = dti.tz_localize(None)
162
+ result = naive.where(mask, i2)
163
+ expected = Index([i2[0], i2[1]] + naive[2:].tolist(), dtype=object)
164
+ tm.assert_index_equal(result, expected)
165
+
166
+ pi = i2.tz_localize(None).to_period("D")
167
+ result = dti.where(mask, pi)
168
+ expected = Index([pi[0], pi[1]] + tail, dtype=object)
169
+ tm.assert_index_equal(result, expected)
170
+
171
+ tda = i2.asi8.view("timedelta64[ns]")
172
+ result = dti.where(mask, tda)
173
+ expected = Index([tda[0], tda[1]] + tail, dtype=object)
174
+ assert isinstance(expected[0], np.timedelta64)
175
+ tm.assert_index_equal(result, expected)
176
+
177
+ result = dti.where(mask, i2.asi8)
178
+ expected = Index([pd.NaT._value, pd.NaT._value] + tail, dtype=object)
179
+ assert isinstance(expected[0], int)
180
+ tm.assert_index_equal(result, expected)
181
+
182
+ # non-matching scalar
183
+ td = pd.Timedelta(days=4)
184
+ result = dti.where(mask, td)
185
+ expected = Index([td, td] + tail, dtype=object)
186
+ assert expected[0] is td
187
+ tm.assert_index_equal(result, expected)
188
+
189
+ def test_where_mismatched_nat(self, tz_aware_fixture):
190
+ tz = tz_aware_fixture
191
+ dti = date_range("2013-01-01", periods=3, tz=tz)
192
+ cond = np.array([True, False, True])
193
+
194
+ tdnat = np.timedelta64("NaT", "ns")
195
+ expected = Index([dti[0], tdnat, dti[2]], dtype=object)
196
+ assert expected[1] is tdnat
197
+
198
+ result = dti.where(cond, tdnat)
199
+ tm.assert_index_equal(result, expected)
200
+
201
+ def test_where_tz(self):
202
+ i = date_range("20130101", periods=3, tz="US/Eastern")
203
+ result = i.where(notna(i))
204
+ expected = i
205
+ tm.assert_index_equal(result, expected)
206
+
207
+ i2 = i.copy()
208
+ i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
209
+ result = i.where(notna(i2))
210
+ expected = i2
211
+ tm.assert_index_equal(result, expected)
212
+
213
+
214
+ class TestTake:
215
+ @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
216
+ def test_dti_take_dont_lose_meta(self, tzstr):
217
+ rng = date_range("1/1/2000", periods=20, tz=tzstr)
218
+
219
+ result = rng.take(range(5))
220
+ assert result.tz == rng.tz
221
+ assert result.freq == rng.freq
222
+
223
+ def test_take_nan_first_datetime(self):
224
+ index = DatetimeIndex([pd.NaT, Timestamp("20130101"), Timestamp("20130102")])
225
+ result = index.take([-1, 0, 1])
226
+ expected = DatetimeIndex([index[-1], index[0], index[1]])
227
+ tm.assert_index_equal(result, expected)
228
+
229
+ @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
230
+ def test_take(self, tz):
231
+ # GH#10295
232
+ idx = date_range("2011-01-01", "2011-01-31", freq="D", name="idx", tz=tz)
233
+
234
+ result = idx.take([0])
235
+ assert result == Timestamp("2011-01-01", tz=idx.tz)
236
+
237
+ result = idx.take([0, 1, 2])
238
+ expected = date_range(
239
+ "2011-01-01", "2011-01-03", freq="D", tz=idx.tz, name="idx"
240
+ )
241
+ tm.assert_index_equal(result, expected)
242
+ assert result.freq == expected.freq
243
+
244
+ result = idx.take([0, 2, 4])
245
+ expected = date_range(
246
+ "2011-01-01", "2011-01-05", freq="2D", tz=idx.tz, name="idx"
247
+ )
248
+ tm.assert_index_equal(result, expected)
249
+ assert result.freq == expected.freq
250
+
251
+ result = idx.take([7, 4, 1])
252
+ expected = date_range(
253
+ "2011-01-08", "2011-01-02", freq="-3D", tz=idx.tz, name="idx"
254
+ )
255
+ tm.assert_index_equal(result, expected)
256
+ assert result.freq == expected.freq
257
+
258
+ result = idx.take([3, 2, 5])
259
+ expected = DatetimeIndex(
260
+ ["2011-01-04", "2011-01-03", "2011-01-06"],
261
+ dtype=idx.dtype,
262
+ freq=None,
263
+ name="idx",
264
+ )
265
+ tm.assert_index_equal(result, expected)
266
+ assert result.freq is None
267
+
268
+ result = idx.take([-3, 2, 5])
269
+ expected = DatetimeIndex(
270
+ ["2011-01-29", "2011-01-03", "2011-01-06"],
271
+ dtype=idx.dtype,
272
+ freq=None,
273
+ name="idx",
274
+ )
275
+ tm.assert_index_equal(result, expected)
276
+ assert result.freq is None
277
+
278
+ def test_take_invalid_kwargs(self):
279
+ idx = date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
280
+ indices = [1, 6, 5, 9, 10, 13, 15, 3]
281
+
282
+ msg = r"take\(\) got an unexpected keyword argument 'foo'"
283
+ with pytest.raises(TypeError, match=msg):
284
+ idx.take(indices, foo=2)
285
+
286
+ msg = "the 'out' parameter is not supported"
287
+ with pytest.raises(ValueError, match=msg):
288
+ idx.take(indices, out=indices)
289
+
290
+ msg = "the 'mode' parameter is not supported"
291
+ with pytest.raises(ValueError, match=msg):
292
+ idx.take(indices, mode="clip")
293
+
294
+ # TODO: This method came from test_datetime; de-dup with version above
295
+ @pytest.mark.parametrize("tz", [None, "US/Eastern", "Asia/Tokyo"])
296
+ def test_take2(self, tz):
297
+ dates = [
298
+ datetime(2010, 1, 1, 14),
299
+ datetime(2010, 1, 1, 15),
300
+ datetime(2010, 1, 1, 17),
301
+ datetime(2010, 1, 1, 21),
302
+ ]
303
+
304
+ idx = date_range(
305
+ start="2010-01-01 09:00",
306
+ end="2010-02-01 09:00",
307
+ freq="h",
308
+ tz=tz,
309
+ name="idx",
310
+ )
311
+ expected = DatetimeIndex(dates, freq=None, name="idx", dtype=idx.dtype)
312
+
313
+ taken1 = idx.take([5, 6, 8, 12])
314
+ taken2 = idx[[5, 6, 8, 12]]
315
+
316
+ for taken in [taken1, taken2]:
317
+ tm.assert_index_equal(taken, expected)
318
+ assert isinstance(taken, DatetimeIndex)
319
+ assert taken.freq is None
320
+ assert taken.tz == expected.tz
321
+ assert taken.name == expected.name
322
+
323
+ def test_take_fill_value(self):
324
+ # GH#12631
325
+ idx = DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
326
+ result = idx.take(np.array([1, 0, -1]))
327
+ expected = DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx")
328
+ tm.assert_index_equal(result, expected)
329
+
330
+ # fill_value
331
+ result = idx.take(np.array([1, 0, -1]), fill_value=True)
332
+ expected = DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
333
+ tm.assert_index_equal(result, expected)
334
+
335
+ # allow_fill=False
336
+ result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
337
+ expected = DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx")
338
+ tm.assert_index_equal(result, expected)
339
+
340
+ msg = (
341
+ "When allow_fill=True and fill_value is not None, "
342
+ "all indices must be >= -1"
343
+ )
344
+ with pytest.raises(ValueError, match=msg):
345
+ idx.take(np.array([1, 0, -2]), fill_value=True)
346
+ with pytest.raises(ValueError, match=msg):
347
+ idx.take(np.array([1, 0, -5]), fill_value=True)
348
+
349
+ msg = "out of bounds"
350
+ with pytest.raises(IndexError, match=msg):
351
+ idx.take(np.array([1, -5]))
352
+
353
+ def test_take_fill_value_with_timezone(self):
354
+ idx = DatetimeIndex(
355
+ ["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx", tz="US/Eastern"
356
+ )
357
+ result = idx.take(np.array([1, 0, -1]))
358
+ expected = DatetimeIndex(
359
+ ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
360
+ )
361
+ tm.assert_index_equal(result, expected)
362
+
363
+ # fill_value
364
+ result = idx.take(np.array([1, 0, -1]), fill_value=True)
365
+ expected = DatetimeIndex(
366
+ ["2011-02-01", "2011-01-01", "NaT"], name="xxx", tz="US/Eastern"
367
+ )
368
+ tm.assert_index_equal(result, expected)
369
+
370
+ # allow_fill=False
371
+ result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
372
+ expected = DatetimeIndex(
373
+ ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
374
+ )
375
+ tm.assert_index_equal(result, expected)
376
+
377
+ msg = (
378
+ "When allow_fill=True and fill_value is not None, "
379
+ "all indices must be >= -1"
380
+ )
381
+ with pytest.raises(ValueError, match=msg):
382
+ idx.take(np.array([1, 0, -2]), fill_value=True)
383
+ with pytest.raises(ValueError, match=msg):
384
+ idx.take(np.array([1, 0, -5]), fill_value=True)
385
+
386
+ msg = "out of bounds"
387
+ with pytest.raises(IndexError, match=msg):
388
+ idx.take(np.array([1, -5]))
389
+
390
+
391
+ class TestGetLoc:
392
+ def test_get_loc_key_unit_mismatch(self):
393
+ idx = date_range("2000-01-01", periods=3)
394
+ key = idx[1].as_unit("ms")
395
+ loc = idx.get_loc(key)
396
+ assert loc == 1
397
+ assert key in idx
398
+
399
+ def test_get_loc_key_unit_mismatch_not_castable(self):
400
+ dta = date_range("2000-01-01", periods=3)._data.astype("M8[s]")
401
+ dti = DatetimeIndex(dta)
402
+ key = dta[0].as_unit("ns") + pd.Timedelta(1)
403
+
404
+ with pytest.raises(
405
+ KeyError, match=r"Timestamp\('2000-01-01 00:00:00.000000001'\)"
406
+ ):
407
+ dti.get_loc(key)
408
+
409
+ assert key not in dti
410
+
411
+ def test_get_loc_time_obj(self):
412
+ # time indexing
413
+ idx = date_range("2000-01-01", periods=24, freq="h")
414
+
415
+ result = idx.get_loc(time(12))
416
+ expected = np.array([12])
417
+ tm.assert_numpy_array_equal(result, expected, check_dtype=False)
418
+
419
+ result = idx.get_loc(time(12, 30))
420
+ expected = np.array([])
421
+ tm.assert_numpy_array_equal(result, expected, check_dtype=False)
422
+
423
+ @pytest.mark.parametrize("offset", [-10, 10])
424
+ def test_get_loc_time_obj2(self, monkeypatch, offset):
425
+ # GH#8667
426
+ size_cutoff = 50
427
+ n = size_cutoff + offset
428
+ key = time(15, 11, 30)
429
+ start = key.hour * 3600 + key.minute * 60 + key.second
430
+ step = 24 * 3600
431
+
432
+ with monkeypatch.context():
433
+ monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
434
+ idx = date_range("2014-11-26", periods=n, freq="s")
435
+ ts = pd.Series(np.random.default_rng(2).standard_normal(n), index=idx)
436
+ locs = np.arange(start, n, step, dtype=np.intp)
437
+
438
+ result = ts.index.get_loc(key)
439
+ tm.assert_numpy_array_equal(result, locs)
440
+ tm.assert_series_equal(ts[key], ts.iloc[locs])
441
+
442
+ left, right = ts.copy(), ts.copy()
443
+ left[key] *= -10
444
+ right.iloc[locs] *= -10
445
+ tm.assert_series_equal(left, right)
446
+
447
+ def test_get_loc_time_nat(self):
448
+ # GH#35114
449
+ # Case where key's total microseconds happens to match iNaT % 1e6 // 1000
450
+ tic = time(minute=12, second=43, microsecond=145224)
451
+ dti = DatetimeIndex([pd.NaT])
452
+
453
+ loc = dti.get_loc(tic)
454
+ expected = np.array([], dtype=np.intp)
455
+ tm.assert_numpy_array_equal(loc, expected)
456
+
457
+ def test_get_loc_nat(self):
458
+ # GH#20464
459
+ index = DatetimeIndex(["1/3/2000", "NaT"])
460
+ assert index.get_loc(pd.NaT) == 1
461
+
462
+ assert index.get_loc(None) == 1
463
+
464
+ assert index.get_loc(np.nan) == 1
465
+
466
+ assert index.get_loc(pd.NA) == 1
467
+
468
+ assert index.get_loc(np.datetime64("NaT")) == 1
469
+
470
+ with pytest.raises(KeyError, match="NaT"):
471
+ index.get_loc(np.timedelta64("NaT"))
472
+
473
+ @pytest.mark.parametrize("key", [pd.Timedelta(0), pd.Timedelta(1), timedelta(0)])
474
+ def test_get_loc_timedelta_invalid_key(self, key):
475
+ # GH#20464
476
+ dti = date_range("1970-01-01", periods=10)
477
+ msg = "Cannot index DatetimeIndex with [Tt]imedelta"
478
+ with pytest.raises(TypeError, match=msg):
479
+ dti.get_loc(key)
480
+
481
+ def test_get_loc_reasonable_key_error(self):
482
+ # GH#1062
483
+ index = DatetimeIndex(["1/3/2000"])
484
+ with pytest.raises(KeyError, match="2000"):
485
+ index.get_loc("1/1/2000")
486
+
487
+ def test_get_loc_year_str(self):
488
+ rng = date_range("1/1/2000", "1/1/2010")
489
+
490
+ result = rng.get_loc("2009")
491
+ expected = slice(3288, 3653)
492
+ assert result == expected
493
+
494
+
495
+ class TestContains:
496
+ def test_dti_contains_with_duplicates(self):
497
+ d = datetime(2011, 12, 5, 20, 30)
498
+ ix = DatetimeIndex([d, d])
499
+ assert d in ix
500
+
501
+ @pytest.mark.parametrize(
502
+ "vals",
503
+ [
504
+ [0, 1, 0],
505
+ [0, 0, -1],
506
+ [0, -1, -1],
507
+ ["2015", "2015", "2016"],
508
+ ["2015", "2015", "2014"],
509
+ ],
510
+ )
511
+ def test_contains_nonunique(self, vals):
512
+ # GH#9512
513
+ idx = DatetimeIndex(vals)
514
+ assert idx[0] in idx
515
+
516
+
517
+ class TestGetIndexer:
518
+ def test_get_indexer_date_objs(self):
519
+ rng = date_range("1/1/2000", periods=20)
520
+
521
+ result = rng.get_indexer(rng.map(lambda x: x.date()))
522
+ expected = rng.get_indexer(rng)
523
+ tm.assert_numpy_array_equal(result, expected)
524
+
525
+ def test_get_indexer(self):
526
+ idx = date_range("2000-01-01", periods=3)
527
+ exp = np.array([0, 1, 2], dtype=np.intp)
528
+ tm.assert_numpy_array_equal(idx.get_indexer(idx), exp)
529
+
530
+ target = idx[0] + pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"])
531
+ tm.assert_numpy_array_equal(
532
+ idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp)
533
+ )
534
+ tm.assert_numpy_array_equal(
535
+ idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp)
536
+ )
537
+ tm.assert_numpy_array_equal(
538
+ idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp)
539
+ )
540
+ tm.assert_numpy_array_equal(
541
+ idx.get_indexer(target, "nearest", tolerance=pd.Timedelta("1 hour")),
542
+ np.array([0, -1, 1], dtype=np.intp),
543
+ )
544
+ tol_raw = [
545
+ pd.Timedelta("1 hour"),
546
+ pd.Timedelta("1 hour"),
547
+ pd.Timedelta("1 hour").to_timedelta64(),
548
+ ]
549
+ tm.assert_numpy_array_equal(
550
+ idx.get_indexer(
551
+ target, "nearest", tolerance=[np.timedelta64(x) for x in tol_raw]
552
+ ),
553
+ np.array([0, -1, 1], dtype=np.intp),
554
+ )
555
+ tol_bad = [
556
+ pd.Timedelta("2 hour").to_timedelta64(),
557
+ pd.Timedelta("1 hour").to_timedelta64(),
558
+ "foo",
559
+ ]
560
+ msg = "Could not convert 'foo' to NumPy timedelta"
561
+ with pytest.raises(ValueError, match=msg):
562
+ idx.get_indexer(target, "nearest", tolerance=tol_bad)
563
+ with pytest.raises(ValueError, match="abbreviation w/o a number"):
564
+ idx.get_indexer(idx[[0]], method="nearest", tolerance="foo")
565
+
566
+ @pytest.mark.parametrize(
567
+ "target",
568
+ [
569
+ [date(2020, 1, 1), Timestamp("2020-01-02")],
570
+ [Timestamp("2020-01-01"), date(2020, 1, 2)],
571
+ ],
572
+ )
573
+ def test_get_indexer_mixed_dtypes(self, target):
574
+ # https://github.com/pandas-dev/pandas/issues/33741
575
+ values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")])
576
+ result = values.get_indexer(target)
577
+ expected = np.array([0, 1], dtype=np.intp)
578
+ tm.assert_numpy_array_equal(result, expected)
579
+
580
+ @pytest.mark.parametrize(
581
+ "target, positions",
582
+ [
583
+ ([date(9999, 1, 1), Timestamp("2020-01-01")], [-1, 0]),
584
+ ([Timestamp("2020-01-01"), date(9999, 1, 1)], [0, -1]),
585
+ ([date(9999, 1, 1), date(9999, 1, 1)], [-1, -1]),
586
+ ],
587
+ )
588
+ def test_get_indexer_out_of_bounds_date(self, target, positions):
589
+ values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")])
590
+
591
+ result = values.get_indexer(target)
592
+ expected = np.array(positions, dtype=np.intp)
593
+ tm.assert_numpy_array_equal(result, expected)
594
+
595
+ def test_get_indexer_pad_requires_monotonicity(self):
596
+ rng = date_range("1/1/2000", "3/1/2000", freq="B")
597
+
598
+ # neither monotonic increasing or decreasing
599
+ rng2 = rng[[1, 0, 2]]
600
+
601
+ msg = "index must be monotonic increasing or decreasing"
602
+ with pytest.raises(ValueError, match=msg):
603
+ rng2.get_indexer(rng, method="pad")
604
+
605
+
606
+ class TestMaybeCastSliceBound:
607
+ def test_maybe_cast_slice_bounds_empty(self):
608
+ # GH#14354
609
+ empty_idx = date_range(freq="1h", periods=0, end="2015")
610
+
611
+ right = empty_idx._maybe_cast_slice_bound("2015-01-02", "right")
612
+ exp = Timestamp("2015-01-02 23:59:59.999999999")
613
+ assert right == exp
614
+
615
+ left = empty_idx._maybe_cast_slice_bound("2015-01-02", "left")
616
+ exp = Timestamp("2015-01-02 00:00:00")
617
+ assert left == exp
618
+
619
+ def test_maybe_cast_slice_duplicate_monotonic(self):
620
+ # https://github.com/pandas-dev/pandas/issues/16515
621
+ idx = DatetimeIndex(["2017", "2017"])
622
+ result = idx._maybe_cast_slice_bound("2017-01-01", "left")
623
+ expected = Timestamp("2017-01-01")
624
+ assert result == expected
625
+
626
+
627
+ class TestGetSliceBounds:
628
+ @pytest.mark.parametrize("box", [date, datetime, Timestamp])
629
+ @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
630
+ def test_get_slice_bounds_datetime_within(
631
+ self, box, side, expected, tz_aware_fixture
632
+ ):
633
+ # GH 35690
634
+ tz = tz_aware_fixture
635
+ index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz)
636
+ key = box(year=2000, month=1, day=7)
637
+
638
+ if tz is not None:
639
+ with pytest.raises(TypeError, match="Cannot compare tz-naive"):
640
+ # GH#36148 we require tzawareness-compat as of 2.0
641
+ index.get_slice_bound(key, side=side)
642
+ else:
643
+ result = index.get_slice_bound(key, side=side)
644
+ assert result == expected
645
+
646
+ @pytest.mark.parametrize("box", [datetime, Timestamp])
647
+ @pytest.mark.parametrize("side", ["left", "right"])
648
+ @pytest.mark.parametrize("year, expected", [(1999, 0), (2020, 30)])
649
+ def test_get_slice_bounds_datetime_outside(
650
+ self, box, side, year, expected, tz_aware_fixture
651
+ ):
652
+ # GH 35690
653
+ tz = tz_aware_fixture
654
+ index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz)
655
+ key = box(year=year, month=1, day=7)
656
+
657
+ if tz is not None:
658
+ with pytest.raises(TypeError, match="Cannot compare tz-naive"):
659
+ # GH#36148 we require tzawareness-compat as of 2.0
660
+ index.get_slice_bound(key, side=side)
661
+ else:
662
+ result = index.get_slice_bound(key, side=side)
663
+ assert result == expected
664
+
665
+ @pytest.mark.parametrize("box", [datetime, Timestamp])
666
+ def test_slice_datetime_locs(self, box, tz_aware_fixture):
667
+ # GH 34077
668
+ tz = tz_aware_fixture
669
+ index = DatetimeIndex(["2010-01-01", "2010-01-03"]).tz_localize(tz)
670
+ key = box(2010, 1, 1)
671
+
672
+ if tz is not None:
673
+ with pytest.raises(TypeError, match="Cannot compare tz-naive"):
674
+ # GH#36148 we require tzawareness-compat as of 2.0
675
+ index.slice_locs(key, box(2010, 1, 2))
676
+ else:
677
+ result = index.slice_locs(key, box(2010, 1, 2))
678
+ expected = (0, 1)
679
+ assert result == expected
680
+
681
+
682
+ class TestIndexerBetweenTime:
683
+ def test_indexer_between_time(self):
684
+ # GH#11818
685
+ rng = date_range("1/1/2000", "1/5/2000", freq="5min")
686
+ msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time"
687
+ with pytest.raises(ValueError, match=msg):
688
+ rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
689
+
690
+ @pytest.mark.parametrize("unit", ["us", "ms", "s"])
691
+ def test_indexer_between_time_non_nano(self, unit):
692
+ # For simple cases like this, the non-nano indexer_between_time
693
+ # should match the nano result
694
+
695
+ rng = date_range("1/1/2000", "1/5/2000", freq="5min")
696
+ arr_nano = rng._data._ndarray
697
+
698
+ arr = arr_nano.astype(f"M8[{unit}]")
699
+
700
+ dta = type(rng._data)._simple_new(arr, dtype=arr.dtype)
701
+ dti = DatetimeIndex(dta)
702
+ assert dti.dtype == arr.dtype
703
+
704
+ tic = time(1, 25)
705
+ toc = time(2, 29)
706
+
707
+ result = dti.indexer_between_time(tic, toc)
708
+ expected = rng.indexer_between_time(tic, toc)
709
+ tm.assert_numpy_array_equal(result, expected)
710
+
711
+ # case with non-zero micros in arguments
712
+ tic = time(1, 25, 0, 45678)
713
+ toc = time(2, 29, 0, 1234)
714
+
715
+ result = dti.indexer_between_time(tic, toc)
716
+ expected = rng.indexer_between_time(tic, toc)
717
+ tm.assert_numpy_array_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_iter.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dateutil.tz
2
+ import numpy as np
3
+ import pytest
4
+
5
+ from pandas import (
6
+ DatetimeIndex,
7
+ date_range,
8
+ to_datetime,
9
+ )
10
+ from pandas.core.arrays import datetimes
11
+
12
+
13
+ class TestDatetimeIndexIteration:
14
+ @pytest.mark.parametrize(
15
+ "tz", [None, "UTC", "US/Central", dateutil.tz.tzoffset(None, -28800)]
16
+ )
17
+ def test_iteration_preserves_nanoseconds(self, tz):
18
+ # GH#19603
19
+ index = DatetimeIndex(
20
+ ["2018-02-08 15:00:00.168456358", "2018-02-08 15:00:00.168456359"], tz=tz
21
+ )
22
+ for i, ts in enumerate(index):
23
+ assert ts == index[i] # pylint: disable=unnecessary-list-index-lookup
24
+
25
+ def test_iter_readonly(self):
26
+ # GH#28055 ints_to_pydatetime with readonly array
27
+ arr = np.array([np.datetime64("2012-02-15T12:00:00.000000000")])
28
+ arr.setflags(write=False)
29
+ dti = to_datetime(arr)
30
+ list(dti)
31
+
32
+ def test_iteration_preserves_tz(self):
33
+ # see GH#8890
34
+ index = date_range("2012-01-01", periods=3, freq="h", tz="US/Eastern")
35
+
36
+ for i, ts in enumerate(index):
37
+ result = ts
38
+ expected = index[i] # pylint: disable=unnecessary-list-index-lookup
39
+ assert result == expected
40
+
41
+ def test_iteration_preserves_tz2(self):
42
+ index = date_range(
43
+ "2012-01-01", periods=3, freq="h", tz=dateutil.tz.tzoffset(None, -28800)
44
+ )
45
+
46
+ for i, ts in enumerate(index):
47
+ result = ts
48
+ expected = index[i] # pylint: disable=unnecessary-list-index-lookup
49
+ assert result._repr_base == expected._repr_base
50
+ assert result == expected
51
+
52
+ def test_iteration_preserves_tz3(self):
53
+ # GH#9100
54
+ index = DatetimeIndex(
55
+ ["2014-12-01 03:32:39.987000-08:00", "2014-12-01 04:12:34.987000-08:00"]
56
+ )
57
+ for i, ts in enumerate(index):
58
+ result = ts
59
+ expected = index[i] # pylint: disable=unnecessary-list-index-lookup
60
+ assert result._repr_base == expected._repr_base
61
+ assert result == expected
62
+
63
+ @pytest.mark.parametrize("offset", [-5, -1, 0, 1])
64
+ def test_iteration_over_chunksize(self, offset, monkeypatch):
65
+ # GH#21012
66
+ chunksize = 5
67
+ index = date_range(
68
+ "2000-01-01 00:00:00", periods=chunksize - offset, freq="min"
69
+ )
70
+ num = 0
71
+ with monkeypatch.context() as m:
72
+ m.setattr(datetimes, "_ITER_CHUNKSIZE", chunksize)
73
+ for stamp in index:
74
+ assert index[num] == stamp
75
+ num += 1
76
+ assert num == len(index)
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_join.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import (
2
+ datetime,
3
+ timezone,
4
+ )
5
+
6
+ import numpy as np
7
+ import pytest
8
+
9
+ from pandas import (
10
+ DataFrame,
11
+ DatetimeIndex,
12
+ Index,
13
+ Timestamp,
14
+ date_range,
15
+ period_range,
16
+ to_datetime,
17
+ )
18
+ import pandas._testing as tm
19
+
20
+ from pandas.tseries.offsets import (
21
+ BDay,
22
+ BMonthEnd,
23
+ )
24
+
25
+
26
+ class TestJoin:
27
+ def test_does_not_convert_mixed_integer(self):
28
+ df = DataFrame(np.ones((3, 2)), columns=date_range("2020-01-01", periods=2))
29
+ cols = df.columns.join(df.index, how="outer")
30
+ joined = cols.join(df.columns)
31
+ assert cols.dtype == np.dtype("O")
32
+ assert cols.dtype == joined.dtype
33
+ tm.assert_numpy_array_equal(cols.values, joined.values)
34
+
35
+ def test_join_self(self, join_type):
36
+ index = date_range("1/1/2000", periods=10)
37
+ joined = index.join(index, how=join_type)
38
+ assert index is joined
39
+
40
+ def test_join_with_period_index(self, join_type):
41
+ df = DataFrame(
42
+ np.ones((10, 2)),
43
+ index=date_range("2020-01-01", periods=10),
44
+ columns=period_range("2020-01-01", periods=2),
45
+ )
46
+ s = df.iloc[:5, 0]
47
+
48
+ expected = df.columns.astype("O").join(s.index, how=join_type)
49
+ result = df.columns.join(s.index, how=join_type)
50
+ tm.assert_index_equal(expected, result)
51
+
52
+ def test_join_object_index(self):
53
+ rng = date_range("1/1/2000", periods=10)
54
+ idx = Index(["a", "b", "c", "d"])
55
+
56
+ result = rng.join(idx, how="outer")
57
+ assert isinstance(result[0], Timestamp)
58
+
59
+ def test_join_utc_convert(self, join_type):
60
+ rng = date_range("1/1/2011", periods=100, freq="h", tz="utc")
61
+
62
+ left = rng.tz_convert("US/Eastern")
63
+ right = rng.tz_convert("Europe/Berlin")
64
+
65
+ result = left.join(left[:-5], how=join_type)
66
+ assert isinstance(result, DatetimeIndex)
67
+ assert result.tz == left.tz
68
+
69
+ result = left.join(right[:-5], how=join_type)
70
+ assert isinstance(result, DatetimeIndex)
71
+ assert result.tz is timezone.utc
72
+
73
+ def test_datetimeindex_union_join_empty(self, sort, using_infer_string):
74
+ dti = date_range(start="1/1/2001", end="2/1/2001", freq="D")
75
+ empty = Index([])
76
+
77
+ result = dti.union(empty, sort=sort)
78
+ if using_infer_string:
79
+ assert isinstance(result, DatetimeIndex)
80
+ tm.assert_index_equal(result, dti)
81
+ else:
82
+ expected = dti.astype("O")
83
+ tm.assert_index_equal(result, expected)
84
+
85
+ result = dti.join(empty)
86
+ assert isinstance(result, DatetimeIndex)
87
+ tm.assert_index_equal(result, dti)
88
+
89
+ def test_join_nonunique(self):
90
+ idx1 = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"])
91
+ idx2 = to_datetime(["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"])
92
+ rs = idx1.join(idx2, how="outer")
93
+ assert rs.is_monotonic_increasing
94
+
95
+ @pytest.mark.parametrize("freq", ["B", "C"])
96
+ def test_outer_join(self, freq):
97
+ # should just behave as union
98
+ start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
99
+ rng = date_range(start=start, end=end, freq=freq)
100
+
101
+ # overlapping
102
+ left = rng[:10]
103
+ right = rng[5:10]
104
+
105
+ the_join = left.join(right, how="outer")
106
+ assert isinstance(the_join, DatetimeIndex)
107
+
108
+ # non-overlapping, gap in middle
109
+ left = rng[:5]
110
+ right = rng[10:]
111
+
112
+ the_join = left.join(right, how="outer")
113
+ assert isinstance(the_join, DatetimeIndex)
114
+ assert the_join.freq is None
115
+
116
+ # non-overlapping, no gap
117
+ left = rng[:5]
118
+ right = rng[5:10]
119
+
120
+ the_join = left.join(right, how="outer")
121
+ assert isinstance(the_join, DatetimeIndex)
122
+
123
+ # overlapping, but different offset
124
+ other = date_range(start, end, freq=BMonthEnd())
125
+
126
+ the_join = rng.join(other, how="outer")
127
+ assert isinstance(the_join, DatetimeIndex)
128
+ assert the_join.freq is None
129
+
130
+ def test_naive_aware_conflicts(self):
131
+ start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
132
+ naive = date_range(start, end, freq=BDay(), tz=None)
133
+ aware = date_range(start, end, freq=BDay(), tz="Asia/Hong_Kong")
134
+
135
+ msg = "tz-naive.*tz-aware"
136
+ with pytest.raises(TypeError, match=msg):
137
+ naive.join(aware)
138
+
139
+ with pytest.raises(TypeError, match=msg):
140
+ aware.join(naive)
141
+
142
+ @pytest.mark.parametrize("tz", [None, "US/Pacific"])
143
+ def test_join_preserves_freq(self, tz):
144
+ # GH#32157
145
+ dti = date_range("2016-01-01", periods=10, tz=tz)
146
+ result = dti[:5].join(dti[5:], how="outer")
147
+ assert result.freq == dti.freq
148
+ tm.assert_index_equal(result, dti)
149
+
150
+ result = dti[:5].join(dti[6:], how="outer")
151
+ assert result.freq is None
152
+ expected = dti.delete(5)
153
+ tm.assert_index_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_npfuncs.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ from pandas import date_range
4
+ import pandas._testing as tm
5
+
6
+
7
+ class TestSplit:
8
+ def test_split_non_utc(self):
9
+ # GH#14042
10
+ indices = date_range("2016-01-01 00:00:00+0200", freq="s", periods=10)
11
+ result = np.split(indices, indices_or_sections=[])[0]
12
+ expected = indices._with_freq(None)
13
+ tm.assert_index_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_ops.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+
3
+ import pytest
4
+
5
+ from pandas import (
6
+ DatetimeIndex,
7
+ Index,
8
+ bdate_range,
9
+ date_range,
10
+ )
11
+ import pandas._testing as tm
12
+
13
+
14
+ class TestDatetimeIndexOps:
15
+ def test_infer_freq(self, freq_sample):
16
+ # GH 11018
17
+ idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
18
+ result = DatetimeIndex(idx.asi8, freq="infer")
19
+ tm.assert_index_equal(idx, result)
20
+ assert result.freq == freq_sample
21
+
22
+
23
+ @pytest.mark.parametrize("freq", ["B", "C"])
24
+ class TestBusinessDatetimeIndex:
25
+ @pytest.fixture
26
+ def rng(self, freq):
27
+ START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
28
+ return bdate_range(START, END, freq=freq)
29
+
30
+ def test_comparison(self, rng):
31
+ d = rng[10]
32
+
33
+ comp = rng > d
34
+ assert comp[11]
35
+ assert not comp[9]
36
+
37
+ def test_copy(self, rng):
38
+ cp = rng.copy()
39
+ tm.assert_index_equal(cp, rng)
40
+
41
+ def test_identical(self, rng):
42
+ t1 = rng.copy()
43
+ t2 = rng.copy()
44
+ assert t1.identical(t2)
45
+
46
+ # name
47
+ t1 = t1.rename("foo")
48
+ assert t1.equals(t2)
49
+ assert not t1.identical(t2)
50
+ t2 = t2.rename("foo")
51
+ assert t1.identical(t2)
52
+
53
+ # freq
54
+ t2v = Index(t2.values)
55
+ assert t1.equals(t2v)
56
+ assert not t1.identical(t2v)
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_partial_slicing.py ADDED
@@ -0,0 +1,466 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ test partial slicing on Series/Frame """
2
+
3
+ from datetime import datetime
4
+
5
+ import numpy as np
6
+ import pytest
7
+
8
+ from pandas import (
9
+ DataFrame,
10
+ DatetimeIndex,
11
+ Index,
12
+ MultiIndex,
13
+ Series,
14
+ Timedelta,
15
+ Timestamp,
16
+ date_range,
17
+ )
18
+ import pandas._testing as tm
19
+
20
+
21
+ class TestSlicing:
22
+ def test_string_index_series_name_converted(self):
23
+ # GH#1644
24
+ df = DataFrame(
25
+ np.random.default_rng(2).standard_normal((10, 4)),
26
+ index=date_range("1/1/2000", periods=10),
27
+ )
28
+
29
+ result = df.loc["1/3/2000"]
30
+ assert result.name == df.index[2]
31
+
32
+ result = df.T["1/3/2000"]
33
+ assert result.name == df.index[2]
34
+
35
+ def test_stringified_slice_with_tz(self):
36
+ # GH#2658
37
+ start = "2013-01-07"
38
+ idx = date_range(start=start, freq="1d", periods=10, tz="US/Eastern")
39
+ df = DataFrame(np.arange(10), index=idx)
40
+ df["2013-01-14 23:44:34.437768-05:00":] # no exception here
41
+
42
+ def test_return_type_doesnt_depend_on_monotonicity(self):
43
+ # GH#24892 we get Series back regardless of whether our DTI is monotonic
44
+ dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3)
45
+ ser = Series(range(3), index=dti)
46
+
47
+ # non-monotonic index
48
+ ser2 = Series(range(3), index=[dti[1], dti[0], dti[2]])
49
+
50
+ # key with resolution strictly lower than "min"
51
+ key = "2015-5-14 00"
52
+
53
+ # monotonic increasing index
54
+ result = ser.loc[key]
55
+ expected = ser.iloc[1:]
56
+ tm.assert_series_equal(result, expected)
57
+
58
+ # monotonic decreasing index
59
+ result = ser.iloc[::-1].loc[key]
60
+ expected = ser.iloc[::-1][:-1]
61
+ tm.assert_series_equal(result, expected)
62
+
63
+ # non-monotonic index
64
+ result2 = ser2.loc[key]
65
+ expected2 = ser2.iloc[::2]
66
+ tm.assert_series_equal(result2, expected2)
67
+
68
+ def test_return_type_doesnt_depend_on_monotonicity_higher_reso(self):
69
+ # GH#24892 we get Series back regardless of whether our DTI is monotonic
70
+ dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3)
71
+ ser = Series(range(3), index=dti)
72
+
73
+ # non-monotonic index
74
+ ser2 = Series(range(3), index=[dti[1], dti[0], dti[2]])
75
+
76
+ # key with resolution strictly *higher) than "min"
77
+ key = "2015-5-14 00:00:00"
78
+
79
+ # monotonic increasing index
80
+ result = ser.loc[key]
81
+ assert result == 1
82
+
83
+ # monotonic decreasing index
84
+ result = ser.iloc[::-1].loc[key]
85
+ assert result == 1
86
+
87
+ # non-monotonic index
88
+ result2 = ser2.loc[key]
89
+ assert result2 == 0
90
+
91
+ def test_monotone_DTI_indexing_bug(self):
92
+ # GH 19362
93
+ # Testing accessing the first element in a monotonic descending
94
+ # partial string indexing.
95
+
96
+ df = DataFrame(list(range(5)))
97
+ date_list = [
98
+ "2018-01-02",
99
+ "2017-02-10",
100
+ "2016-03-10",
101
+ "2015-03-15",
102
+ "2014-03-16",
103
+ ]
104
+ date_index = DatetimeIndex(date_list)
105
+ df["date"] = date_index
106
+ expected = DataFrame({0: list(range(5)), "date": date_index})
107
+ tm.assert_frame_equal(df, expected)
108
+
109
+ # We get a slice because df.index's resolution is hourly and we
110
+ # are slicing with a daily-resolution string. If both were daily,
111
+ # we would get a single item back
112
+ dti = date_range("20170101 01:00:00", periods=3)
113
+ df = DataFrame({"A": [1, 2, 3]}, index=dti[::-1])
114
+
115
+ expected = DataFrame({"A": 1}, index=dti[-1:][::-1])
116
+ result = df.loc["2017-01-03"]
117
+ tm.assert_frame_equal(result, expected)
118
+
119
+ result2 = df.iloc[::-1].loc["2017-01-03"]
120
+ expected2 = expected.iloc[::-1]
121
+ tm.assert_frame_equal(result2, expected2)
122
+
123
+ def test_slice_year(self):
124
+ dti = date_range(freq="B", start=datetime(2005, 1, 1), periods=500)
125
+
126
+ s = Series(np.arange(len(dti)), index=dti)
127
+ result = s["2005"]
128
+ expected = s[s.index.year == 2005]
129
+ tm.assert_series_equal(result, expected)
130
+
131
+ df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
132
+ result = df.loc["2005"]
133
+ expected = df[df.index.year == 2005]
134
+ tm.assert_frame_equal(result, expected)
135
+
136
+ @pytest.mark.parametrize(
137
+ "partial_dtime",
138
+ [
139
+ "2019",
140
+ "2019Q4",
141
+ "Dec 2019",
142
+ "2019-12-31",
143
+ "2019-12-31 23",
144
+ "2019-12-31 23:59",
145
+ ],
146
+ )
147
+ def test_slice_end_of_period_resolution(self, partial_dtime):
148
+ # GH#31064
149
+ dti = date_range("2019-12-31 23:59:55.999999999", periods=10, freq="s")
150
+
151
+ ser = Series(range(10), index=dti)
152
+ result = ser[partial_dtime]
153
+ expected = ser.iloc[:5]
154
+ tm.assert_series_equal(result, expected)
155
+
156
+ def test_slice_quarter(self):
157
+ dti = date_range(freq="D", start=datetime(2000, 6, 1), periods=500)
158
+
159
+ s = Series(np.arange(len(dti)), index=dti)
160
+ assert len(s["2001Q1"]) == 90
161
+
162
+ df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
163
+ assert len(df.loc["1Q01"]) == 90
164
+
165
+ def test_slice_month(self):
166
+ dti = date_range(freq="D", start=datetime(2005, 1, 1), periods=500)
167
+ s = Series(np.arange(len(dti)), index=dti)
168
+ assert len(s["2005-11"]) == 30
169
+
170
+ df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
171
+ assert len(df.loc["2005-11"]) == 30
172
+
173
+ tm.assert_series_equal(s["2005-11"], s["11-2005"])
174
+
175
+ def test_partial_slice(self):
176
+ rng = date_range(freq="D", start=datetime(2005, 1, 1), periods=500)
177
+ s = Series(np.arange(len(rng)), index=rng)
178
+
179
+ result = s["2005-05":"2006-02"]
180
+ expected = s["20050501":"20060228"]
181
+ tm.assert_series_equal(result, expected)
182
+
183
+ result = s["2005-05":]
184
+ expected = s["20050501":]
185
+ tm.assert_series_equal(result, expected)
186
+
187
+ result = s[:"2006-02"]
188
+ expected = s[:"20060228"]
189
+ tm.assert_series_equal(result, expected)
190
+
191
+ result = s["2005-1-1"]
192
+ assert result == s.iloc[0]
193
+
194
+ with pytest.raises(KeyError, match=r"^'2004-12-31'$"):
195
+ s["2004-12-31"]
196
+
197
+ def test_partial_slice_daily(self):
198
+ rng = date_range(freq="h", start=datetime(2005, 1, 31), periods=500)
199
+ s = Series(np.arange(len(rng)), index=rng)
200
+
201
+ result = s["2005-1-31"]
202
+ tm.assert_series_equal(result, s.iloc[:24])
203
+
204
+ with pytest.raises(KeyError, match=r"^'2004-12-31 00'$"):
205
+ s["2004-12-31 00"]
206
+
207
+ def test_partial_slice_hourly(self):
208
+ rng = date_range(freq="min", start=datetime(2005, 1, 1, 20, 0, 0), periods=500)
209
+ s = Series(np.arange(len(rng)), index=rng)
210
+
211
+ result = s["2005-1-1"]
212
+ tm.assert_series_equal(result, s.iloc[: 60 * 4])
213
+
214
+ result = s["2005-1-1 20"]
215
+ tm.assert_series_equal(result, s.iloc[:60])
216
+
217
+ assert s["2005-1-1 20:00"] == s.iloc[0]
218
+ with pytest.raises(KeyError, match=r"^'2004-12-31 00:15'$"):
219
+ s["2004-12-31 00:15"]
220
+
221
+ def test_partial_slice_minutely(self):
222
+ rng = date_range(freq="s", start=datetime(2005, 1, 1, 23, 59, 0), periods=500)
223
+ s = Series(np.arange(len(rng)), index=rng)
224
+
225
+ result = s["2005-1-1 23:59"]
226
+ tm.assert_series_equal(result, s.iloc[:60])
227
+
228
+ result = s["2005-1-1"]
229
+ tm.assert_series_equal(result, s.iloc[:60])
230
+
231
+ assert s[Timestamp("2005-1-1 23:59:00")] == s.iloc[0]
232
+ with pytest.raises(KeyError, match=r"^'2004-12-31 00:00:00'$"):
233
+ s["2004-12-31 00:00:00"]
234
+
235
+ def test_partial_slice_second_precision(self):
236
+ rng = date_range(
237
+ start=datetime(2005, 1, 1, 0, 0, 59, microsecond=999990),
238
+ periods=20,
239
+ freq="us",
240
+ )
241
+ s = Series(np.arange(20), rng)
242
+
243
+ tm.assert_series_equal(s["2005-1-1 00:00"], s.iloc[:10])
244
+ tm.assert_series_equal(s["2005-1-1 00:00:59"], s.iloc[:10])
245
+
246
+ tm.assert_series_equal(s["2005-1-1 00:01"], s.iloc[10:])
247
+ tm.assert_series_equal(s["2005-1-1 00:01:00"], s.iloc[10:])
248
+
249
+ assert s[Timestamp("2005-1-1 00:00:59.999990")] == s.iloc[0]
250
+ with pytest.raises(KeyError, match="2005-1-1 00:00:00"):
251
+ s["2005-1-1 00:00:00"]
252
+
253
+ def test_partial_slicing_dataframe(self):
254
+ # GH14856
255
+ # Test various combinations of string slicing resolution vs.
256
+ # index resolution
257
+ # - If string resolution is less precise than index resolution,
258
+ # string is considered a slice
259
+ # - If string resolution is equal to or more precise than index
260
+ # resolution, string is considered an exact match
261
+ formats = [
262
+ "%Y",
263
+ "%Y-%m",
264
+ "%Y-%m-%d",
265
+ "%Y-%m-%d %H",
266
+ "%Y-%m-%d %H:%M",
267
+ "%Y-%m-%d %H:%M:%S",
268
+ ]
269
+ resolutions = ["year", "month", "day", "hour", "minute", "second"]
270
+ for rnum, resolution in enumerate(resolutions[2:], 2):
271
+ # we check only 'day', 'hour', 'minute' and 'second'
272
+ unit = Timedelta("1 " + resolution)
273
+ middate = datetime(2012, 1, 1, 0, 0, 0)
274
+ index = DatetimeIndex([middate - unit, middate, middate + unit])
275
+ values = [1, 2, 3]
276
+ df = DataFrame({"a": values}, index, dtype=np.int64)
277
+ assert df.index.resolution == resolution
278
+
279
+ # Timestamp with the same resolution as index
280
+ # Should be exact match for Series (return scalar)
281
+ # and raise KeyError for Frame
282
+ for timestamp, expected in zip(index, values):
283
+ ts_string = timestamp.strftime(formats[rnum])
284
+ # make ts_string as precise as index
285
+ result = df["a"][ts_string]
286
+ assert isinstance(result, np.int64)
287
+ assert result == expected
288
+ msg = rf"^'{ts_string}'$"
289
+ with pytest.raises(KeyError, match=msg):
290
+ df[ts_string]
291
+
292
+ # Timestamp with resolution less precise than index
293
+ for fmt in formats[:rnum]:
294
+ for element, theslice in [[0, slice(None, 1)], [1, slice(1, None)]]:
295
+ ts_string = index[element].strftime(fmt)
296
+
297
+ # Series should return slice
298
+ result = df["a"][ts_string]
299
+ expected = df["a"][theslice]
300
+ tm.assert_series_equal(result, expected)
301
+
302
+ # pre-2.0 df[ts_string] was overloaded to interpret this
303
+ # as slicing along index
304
+ with pytest.raises(KeyError, match=ts_string):
305
+ df[ts_string]
306
+
307
+ # Timestamp with resolution more precise than index
308
+ # Compatible with existing key
309
+ # Should return scalar for Series
310
+ # and raise KeyError for Frame
311
+ for fmt in formats[rnum + 1 :]:
312
+ ts_string = index[1].strftime(fmt)
313
+ result = df["a"][ts_string]
314
+ assert isinstance(result, np.int64)
315
+ assert result == 2
316
+ msg = rf"^'{ts_string}'$"
317
+ with pytest.raises(KeyError, match=msg):
318
+ df[ts_string]
319
+
320
+ # Not compatible with existing key
321
+ # Should raise KeyError
322
+ for fmt, res in list(zip(formats, resolutions))[rnum + 1 :]:
323
+ ts = index[1] + Timedelta("1 " + res)
324
+ ts_string = ts.strftime(fmt)
325
+ msg = rf"^'{ts_string}'$"
326
+ with pytest.raises(KeyError, match=msg):
327
+ df["a"][ts_string]
328
+ with pytest.raises(KeyError, match=msg):
329
+ df[ts_string]
330
+
331
+ def test_partial_slicing_with_multiindex(self):
332
+ # GH 4758
333
+ # partial string indexing with a multi-index buggy
334
+ df = DataFrame(
335
+ {
336
+ "ACCOUNT": ["ACCT1", "ACCT1", "ACCT1", "ACCT2"],
337
+ "TICKER": ["ABC", "MNP", "XYZ", "XYZ"],
338
+ "val": [1, 2, 3, 4],
339
+ },
340
+ index=date_range("2013-06-19 09:30:00", periods=4, freq="5min"),
341
+ )
342
+ df_multi = df.set_index(["ACCOUNT", "TICKER"], append=True)
343
+
344
+ expected = DataFrame(
345
+ [[1]], index=Index(["ABC"], name="TICKER"), columns=["val"]
346
+ )
347
+ result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1")]
348
+ tm.assert_frame_equal(result, expected)
349
+
350
+ expected = df_multi.loc[
351
+ (Timestamp("2013-06-19 09:30:00", tz=None), "ACCT1", "ABC")
352
+ ]
353
+ result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1", "ABC")]
354
+ tm.assert_series_equal(result, expected)
355
+
356
+ # partial string indexing on first level, scalar indexing on the other two
357
+ result = df_multi.loc[("2013-06-19", "ACCT1", "ABC")]
358
+ expected = df_multi.iloc[:1].droplevel([1, 2])
359
+ tm.assert_frame_equal(result, expected)
360
+
361
+ def test_partial_slicing_with_multiindex_series(self):
362
+ # GH 4294
363
+ # partial slice on a series mi
364
+ ser = Series(
365
+ range(250),
366
+ index=MultiIndex.from_product(
367
+ [date_range("2000-1-1", periods=50), range(5)]
368
+ ),
369
+ )
370
+
371
+ s2 = ser[:-1].copy()
372
+ expected = s2["2000-1-4"]
373
+ result = s2[Timestamp("2000-1-4")]
374
+ tm.assert_series_equal(result, expected)
375
+
376
+ result = ser[Timestamp("2000-1-4")]
377
+ expected = ser["2000-1-4"]
378
+ tm.assert_series_equal(result, expected)
379
+
380
+ df2 = DataFrame(ser)
381
+ expected = df2.xs("2000-1-4")
382
+ result = df2.loc[Timestamp("2000-1-4")]
383
+ tm.assert_frame_equal(result, expected)
384
+
385
+ def test_partial_slice_requires_monotonicity(self):
386
+ # Disallowed since 2.0 (GH 37819)
387
+ ser = Series(np.arange(10), date_range("2014-01-01", periods=10))
388
+
389
+ nonmonotonic = ser.iloc[[3, 5, 4]]
390
+ timestamp = Timestamp("2014-01-10")
391
+ with pytest.raises(
392
+ KeyError, match="Value based partial slicing on non-monotonic"
393
+ ):
394
+ nonmonotonic["2014-01-10":]
395
+
396
+ with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"):
397
+ nonmonotonic[timestamp:]
398
+
399
+ with pytest.raises(
400
+ KeyError, match="Value based partial slicing on non-monotonic"
401
+ ):
402
+ nonmonotonic.loc["2014-01-10":]
403
+
404
+ with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"):
405
+ nonmonotonic.loc[timestamp:]
406
+
407
+ def test_loc_datetime_length_one(self):
408
+ # GH16071
409
+ df = DataFrame(
410
+ columns=["1"],
411
+ index=date_range("2016-10-01T00:00:00", "2016-10-01T23:59:59"),
412
+ )
413
+ result = df.loc[datetime(2016, 10, 1) :]
414
+ tm.assert_frame_equal(result, df)
415
+
416
+ result = df.loc["2016-10-01T00:00:00":]
417
+ tm.assert_frame_equal(result, df)
418
+
419
+ @pytest.mark.parametrize(
420
+ "start",
421
+ [
422
+ "2018-12-02 21:50:00+00:00",
423
+ Timestamp("2018-12-02 21:50:00+00:00"),
424
+ Timestamp("2018-12-02 21:50:00+00:00").to_pydatetime(),
425
+ ],
426
+ )
427
+ @pytest.mark.parametrize(
428
+ "end",
429
+ [
430
+ "2018-12-02 21:52:00+00:00",
431
+ Timestamp("2018-12-02 21:52:00+00:00"),
432
+ Timestamp("2018-12-02 21:52:00+00:00").to_pydatetime(),
433
+ ],
434
+ )
435
+ def test_getitem_with_datestring_with_UTC_offset(self, start, end):
436
+ # GH 24076
437
+ idx = date_range(
438
+ start="2018-12-02 14:50:00-07:00",
439
+ end="2018-12-02 14:50:00-07:00",
440
+ freq="1min",
441
+ )
442
+ df = DataFrame(1, index=idx, columns=["A"])
443
+ result = df[start:end]
444
+ expected = df.iloc[0:3, :]
445
+ tm.assert_frame_equal(result, expected)
446
+
447
+ # GH 16785
448
+ start = str(start)
449
+ end = str(end)
450
+ with pytest.raises(ValueError, match="Both dates must"):
451
+ df[start : end[:-4] + "1:00"]
452
+
453
+ with pytest.raises(ValueError, match="The index must be timezone"):
454
+ df = df.tz_localize(None)
455
+ df[start:end]
456
+
457
+ def test_slice_reduce_to_series(self):
458
+ # GH 27516
459
+ df = DataFrame(
460
+ {"A": range(24)}, index=date_range("2000", periods=24, freq="ME")
461
+ )
462
+ expected = Series(
463
+ range(12), index=date_range("2000", periods=12, freq="ME"), name="A"
464
+ )
465
+ result = df.loc["2000", "A"]
466
+ tm.assert_series_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_pickle.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ from pandas import (
4
+ NaT,
5
+ date_range,
6
+ to_datetime,
7
+ )
8
+ import pandas._testing as tm
9
+
10
+
11
class TestPickle:
    """Round-trip DatetimeIndex objects through pickle and compare."""

    def test_pickle(self):
        # GH#4606: NaT comes back as the NaT singleton next to real stamps
        original = to_datetime(["2013-01-01", NaT, "2014-01-06"])
        restored = tm.round_trip_pickle(original)
        assert restored[0] == original[0]
        assert restored[1] is NaT
        assert restored[2] == original[2]

    def test_pickle_dont_infer_freq(self):
        # GH#11002
        # don't infer freq
        weekly = date_range("1750-1-1", "2050-1-1", freq="7D")
        tm.assert_index_equal(weekly, tm.round_trip_pickle(weekly))

    def test_pickle_after_set_freq(self):
        with_freq = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
        without_freq = with_freq._with_freq(None)

        tm.assert_index_equal(tm.round_trip_pickle(without_freq), without_freq)

    def test_roundtrip_pickle_with_tz(self):
        # GH#8367
        # round-trip of timezone
        eastern = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
        tm.assert_index_equal(eastern, tm.round_trip_pickle(eastern))

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_pickle_unpickle(self, freq):
        business = date_range("2009-01-01", "2010-01-01", freq=freq)
        assert tm.round_trip_pickle(business).freq == freq
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_reindex.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import timedelta
2
+
3
+ import numpy as np
4
+
5
+ from pandas import (
6
+ DatetimeIndex,
7
+ date_range,
8
+ )
9
+ import pandas._testing as tm
10
+
11
+
12
class TestDatetimeIndexReindex:
    """``DatetimeIndex.reindex`` checks on tz-aware indexes."""

    def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self):
        # GH#7774
        eastern = date_range("2013-01-01", periods=3, tz="US/Eastern")
        for empty_target in ([], np.array([])):
            reindexed = eastern.reindex(empty_target)[0]
            assert str(reindexed.tz) == "US/Eastern"

    def test_reindex_with_same_tz_nearest(self):
        # GH#32740
        source = date_range("2010-01-01", "2010-01-02", periods=24, tz="utc")
        target = date_range("2010-01-01", "2010-01-02", periods=23, tz="utc")
        new_index, indexer = source.reindex(
            target, method="nearest", tolerance=timedelta(seconds=20)
        )

        # the 23 expected labels of the returned index
        expected_index = DatetimeIndex(
            [
                "2010-01-01 00:00:00",
                "2010-01-01 01:05:27.272727272",
                "2010-01-01 02:10:54.545454545",
                "2010-01-01 03:16:21.818181818",
                "2010-01-01 04:21:49.090909090",
                "2010-01-01 05:27:16.363636363",
                "2010-01-01 06:32:43.636363636",
                "2010-01-01 07:38:10.909090909",
                "2010-01-01 08:43:38.181818181",
                "2010-01-01 09:49:05.454545454",
                "2010-01-01 10:54:32.727272727",
                "2010-01-01 12:00:00",
                "2010-01-01 13:05:27.272727272",
                "2010-01-01 14:10:54.545454545",
                "2010-01-01 15:16:21.818181818",
                "2010-01-01 16:21:49.090909090",
                "2010-01-01 17:27:16.363636363",
                "2010-01-01 18:32:43.636363636",
                "2010-01-01 19:38:10.909090909",
                "2010-01-01 20:43:38.181818181",
                "2010-01-01 21:49:05.454545454",
                "2010-01-01 22:54:32.727272727",
                "2010-01-02 00:00:00",
            ],
            dtype="datetime64[ns, UTC]",
            freq=None,
        )
        # only the first and last entries map to a source position (0 and
        # 23); the 21 in between are -1
        expected_indexer = np.array([0] + [-1] * 21 + [23], dtype=np.dtype("intp"))
        tm.assert_index_equal(new_index, expected_index)
        tm.assert_numpy_array_equal(indexer, expected_indexer)
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_scalar_compat.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tests for DatetimeIndex methods behaving like their Timestamp counterparts
3
+ """
4
+
5
+ import calendar
6
+ from datetime import (
7
+ date,
8
+ datetime,
9
+ time,
10
+ )
11
+ import locale
12
+ import unicodedata
13
+
14
+ import numpy as np
15
+ import pytest
16
+
17
+ from pandas._libs.tslibs import timezones
18
+
19
+ from pandas import (
20
+ DatetimeIndex,
21
+ Index,
22
+ NaT,
23
+ Timestamp,
24
+ date_range,
25
+ offsets,
26
+ )
27
+ import pandas._testing as tm
28
+ from pandas.core.arrays import DatetimeArray
29
+
30
+
31
+ class TestDatetimeIndexOps:
32
+ def test_dti_no_millisecond_field(self):
33
+ msg = "type object 'DatetimeIndex' has no attribute 'millisecond'"
34
+ with pytest.raises(AttributeError, match=msg):
35
+ DatetimeIndex.millisecond
36
+
37
+ msg = "'DatetimeIndex' object has no attribute 'millisecond'"
38
+ with pytest.raises(AttributeError, match=msg):
39
+ DatetimeIndex([]).millisecond
40
+
41
+ def test_dti_time(self):
42
+ rng = date_range("1/1/2000", freq="12min", periods=10)
43
+ result = Index(rng).time
44
+ expected = [t.time() for t in rng]
45
+ assert (result == expected).all()
46
+
47
+ def test_dti_date(self):
48
+ rng = date_range("1/1/2000", freq="12h", periods=10)
49
+ result = Index(rng).date
50
+ expected = [t.date() for t in rng]
51
+ assert (result == expected).all()
52
+
53
+ @pytest.mark.parametrize(
54
+ "dtype",
55
+ [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"],
56
+ )
57
+ def test_dti_date2(self, dtype):
58
+ # Regression test for GH#21230
59
+ expected = np.array([date(2018, 6, 4), NaT])
60
+
61
+ index = DatetimeIndex(["2018-06-04 10:00:00", NaT], dtype=dtype)
62
+ result = index.date
63
+
64
+ tm.assert_numpy_array_equal(result, expected)
65
+
66
+ @pytest.mark.parametrize(
67
+ "dtype",
68
+ [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"],
69
+ )
70
+ def test_dti_time2(self, dtype):
71
+ # Regression test for GH#21267
72
+ expected = np.array([time(10, 20, 30), NaT])
73
+
74
+ index = DatetimeIndex(["2018-06-04 10:20:30", NaT], dtype=dtype)
75
+ result = index.time
76
+
77
+ tm.assert_numpy_array_equal(result, expected)
78
+
79
+ def test_dti_timetz(self, tz_naive_fixture):
80
+ # GH#21358
81
+ tz = timezones.maybe_get_tz(tz_naive_fixture)
82
+
83
+ expected = np.array([time(10, 20, 30, tzinfo=tz), NaT])
84
+
85
+ index = DatetimeIndex(["2018-06-04 10:20:30", NaT], tz=tz)
86
+ result = index.timetz
87
+
88
+ tm.assert_numpy_array_equal(result, expected)
89
+
90
+ @pytest.mark.parametrize(
91
+ "field",
92
+ [
93
+ "dayofweek",
94
+ "day_of_week",
95
+ "dayofyear",
96
+ "day_of_year",
97
+ "quarter",
98
+ "days_in_month",
99
+ "is_month_start",
100
+ "is_month_end",
101
+ "is_quarter_start",
102
+ "is_quarter_end",
103
+ "is_year_start",
104
+ "is_year_end",
105
+ ],
106
+ )
107
+ def test_dti_timestamp_fields(self, field):
108
+ # extra fields from DatetimeIndex like quarter and week
109
+ idx = date_range("2020-01-01", periods=10)
110
+ expected = getattr(idx, field)[-1]
111
+
112
+ result = getattr(Timestamp(idx[-1]), field)
113
+ assert result == expected
114
+
115
+ def test_dti_nanosecond(self):
116
+ dti = DatetimeIndex(np.arange(10))
117
+ expected = Index(np.arange(10, dtype=np.int32))
118
+
119
+ tm.assert_index_equal(dti.nanosecond, expected)
120
+
121
+ @pytest.mark.parametrize("prefix", ["", "dateutil/"])
122
+ def test_dti_hour_tzaware(self, prefix):
123
+ strdates = ["1/1/2012", "3/1/2012", "4/1/2012"]
124
+ rng = DatetimeIndex(strdates, tz=prefix + "US/Eastern")
125
+ assert (rng.hour == 0).all()
126
+
127
+ # a more unusual time zone, GH#1946
128
+ dr = date_range(
129
+ "2011-10-02 00:00", freq="h", periods=10, tz=prefix + "America/Atikokan"
130
+ )
131
+
132
+ expected = Index(np.arange(10, dtype=np.int32))
133
+ tm.assert_index_equal(dr.hour, expected)
134
+
135
+ # GH#12806
136
+ # error: Unsupported operand types for + ("List[None]" and "List[str]")
137
+ @pytest.mark.parametrize(
138
+ "time_locale", [None] + tm.get_locales() # type: ignore[operator]
139
+ )
140
+ def test_day_name_month_name(self, time_locale):
141
+ # Test Monday -> Sunday and January -> December, in that sequence
142
+ if time_locale is None:
143
+ # If the time_locale is None, day-name and month_name should
144
+ # return the english attributes
145
+ expected_days = [
146
+ "Monday",
147
+ "Tuesday",
148
+ "Wednesday",
149
+ "Thursday",
150
+ "Friday",
151
+ "Saturday",
152
+ "Sunday",
153
+ ]
154
+ expected_months = [
155
+ "January",
156
+ "February",
157
+ "March",
158
+ "April",
159
+ "May",
160
+ "June",
161
+ "July",
162
+ "August",
163
+ "September",
164
+ "October",
165
+ "November",
166
+ "December",
167
+ ]
168
+ else:
169
+ with tm.set_locale(time_locale, locale.LC_TIME):
170
+ expected_days = calendar.day_name[:]
171
+ expected_months = calendar.month_name[1:]
172
+
173
+ # GH#11128
174
+ dti = date_range(freq="D", start=datetime(1998, 1, 1), periods=365)
175
+ english_days = [
176
+ "Monday",
177
+ "Tuesday",
178
+ "Wednesday",
179
+ "Thursday",
180
+ "Friday",
181
+ "Saturday",
182
+ "Sunday",
183
+ ]
184
+ for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
185
+ name = name.capitalize()
186
+ assert dti.day_name(locale=time_locale)[day] == name
187
+ assert dti.day_name(locale=None)[day] == eng_name
188
+ ts = Timestamp(datetime(2016, 4, day))
189
+ assert ts.day_name(locale=time_locale) == name
190
+ dti = dti.append(DatetimeIndex([NaT]))
191
+ assert np.isnan(dti.day_name(locale=time_locale)[-1])
192
+ ts = Timestamp(NaT)
193
+ assert np.isnan(ts.day_name(locale=time_locale))
194
+
195
+ # GH#12805
196
+ dti = date_range(freq="ME", start="2012", end="2013")
197
+ result = dti.month_name(locale=time_locale)
198
+ expected = Index([month.capitalize() for month in expected_months])
199
+
200
+ # work around different normalization schemes GH#22342
201
+ result = result.str.normalize("NFD")
202
+ expected = expected.str.normalize("NFD")
203
+
204
+ tm.assert_index_equal(result, expected)
205
+
206
+ for item, expected in zip(dti, expected_months):
207
+ result = item.month_name(locale=time_locale)
208
+ expected = expected.capitalize()
209
+
210
+ result = unicodedata.normalize("NFD", result)
211
+ expected = unicodedata.normalize("NFD", result)
212
+
213
+ assert result == expected
214
+ dti = dti.append(DatetimeIndex([NaT]))
215
+ assert np.isnan(dti.month_name(locale=time_locale)[-1])
216
+
217
+ def test_dti_week(self):
218
+ # GH#6538: Check that DatetimeIndex and its TimeStamp elements
219
+ # return the same weekofyear accessor close to new year w/ tz
220
+ dates = ["2013/12/29", "2013/12/30", "2013/12/31"]
221
+ dates = DatetimeIndex(dates, tz="Europe/Brussels")
222
+ expected = [52, 1, 1]
223
+ assert dates.isocalendar().week.tolist() == expected
224
+ assert [d.weekofyear for d in dates] == expected
225
+
226
+ @pytest.mark.parametrize("tz", [None, "US/Eastern"])
227
+ def test_dti_fields(self, tz):
228
+ # GH#13303
229
+ dti = date_range(freq="D", start=datetime(1998, 1, 1), periods=365, tz=tz)
230
+ assert dti.year[0] == 1998
231
+ assert dti.month[0] == 1
232
+ assert dti.day[0] == 1
233
+ assert dti.hour[0] == 0
234
+ assert dti.minute[0] == 0
235
+ assert dti.second[0] == 0
236
+ assert dti.microsecond[0] == 0
237
+ assert dti.dayofweek[0] == 3
238
+
239
+ assert dti.dayofyear[0] == 1
240
+ assert dti.dayofyear[120] == 121
241
+
242
+ assert dti.isocalendar().week.iloc[0] == 1
243
+ assert dti.isocalendar().week.iloc[120] == 18
244
+
245
+ assert dti.quarter[0] == 1
246
+ assert dti.quarter[120] == 2
247
+
248
+ assert dti.days_in_month[0] == 31
249
+ assert dti.days_in_month[90] == 30
250
+
251
+ assert dti.is_month_start[0]
252
+ assert not dti.is_month_start[1]
253
+ assert dti.is_month_start[31]
254
+ assert dti.is_quarter_start[0]
255
+ assert dti.is_quarter_start[90]
256
+ assert dti.is_year_start[0]
257
+ assert not dti.is_year_start[364]
258
+ assert not dti.is_month_end[0]
259
+ assert dti.is_month_end[30]
260
+ assert not dti.is_month_end[31]
261
+ assert dti.is_month_end[364]
262
+ assert not dti.is_quarter_end[0]
263
+ assert not dti.is_quarter_end[30]
264
+ assert dti.is_quarter_end[89]
265
+ assert dti.is_quarter_end[364]
266
+ assert not dti.is_year_end[0]
267
+ assert dti.is_year_end[364]
268
+
269
+ assert len(dti.year) == 365
270
+ assert len(dti.month) == 365
271
+ assert len(dti.day) == 365
272
+ assert len(dti.hour) == 365
273
+ assert len(dti.minute) == 365
274
+ assert len(dti.second) == 365
275
+ assert len(dti.microsecond) == 365
276
+ assert len(dti.dayofweek) == 365
277
+ assert len(dti.dayofyear) == 365
278
+ assert len(dti.isocalendar()) == 365
279
+ assert len(dti.quarter) == 365
280
+ assert len(dti.is_month_start) == 365
281
+ assert len(dti.is_month_end) == 365
282
+ assert len(dti.is_quarter_start) == 365
283
+ assert len(dti.is_quarter_end) == 365
284
+ assert len(dti.is_year_start) == 365
285
+ assert len(dti.is_year_end) == 365
286
+
287
+ dti.name = "name"
288
+
289
+ # non boolean accessors -> return Index
290
+ for accessor in DatetimeArray._field_ops:
291
+ res = getattr(dti, accessor)
292
+ assert len(res) == 365
293
+ assert isinstance(res, Index)
294
+ assert res.name == "name"
295
+
296
+ # boolean accessors -> return array
297
+ for accessor in DatetimeArray._bool_ops:
298
+ res = getattr(dti, accessor)
299
+ assert len(res) == 365
300
+ assert isinstance(res, np.ndarray)
301
+
302
+ # test boolean indexing
303
+ res = dti[dti.is_quarter_start]
304
+ exp = dti[[0, 90, 181, 273]]
305
+ tm.assert_index_equal(res, exp)
306
+ res = dti[dti.is_leap_year]
307
+ exp = DatetimeIndex([], freq="D", tz=dti.tz, name="name").as_unit("ns")
308
+ tm.assert_index_equal(res, exp)
309
+
310
+ def test_dti_is_year_quarter_start(self):
311
+ dti = date_range(freq="BQE-FEB", start=datetime(1998, 1, 1), periods=4)
312
+
313
+ assert sum(dti.is_quarter_start) == 0
314
+ assert sum(dti.is_quarter_end) == 4
315
+ assert sum(dti.is_year_start) == 0
316
+ assert sum(dti.is_year_end) == 1
317
+
318
+ def test_dti_is_month_start(self):
319
+ dti = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"])
320
+
321
+ assert dti.is_month_start[0] == 1
322
+
323
+ def test_dti_is_month_start_custom(self):
324
+ # Ensure is_start/end accessors throw ValueError for CustomBusinessDay,
325
+ bday_egypt = offsets.CustomBusinessDay(weekmask="Sun Mon Tue Wed Thu")
326
+ dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt)
327
+ msg = "Custom business days is not supported by is_month_start"
328
+ with pytest.raises(ValueError, match=msg):
329
+ dti.is_month_start
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimes/test_setops.py ADDED
@@ -0,0 +1,666 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import (
2
+ datetime,
3
+ timezone,
4
+ )
5
+
6
+ import numpy as np
7
+ import pytest
8
+ import pytz
9
+
10
+ import pandas.util._test_decorators as td
11
+
12
+ import pandas as pd
13
+ from pandas import (
14
+ DataFrame,
15
+ DatetimeIndex,
16
+ Index,
17
+ Series,
18
+ Timestamp,
19
+ bdate_range,
20
+ date_range,
21
+ )
22
+ import pandas._testing as tm
23
+
24
+ from pandas.tseries.offsets import (
25
+ BMonthEnd,
26
+ Minute,
27
+ MonthEnd,
28
+ )
29
+
30
+ START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
31
+
32
+
33
+ class TestDatetimeIndexSetOps:
34
+ tz = [
35
+ None,
36
+ "UTC",
37
+ "Asia/Tokyo",
38
+ "US/Eastern",
39
+ "dateutil/Asia/Singapore",
40
+ "dateutil/US/Pacific",
41
+ ]
42
+
43
+ # TODO: moved from test_datetimelike; dedup with version below
44
+ def test_union2(self, sort):
45
+ everything = date_range("2020-01-01", periods=10)
46
+ first = everything[:5]
47
+ second = everything[5:]
48
+ union = first.union(second, sort=sort)
49
+ tm.assert_index_equal(union, everything)
50
+
51
@pytest.mark.parametrize("box", [np.array, Series, list])
def test_union3(self, sort, box):
    # GH 10149: union must accept list-like inputs that are not Index
    # objects and agree with the Index-vs-Index result.
    full = date_range("2020-01-01", periods=10)
    head, tail = full[:5], full[5:]

    boxed_tail = box(tail.values)

    tm.assert_index_equal(
        head.union(boxed_tail, sort=sort),
        head.union(tail, sort=sort),
    )
62
+
63
@pytest.mark.parametrize("tz", tz)
def test_union(self, tz, sort):
    # Union of daily ranges in three layouts (adjacent, overlapping, empty
    # `other`), checked in both operand orders.
    def daily(start, periods):
        return date_range(start, freq="D", periods=periods, tz=tz)

    # `other` begins the day after `rng` ends
    rng_adjacent = daily("1/1/2000", 5)
    other_adjacent = daily("1/6/2000", 5)
    unsorted_adjacent = DatetimeIndex(list(other_adjacent) + list(rng_adjacent))

    # `other` overlaps the last two days of `rng`
    rng_overlap = daily("1/1/2000", 5)
    other_overlap = daily("1/4/2000", 5)
    unsorted_overlap = DatetimeIndex(list(other_overlap) + list(rng_overlap[:3]))

    # `other` is empty
    rng_alone = daily("1/1/2000", 5)
    other_empty = DatetimeIndex([], tz=tz).as_unit("ns")

    cases = [
        (rng_adjacent, other_adjacent, daily("1/1/2000", 10), unsorted_adjacent),
        (rng_overlap, other_overlap, daily("1/1/2000", 8), unsorted_overlap),
        (rng_alone, other_empty, daily("1/1/2000", 5), rng_alone),
    ]
    for rng, other, exp, exp_notsorted in cases:
        tm.assert_index_equal(rng.union(other, sort=sort), exp)

        # with the operands swapped, the unsorted expectation applies
        # unless sort is None
        swapped = other.union(rng, sort=sort)
        tm.assert_index_equal(swapped, exp if sort is None else exp_notsorted)
93
+
94
+ def test_union_coverage(self, sort):
95
+ idx = DatetimeIndex(["2000-01-03", "2000-01-01", "2000-01-02"])
96
+ ordered = DatetimeIndex(idx.sort_values(), freq="infer")
97
+ result = ordered.union(idx, sort=sort)
98
+ tm.assert_index_equal(result, ordered)
99
+
100
+ result = ordered[:0].union(ordered, sort=sort)
101
+ tm.assert_index_equal(result, ordered)
102
+ assert result.freq == ordered.freq
103
+
104
+ def test_union_bug_1730(self, sort):
105
+ rng_a = date_range("1/1/2012", periods=4, freq="3h")
106
+ rng_b = date_range("1/1/2012", periods=4, freq="4h")
107
+
108
+ result = rng_a.union(rng_b, sort=sort)
109
+ exp = list(rng_a) + list(rng_b[1:])
110
+ if sort is None:
111
+ exp = DatetimeIndex(sorted(exp))
112
+ else:
113
+ exp = DatetimeIndex(exp)
114
+ tm.assert_index_equal(result, exp)
115
+
116
+ def test_union_bug_1745(self, sort):
117
+ left = DatetimeIndex(["2012-05-11 15:19:49.695000"])
118
+ right = DatetimeIndex(
119
+ [
120
+ "2012-05-29 13:04:21.322000",
121
+ "2012-05-11 15:27:24.873000",
122
+ "2012-05-11 15:31:05.350000",
123
+ ]
124
+ )
125
+
126
+ result = left.union(right, sort=sort)
127
+ exp = DatetimeIndex(
128
+ [
129
+ "2012-05-11 15:19:49.695000",
130
+ "2012-05-29 13:04:21.322000",
131
+ "2012-05-11 15:27:24.873000",
132
+ "2012-05-11 15:31:05.350000",
133
+ ]
134
+ )
135
+ if sort is None:
136
+ exp = exp.sort_values()
137
+ tm.assert_index_equal(result, exp)
138
+
139
+ def test_union_bug_4564(self, sort):
140
+ from pandas import DateOffset
141
+
142
+ left = date_range("2013-01-01", "2013-02-01")
143
+ right = left + DateOffset(minutes=15)
144
+
145
+ result = left.union(right, sort=sort)
146
+ exp = list(left) + list(right)
147
+ if sort is None:
148
+ exp = DatetimeIndex(sorted(exp))
149
+ else:
150
+ exp = DatetimeIndex(exp)
151
+ tm.assert_index_equal(result, exp)
152
+
153
+ def test_union_freq_both_none(self, sort):
154
+ # GH11086
155
+ expected = bdate_range("20150101", periods=10)
156
+ expected._data.freq = None
157
+
158
+ result = expected.union(expected, sort=sort)
159
+ tm.assert_index_equal(result, expected)
160
+ assert result.freq is None
161
+
162
+ def test_union_freq_infer(self):
163
+ # When taking the union of two DatetimeIndexes, we infer
164
+ # a freq even if the arguments don't have freq. This matches
165
+ # TimedeltaIndex behavior.
166
+ dti = date_range("2016-01-01", periods=5)
167
+ left = dti[[0, 1, 3, 4]]
168
+ right = dti[[2, 3, 1]]
169
+
170
+ assert left.freq is None
171
+ assert right.freq is None
172
+
173
+ result = left.union(right)
174
+ tm.assert_index_equal(result, dti)
175
+ assert result.freq == "D"
176
+
177
+ def test_union_dataframe_index(self):
178
+ rng1 = date_range("1/1/1999", "1/1/2012", freq="MS")
179
+ s1 = Series(np.random.default_rng(2).standard_normal(len(rng1)), rng1)
180
+
181
+ rng2 = date_range("1/1/1980", "12/1/2001", freq="MS")
182
+ s2 = Series(np.random.default_rng(2).standard_normal(len(rng2)), rng2)
183
+ df = DataFrame({"s1": s1, "s2": s2})
184
+
185
+ exp = date_range("1/1/1980", "1/1/2012", freq="MS")
186
+ tm.assert_index_equal(df.index, exp)
187
+
188
+ def test_union_with_DatetimeIndex(self, sort):
189
+ i1 = Index(np.arange(0, 20, 2, dtype=np.int64))
190
+ i2 = date_range(start="2012-01-03 00:00:00", periods=10, freq="D")
191
+ # Works
192
+ i1.union(i2, sort=sort)
193
+ # Fails with "AttributeError: can't set attribute"
194
+ i2.union(i1, sort=sort)
195
+
196
+ def test_union_same_timezone_different_units(self):
197
+ # GH 55238
198
+ idx1 = date_range("2000-01-01", periods=3, tz="UTC").as_unit("ms")
199
+ idx2 = date_range("2000-01-01", periods=3, tz="UTC").as_unit("us")
200
+ result = idx1.union(idx2)
201
+ expected = date_range("2000-01-01", periods=3, tz="UTC").as_unit("us")
202
+ tm.assert_index_equal(result, expected)
203
+
204
+ # TODO: moved from test_datetimelike; de-duplicate with version below
205
+ def test_intersection2(self):
206
+ first = date_range("2020-01-01", periods=10)
207
+ second = first[5:]
208
+ intersect = first.intersection(second)
209
+ tm.assert_index_equal(intersect, second)
210
+
211
+ # GH 10149
212
+ cases = [klass(second.values) for klass in [np.array, Series, list]]
213
+ for case in cases:
214
+ result = first.intersection(case)
215
+ tm.assert_index_equal(result, second)
216
+
217
+ third = Index(["a", "b", "c"])
218
+ result = first.intersection(third)
219
+ expected = Index([], dtype=object)
220
+ tm.assert_index_equal(result, expected)
221
+
222
@pytest.mark.parametrize(
    "tz", [None, "Asia/Tokyo", "US/Eastern", "dateutil/US/Pacific"]
)
def test_intersection(self, tz, sort):
    # GH 4690 (with tz)
    june = date_range("6/1/2000", "6/30/2000", freq="D", name="idx")

    monotonic_cases = [
        # if target has the same name, it is preserved
        (
            date_range("5/15/2000", "6/20/2000", freq="D", name="idx"),
            date_range("6/1/2000", "6/20/2000", freq="D", name="idx"),
        ),
        # if target name is different, it will be reset
        (
            date_range("5/15/2000", "6/20/2000", freq="D", name="other"),
            date_range("6/1/2000", "6/20/2000", freq="D", name=None),
        ),
        # target lies entirely in July -> empty result
        (
            date_range("7/1/2000", "7/31/2000", freq="D", name="idx"),
            DatetimeIndex([], freq="D", name="idx", dtype="M8[ns]"),
        ),
    ]
    for other, expected in monotonic_cases:
        result = june.intersection(other)
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

    # non-monotonic
    def stamps(values, name):
        return DatetimeIndex(values, tz=tz, name=name).as_unit("ns")

    shuffled = stamps(
        ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"], "idx"
    )
    partial = ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"]
    common = ["2011-01-04", "2011-01-02"]

    # GH 7880
    disjoint = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx")
    empty = stamps([], "idx")
    assert empty.freq is None

    non_monotonic_cases = [
        (stamps(partial, "idx"), stamps(common, "idx")),
        (stamps(partial, "other"), stamps(common, None)),
        (disjoint, empty),
    ]
    for other, expected in non_monotonic_cases:
        result = shuffled.intersection(other, sort=sort)
        if sort is None:
            expected = expected.sort_values()
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq
285
+
286
+ # parametrize over both anchored and non-anchored freqs, as they
287
+ # have different code paths
288
@pytest.mark.parametrize("freq", ["min", "B"])
def test_intersection_empty(self, tz_aware_fixture, freq):
    rng = date_range("6/1/2000", "6/15/2000", freq=freq, tz=tz_aware_fixture)
    nothing = rng[0:0]

    # empty same freq GH2129, in both operand orders
    for left, right in ((nothing, rng), (rng, nothing)):
        result = left.intersection(right)
        assert len(result) == 0
        assert result.freq == rng.freq

    # no overlap GH#33604, again in both operand orders
    head, tail = rng[:3], rng[-3:]
    for left, right in ((head, tail), (tail, head)):
        result = left.intersection(right)
        tm.assert_index_equal(result, rng[:0])
        # We don't preserve freq on non-anchored offsets
        if freq != "min":
            assert result.freq == rng.freq
315
+
316
+ def test_intersection_bug_1708(self):
317
+ from pandas import DateOffset
318
+
319
+ index_1 = date_range("1/1/2012", periods=4, freq="12h")
320
+ index_2 = index_1 + DateOffset(hours=1)
321
+
322
+ result = index_1.intersection(index_2)
323
+ assert len(result) == 0
324
+
325
@pytest.mark.parametrize("tz", tz)
def test_difference(self, tz, sort):
    dates = ["1/2/2000", "1/3/2000", "1/1/2000", "1/4/2000", "1/5/2000"]

    def unordered(values):
        return DatetimeIndex(values, tz=tz)

    cases = [
        # other starts after every date in the index -> nothing removed
        (date_range("1/6/2000", freq="D", periods=5, tz=tz), unordered(dates)),
        # other covers 1/4 and 1/5 -> the first three entries remain
        (date_range("1/4/2000", freq="D", periods=5, tz=tz), unordered(dates[:3])),
        # empty other -> nothing removed
        (unordered([]), unordered(dates)),
    ]
    for other, expected in cases:
        result_diff = unordered(dates).difference(other, sort=sort)
        # We don't sort (yet?) when empty GH#24959
        if sort is None and len(other) > 0:
            expected = expected.sort_values()
        tm.assert_index_equal(result_diff, expected)
351
+
352
+ def test_difference_freq(self, sort):
353
+ # GH14323: difference of DatetimeIndex should not preserve frequency
354
+
355
+ index = date_range("20160920", "20160925", freq="D")
356
+ other = date_range("20160921", "20160924", freq="D")
357
+ expected = DatetimeIndex(["20160920", "20160925"], dtype="M8[ns]", freq=None)
358
+ idx_diff = index.difference(other, sort)
359
+ tm.assert_index_equal(idx_diff, expected)
360
+ tm.assert_attr_equal("freq", idx_diff, expected)
361
+
362
+ # preserve frequency when the difference is a contiguous
363
+ # subset of the original range
364
+ other = date_range("20160922", "20160925", freq="D")
365
+ idx_diff = index.difference(other, sort)
366
+ expected = DatetimeIndex(["20160920", "20160921"], dtype="M8[ns]", freq="D")
367
+ tm.assert_index_equal(idx_diff, expected)
368
+ tm.assert_attr_equal("freq", idx_diff, expected)
369
+
370
+ def test_datetimeindex_diff(self, sort):
371
+ dti1 = date_range(freq="QE-JAN", start=datetime(1997, 12, 31), periods=100)
372
+ dti2 = date_range(freq="QE-JAN", start=datetime(1997, 12, 31), periods=98)
373
+ assert len(dti1.difference(dti2, sort)) == 2
374
+
375
@pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Eastern"])
def test_setops_preserve_freq(self, tz):
    rng = date_range("1/1/2000", "1/1/2002", name="idx", tz=tz)
    head = rng[:50]

    # union with an adjacent slice, then with an overlapping one
    for other in (rng[50:100], rng[30:100]):
        result = head.union(other)
        assert result.name == rng.name
        assert result.freq == rng.freq
        assert result.tz == rng.tz

    # union across a gap: the result has no freq
    gapped = head.union(rng[60:100])
    assert gapped.name == rng.name
    assert gapped.freq is None
    assert gapped.tz == rng.tz

    overlap = head.intersection(rng[25:75])
    assert overlap.name == rng.name
    assert overlap.freqstr == "D"
    assert overlap.tz == rng.tz

    # other operand rebuilt from a list of stamps, under a different name
    nofreq = DatetimeIndex(list(rng[25:75]), name="other")
    for setop in (head.union, head.intersection):
        result = setop(nofreq)
        assert result.name is None
        assert result.freq == rng.freq
        assert result.tz == rng.tz
409
+
410
+ def test_intersection_non_tick_no_fastpath(self):
411
+ # GH#42104
412
+ dti = DatetimeIndex(
413
+ [
414
+ "2018-12-31",
415
+ "2019-03-31",
416
+ "2019-06-30",
417
+ "2019-09-30",
418
+ "2019-12-31",
419
+ "2020-03-31",
420
+ ],
421
+ freq="QE-DEC",
422
+ )
423
+ result = dti[::2].intersection(dti[1::2])
424
+ expected = dti[:0]
425
+ tm.assert_index_equal(result, expected)
426
+
427
+ def test_dti_intersection(self):
428
+ rng = date_range("1/1/2011", periods=100, freq="h", tz="utc")
429
+
430
+ left = rng[10:90][::-1]
431
+ right = rng[20:80][::-1]
432
+
433
+ assert left.tz == rng.tz
434
+ result = left.intersection(right)
435
+ assert result.tz == left.tz
436
+
437
+ # Note: not difference, as there is no symmetry requirement there
438
@pytest.mark.parametrize("setop", ["union", "intersection", "symmetric_difference"])
def test_dti_setop_aware(self, setop):
    # non-overlapping
    # GH#39328 as of 2.0 we cast these to UTC instead of object
    central = date_range(
        "2012-11-15 00:00:00", periods=6, freq="h", tz="US/Central"
    )
    eastern = date_range(
        "2012-11-15 12:00:00", periods=6, freq="h", tz="US/Eastern"
    )
    central_utc, eastern_utc = (
        idx.tz_convert("UTC") for idx in (central, eastern)
    )

    # the mixed-timezone setop must match the same setop done in UTC
    result = getattr(central, setop)(eastern)
    expected = getattr(central_utc, setop)(eastern_utc)
    tm.assert_index_equal(result, expected)
    assert result.tz == central_utc.tz

    if len(result) > 0:
        for endpoint in (result[0], result[-1]):
            assert endpoint.tz is timezone.utc
456
+
457
+ def test_dti_union_mixed(self):
458
+ # GH#21671
459
+ rng = DatetimeIndex([Timestamp("2011-01-01"), pd.NaT])
460
+ rng2 = DatetimeIndex(["2012-01-01", "2012-01-02"], tz="Asia/Tokyo")
461
+ result = rng.union(rng2)
462
+ expected = Index(
463
+ [
464
+ Timestamp("2011-01-01"),
465
+ pd.NaT,
466
+ Timestamp("2012-01-01", tz="Asia/Tokyo"),
467
+ Timestamp("2012-01-02", tz="Asia/Tokyo"),
468
+ ],
469
+ dtype=object,
470
+ )
471
+ tm.assert_index_equal(result, expected)
472
+
473
+
474
+ class TestBusinessDatetimeIndex:
475
def test_union(self, sort):
    bdays = bdate_range(START, END)

    # overlapping
    assert isinstance(bdays[:10].union(bdays[5:10], sort=sort), DatetimeIndex)

    # non-overlapping, gap in middle
    assert isinstance(bdays[:5].union(bdays[10:], sort=sort), Index)

    # non-overlapping, no gap
    left, right = bdays[:5], bdays[5:10]
    forward = left.union(right, sort=sort)
    assert isinstance(forward, DatetimeIndex)

    # order does not matter when sort is None; otherwise the swapped call
    # is expected to list `right` before `left`
    backward = right.union(left, sort=sort)
    if sort is None:
        expected = forward
    else:
        expected = DatetimeIndex(list(right) + list(left))
    tm.assert_index_equal(backward, expected)

    # overlapping, but different offset
    month_ends = date_range(START, END, freq=BMonthEnd())
    assert isinstance(month_ends.union(month_ends, sort=sort), DatetimeIndex)
510
+
511
+ def test_union_not_cacheable(self, sort):
512
+ rng = date_range("1/1/2000", periods=50, freq=Minute())
513
+ rng1 = rng[10:]
514
+ rng2 = rng[:25]
515
+ the_union = rng1.union(rng2, sort=sort)
516
+ if sort is None:
517
+ tm.assert_index_equal(the_union, rng)
518
+ else:
519
+ expected = DatetimeIndex(list(rng[10:]) + list(rng[:10]))
520
+ tm.assert_index_equal(the_union, expected)
521
+
522
+ rng1 = rng[10:]
523
+ rng2 = rng[15:35]
524
+ the_union = rng1.union(rng2, sort=sort)
525
+ expected = rng[10:]
526
+ tm.assert_index_equal(the_union, expected)
527
+
528
+ def test_intersection(self):
529
+ rng = date_range("1/1/2000", periods=50, freq=Minute())
530
+ rng1 = rng[10:]
531
+ rng2 = rng[:25]
532
+ the_int = rng1.intersection(rng2)
533
+ expected = rng[10:25]
534
+ tm.assert_index_equal(the_int, expected)
535
+ assert isinstance(the_int, DatetimeIndex)
536
+ assert the_int.freq == rng.freq
537
+
538
+ the_int = rng1.intersection(rng2)
539
+ tm.assert_index_equal(the_int, expected)
540
+
541
+ # non-overlapping
542
+ the_int = rng[:10].intersection(rng[10:])
543
+ expected = DatetimeIndex([]).as_unit("ns")
544
+ tm.assert_index_equal(the_int, expected)
545
+
546
+ def test_intersection_bug(self):
547
+ # GH #771
548
+ a = bdate_range("11/30/2011", "12/31/2011")
549
+ b = bdate_range("12/10/2011", "12/20/2011")
550
+ result = a.intersection(b)
551
+ tm.assert_index_equal(result, b)
552
+ assert result.freq == b.freq
553
+
554
+ def test_intersection_list(self):
555
+ # GH#35876
556
+ # values is not an Index -> no name -> retain "a"
557
+ values = [Timestamp("2020-01-01"), Timestamp("2020-02-01")]
558
+ idx = DatetimeIndex(values, name="a")
559
+ res = idx.intersection(values)
560
+ tm.assert_index_equal(res, idx)
561
+
562
def test_month_range_union_tz_pytz(self, sort):
    eastern = pytz.timezone("US/Eastern")

    def month_ends(start, end):
        return date_range(start=start, end=end, tz=eastern, freq=MonthEnd())

    early_dr = month_ends(datetime(2011, 1, 1), datetime(2011, 3, 1))
    late_dr = month_ends(datetime(2011, 3, 1), datetime(2011, 5, 1))

    # smoke test: nothing is asserted beyond the union completing
    early_dr.union(late_dr, sort=sort)
575
+
576
+ @td.skip_if_windows
577
+ def test_month_range_union_tz_dateutil(self, sort):
578
+ from pandas._libs.tslibs.timezones import dateutil_gettz
579
+
580
+ tz = dateutil_gettz("US/Eastern")
581
+
582
+ early_start = datetime(2011, 1, 1)
583
+ early_end = datetime(2011, 3, 1)
584
+
585
+ late_start = datetime(2011, 3, 1)
586
+ late_end = datetime(2011, 5, 1)
587
+
588
+ early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd())
589
+ late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd())
590
+
591
+ early_dr.union(late_dr, sort=sort)
592
+
593
@pytest.mark.parametrize("sort", [False, None])
def test_intersection_duplicates(self, sort):
    # GH#38196
    dec13, dec12 = Timestamp("2019-12-13"), Timestamp("2019-12-12")
    with_dupes = Index([dec13, dec12, dec12])

    # intersecting the index with itself must drop the repeated entry
    deduped = with_dupes.intersection(with_dupes, sort=sort)
    tm.assert_index_equal(deduped, Index([dec13, dec12]))
606
+
607
+
608
+ class TestCustomDatetimeIndex:
609
def test_union(self, sort):
    # Union of custom-business-day ("C") slices in several layouts. Every
    # call passes `sort` by keyword.

    # overlapping
    rng = bdate_range(START, END, freq="C")
    left = rng[:10]
    right = rng[5:10]

    the_union = left.union(right, sort=sort)
    assert isinstance(the_union, DatetimeIndex)

    # non-overlapping, gap in middle
    left = rng[:5]
    right = rng[10:]

    the_union = left.union(right, sort=sort)
    assert isinstance(the_union, Index)

    # non-overlapping, no gap
    left = rng[:5]
    right = rng[5:10]

    the_union = left.union(right, sort=sort)
    assert isinstance(the_union, DatetimeIndex)

    # order does not matter (only checked when sort is None)
    if sort is None:
        tm.assert_index_equal(right.union(left, sort=sort), the_union)

    # overlapping, but different offset
    rng = date_range(START, END, freq=BMonthEnd())

    the_union = rng.union(rng, sort=sort)
    assert isinstance(the_union, DatetimeIndex)
640
+ assert isinstance(the_union, DatetimeIndex)
641
+
642
+ def test_intersection_bug(self):
643
+ # GH #771
644
+ a = bdate_range("11/30/2011", "12/31/2011", freq="C")
645
+ b = bdate_range("12/10/2011", "12/20/2011", freq="C")
646
+ result = a.intersection(b)
647
+ tm.assert_index_equal(result, b)
648
+ assert result.freq == b.freq
649
+
650
@pytest.mark.parametrize(
    "tz", [None, "UTC", "Europe/Berlin", pytz.FixedOffset(-60)]
)
def test_intersection_dst_transition(self, tz):
    def daily(start, periods, zone):
        return date_range(start, periods=periods, freq="D", tz=zone)

    # GH 46702: Europe/Berlin has DST transition
    overlap = daily("2020-03-27", 5, tz).intersection(daily("2020-03-30", 5, tz))
    tm.assert_index_equal(overlap, daily("2020-03-30", 2, tz))

    # GH#45863 same problem for union
    london = "Europe/London"
    combined = daily("2021-10-28", 3, london).union(daily("2021-10-30", 4, london))
    tm.assert_index_equal(combined, daily("2021-10-28", 6, london))