JustinTX committed on
Commit
31ac681
·
verified ·
1 Parent(s): e98d55c

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/__init__.py +0 -0
  2. py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_constructors.py +78 -0
  3. py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_formats.py +163 -0
  4. py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_indexing.py +104 -0
  5. py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_pickle.py +11 -0
  6. py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_reshape.py +97 -0
  7. py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_setops.py +266 -0
  8. py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_where.py +13 -0
  9. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/__init__.py +0 -0
  10. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py +89 -0
  11. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_equals.py +181 -0
  12. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_indexing.py +45 -0
  13. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_is_monotonic.py +46 -0
  14. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_nat.py +53 -0
  15. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_sort_values.py +315 -0
  16. py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_value_counts.py +103 -0
  17. py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/__init__.py +0 -0
  18. py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_astype.py +254 -0
  19. py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_constructors.py +535 -0
  20. py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_equals.py +36 -0
  21. py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_formats.py +119 -0
  22. py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_indexing.py +674 -0
  23. py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval.py +918 -0
  24. py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval_range.py +369 -0
  25. py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval_tree.py +208 -0
  26. py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_join.py +44 -0
  27. py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_pickle.py +13 -0
  28. py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_setops.py +208 -0
  29. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/__init__.py +0 -0
  30. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/conftest.py +27 -0
  31. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_analytics.py +263 -0
  32. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_astype.py +30 -0
  33. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_constructors.py +860 -0
  34. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_conversion.py +201 -0
  35. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_copy.py +96 -0
  36. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_drop.py +190 -0
  37. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_equivalence.py +284 -0
  38. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_formats.py +249 -0
  39. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_integrity.py +289 -0
  40. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_join.py +268 -0
  41. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_lexsort.py +46 -0
  42. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_monotonic.py +188 -0
  43. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_reindex.py +174 -0
  44. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_reshape.py +224 -0
  45. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_setops.py +772 -0
  46. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_sorting.py +349 -0
  47. py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_take.py +78 -0
  48. py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/__init__.py +0 -0
  49. py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_astype.py +95 -0
  50. py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_indexing.py +611 -0
py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/__init__.py ADDED
File without changes
py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_constructors.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ import pandas as pd
5
+ from pandas import (
6
+ Index,
7
+ MultiIndex,
8
+ Series,
9
+ )
10
+ import pandas._testing as tm
11
+
12
+
13
+ class TestIndexConstructor:
14
+ # Tests for the Index constructor, specifically for cases that do
15
+ # not return a subclass
16
+
17
+ @pytest.mark.parametrize("value", [1, np.int64(1)])
18
+ def test_constructor_corner(self, value):
19
+ # corner case
20
+ msg = (
21
+ r"Index\(\.\.\.\) must be called with a collection of some "
22
+ f"kind, {value} was passed"
23
+ )
24
+ with pytest.raises(TypeError, match=msg):
25
+ Index(value)
26
+
27
+ @pytest.mark.parametrize("index_vals", [[("A", 1), "B"], ["B", ("A", 1)]])
28
+ def test_construction_list_mixed_tuples(self, index_vals):
29
+ # see gh-10697: if we are constructing from a mixed list of tuples,
30
+ # make sure that we are independent of the sorting order.
31
+ index = Index(index_vals)
32
+ assert isinstance(index, Index)
33
+ assert not isinstance(index, MultiIndex)
34
+
35
+ def test_constructor_cast(self):
36
+ msg = "could not convert string to float"
37
+ with pytest.raises(ValueError, match=msg):
38
+ Index(["a", "b", "c"], dtype=float)
39
+
40
+ @pytest.mark.parametrize("tuple_list", [[()], [(), ()]])
41
+ def test_construct_empty_tuples(self, tuple_list):
42
+ # GH #45608
43
+ result = Index(tuple_list)
44
+ expected = MultiIndex.from_tuples(tuple_list)
45
+
46
+ tm.assert_index_equal(result, expected)
47
+
48
+ def test_index_string_inference(self):
49
+ # GH#54430
50
+ expected = Index(["a", "b"], dtype=pd.StringDtype(na_value=np.nan))
51
+ with pd.option_context("future.infer_string", True):
52
+ ser = Index(["a", "b"])
53
+ tm.assert_index_equal(ser, expected)
54
+
55
+ expected = Index(["a", 1], dtype="object")
56
+ with pd.option_context("future.infer_string", True):
57
+ ser = Index(["a", 1])
58
+ tm.assert_index_equal(ser, expected)
59
+
60
+ def test_inference_on_pandas_objects(self):
61
+ # GH#56012
62
+ idx = Index([pd.Timestamp("2019-12-31")], dtype=object)
63
+ with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
64
+ result = Index(idx)
65
+ assert result.dtype != np.object_
66
+
67
+ ser = Series([pd.Timestamp("2019-12-31")], dtype=object)
68
+
69
+ with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
70
+ result = Index(ser)
71
+ assert result.dtype != np.object_
72
+
73
+ def test_constructor_not_read_only(self):
74
+ # GH#57130
75
+ ser = Series([1, 2], dtype=object)
76
+ with pd.option_context("mode.copy_on_write", True):
77
+ idx = Index(ser)
78
+ assert idx._values.flags.writeable
py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_formats.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas._config import using_string_dtype
5
+ import pandas._config.config as cf
6
+
7
+ from pandas import Index
8
+ import pandas._testing as tm
9
+
10
+
11
+ class TestIndexRendering:
12
+ def test_repr_is_valid_construction_code(self):
13
+ # for the case of Index, where the repr is traditional rather than
14
+ # stylized
15
+ idx = Index(["a", "b"])
16
+ res = eval(repr(idx))
17
+ tm.assert_index_equal(res, idx)
18
+
19
+ @pytest.mark.xfail(using_string_dtype(), reason="repr different")
20
+ @pytest.mark.parametrize(
21
+ "index,expected",
22
+ [
23
+ # ASCII
24
+ # short
25
+ (
26
+ Index(["a", "bb", "ccc"]),
27
+ """Index(['a', 'bb', 'ccc'], dtype='object')""",
28
+ ),
29
+ # multiple lines
30
+ (
31
+ Index(["a", "bb", "ccc"] * 10),
32
+ "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', "
33
+ "'bb', 'ccc', 'a', 'bb', 'ccc',\n"
34
+ " 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', "
35
+ "'bb', 'ccc', 'a', 'bb', 'ccc',\n"
36
+ " 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n"
37
+ " dtype='object')",
38
+ ),
39
+ # truncated
40
+ (
41
+ Index(["a", "bb", "ccc"] * 100),
42
+ "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',\n"
43
+ " ...\n"
44
+ " 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n"
45
+ " dtype='object', length=300)",
46
+ ),
47
+ # Non-ASCII
48
+ # short
49
+ (
50
+ Index(["あ", "いい", "ううう"]),
51
+ """Index(['あ', 'いい', 'ううう'], dtype='object')""",
52
+ ),
53
+ # multiple lines
54
+ (
55
+ Index(["あ", "いい", "ううう"] * 10),
56
+ (
57
+ "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
58
+ "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n"
59
+ " 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
60
+ "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n"
61
+ " 'あ', 'いい', 'ううう', 'あ', 'いい', "
62
+ "'ううう'],\n"
63
+ " dtype='object')"
64
+ ),
65
+ ),
66
+ # truncated
67
+ (
68
+ Index(["あ", "いい", "ううう"] * 100),
69
+ (
70
+ "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
71
+ "'あ', 'いい', 'ううう', 'あ',\n"
72
+ " ...\n"
73
+ " 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', "
74
+ "'ううう', 'あ', 'いい', 'ううう'],\n"
75
+ " dtype='object', length=300)"
76
+ ),
77
+ ),
78
+ ],
79
+ )
80
+ def test_string_index_repr(self, index, expected):
81
+ result = repr(index)
82
+ assert result == expected
83
+
84
+ @pytest.mark.xfail(using_string_dtype(), reason="repr different")
85
+ @pytest.mark.parametrize(
86
+ "index,expected",
87
+ [
88
+ # short
89
+ (
90
+ Index(["あ", "いい", "ううう"]),
91
+ ("Index(['あ', 'いい', 'ううう'], dtype='object')"),
92
+ ),
93
+ # multiple lines
94
+ (
95
+ Index(["あ", "いい", "ううう"] * 10),
96
+ (
97
+ "Index(['あ', 'いい', 'ううう', 'あ', 'いい', "
98
+ "'ううう', 'あ', 'いい', 'ううう',\n"
99
+ " 'あ', 'いい', 'ううう', 'あ', 'いい', "
100
+ "'ううう', 'あ', 'いい', 'ううう',\n"
101
+ " 'あ', 'いい', 'ううう', 'あ', 'いい', "
102
+ "'ううう', 'あ', 'いい', 'ううう',\n"
103
+ " 'あ', 'いい', 'ううう'],\n"
104
+ " dtype='object')"
105
+ ""
106
+ ),
107
+ ),
108
+ # truncated
109
+ (
110
+ Index(["あ", "いい", "ううう"] * 100),
111
+ (
112
+ "Index(['あ', 'いい', 'ううう', 'あ', 'いい', "
113
+ "'ううう', 'あ', 'いい', 'ううう',\n"
114
+ " 'あ',\n"
115
+ " ...\n"
116
+ " 'ううう', 'あ', 'いい', 'ううう', 'あ', "
117
+ "'いい', 'ううう', 'あ', 'いい',\n"
118
+ " 'ううう'],\n"
119
+ " dtype='object', length=300)"
120
+ ),
121
+ ),
122
+ ],
123
+ )
124
+ def test_string_index_repr_with_unicode_option(self, index, expected):
125
+ # Enable Unicode option -----------------------------------------
126
+ with cf.option_context("display.unicode.east_asian_width", True):
127
+ result = repr(index)
128
+ assert result == expected
129
+
130
+ def test_repr_summary(self):
131
+ with cf.option_context("display.max_seq_items", 10):
132
+ result = repr(Index(np.arange(1000)))
133
+ assert len(result) < 200
134
+ assert "..." in result
135
+
136
+ def test_summary_bug(self):
137
+ # GH#3869
138
+ ind = Index(["{other}%s", "~:{range}:0"], name="A")
139
+ result = ind._summary()
140
+ # shouldn't be formatted accidentally.
141
+ assert "~:{range}:0" in result
142
+ assert "{other}%s" in result
143
+
144
+ def test_index_repr_bool_nan(self):
145
+ # GH32146
146
+ arr = Index([True, False, np.nan], dtype=object)
147
+ msg = "Index.format is deprecated"
148
+ with tm.assert_produces_warning(FutureWarning, match=msg):
149
+ exp1 = arr.format()
150
+ out1 = ["True", "False", "NaN"]
151
+ assert out1 == exp1
152
+
153
+ exp2 = repr(arr)
154
+ out2 = "Index([True, False, nan], dtype='object')"
155
+ assert out2 == exp2
156
+
157
+ def test_format_different_scalar_lengths(self):
158
+ # GH#35439
159
+ idx = Index(["aaaaaaaaa", "b"])
160
+ expected = ["aaaaaaaaa", "b"]
161
+ msg = r"Index\.format is deprecated"
162
+ with tm.assert_produces_warning(FutureWarning, match=msg):
163
+ assert idx.format() == expected
py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_indexing.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas._libs import index as libindex
5
+
6
+ import pandas as pd
7
+ from pandas import (
8
+ Index,
9
+ NaT,
10
+ )
11
+ import pandas._testing as tm
12
+
13
+
14
+ class TestGetSliceBounds:
15
+ @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
16
+ def test_get_slice_bounds_within(self, side, expected):
17
+ index = Index(list("abcdef"))
18
+ result = index.get_slice_bound("e", side=side)
19
+ assert result == expected
20
+
21
+ @pytest.mark.parametrize("side", ["left", "right"])
22
+ @pytest.mark.parametrize(
23
+ "data, bound, expected", [(list("abcdef"), "x", 6), (list("bcdefg"), "a", 0)]
24
+ )
25
+ def test_get_slice_bounds_outside(self, side, expected, data, bound):
26
+ index = Index(data)
27
+ result = index.get_slice_bound(bound, side=side)
28
+ assert result == expected
29
+
30
+ def test_get_slice_bounds_invalid_side(self):
31
+ with pytest.raises(ValueError, match="Invalid value for side kwarg"):
32
+ Index([]).get_slice_bound("a", side="middle")
33
+
34
+
35
+ class TestGetIndexerNonUnique:
36
+ def test_get_indexer_non_unique_dtype_mismatch(self):
37
+ # GH#25459
38
+ indexes, missing = Index(["A", "B"]).get_indexer_non_unique(Index([0]))
39
+ tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes)
40
+ tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), missing)
41
+
42
+ @pytest.mark.parametrize(
43
+ "idx_values,idx_non_unique",
44
+ [
45
+ ([np.nan, 100, 200, 100], [np.nan, 100]),
46
+ ([np.nan, 100.0, 200.0, 100.0], [np.nan, 100.0]),
47
+ ],
48
+ )
49
+ def test_get_indexer_non_unique_int_index(self, idx_values, idx_non_unique):
50
+ indexes, missing = Index(idx_values).get_indexer_non_unique(Index([np.nan]))
51
+ tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), indexes)
52
+ tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing)
53
+
54
+ indexes, missing = Index(idx_values).get_indexer_non_unique(
55
+ Index(idx_non_unique)
56
+ )
57
+ tm.assert_numpy_array_equal(np.array([0, 1, 3], dtype=np.intp), indexes)
58
+ tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing)
59
+
60
+
61
+ class TestGetLoc:
62
+ @pytest.mark.slow # to_flat_index takes a while
63
+ def test_get_loc_tuple_monotonic_above_size_cutoff(self, monkeypatch):
64
+ # Go through the libindex path for which using
65
+ # _bin_search vs ndarray.searchsorted makes a difference
66
+
67
+ with monkeypatch.context():
68
+ monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 100)
69
+ lev = list("ABCD")
70
+ dti = pd.date_range("2016-01-01", periods=10)
71
+
72
+ mi = pd.MultiIndex.from_product([lev, range(5), dti])
73
+ oidx = mi.to_flat_index()
74
+
75
+ loc = len(oidx) // 2
76
+ tup = oidx[loc]
77
+
78
+ res = oidx.get_loc(tup)
79
+ assert res == loc
80
+
81
+ def test_get_loc_nan_object_dtype_nonmonotonic_nonunique(self):
82
+ # case that goes through _maybe_get_bool_indexer
83
+ idx = Index(["foo", np.nan, None, "foo", 1.0, None], dtype=object)
84
+
85
+ # we dont raise KeyError on nan
86
+ res = idx.get_loc(np.nan)
87
+ assert res == 1
88
+
89
+ # we only match on None, not on np.nan
90
+ res = idx.get_loc(None)
91
+ expected = np.array([False, False, True, False, False, True])
92
+ tm.assert_numpy_array_equal(res, expected)
93
+
94
+ # we don't match at all on mismatched NA
95
+ with pytest.raises(KeyError, match="NaT"):
96
+ idx.get_loc(NaT)
97
+
98
+
99
+ def test_getitem_boolean_ea_indexer():
100
+ # GH#45806
101
+ ser = pd.Series([True, False, pd.NA], dtype="boolean")
102
+ result = ser.index[ser]
103
+ expected = Index([0])
104
+ tm.assert_index_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_pickle.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pandas import Index
2
+ import pandas._testing as tm
3
+
4
+
5
+ def test_pickle_preserves_object_dtype():
6
+ # GH#43188, GH#43155 don't infer numeric dtype
7
+ index = Index([1, 2, 3], dtype=object)
8
+
9
+ result = tm.round_trip_pickle(index)
10
+ assert result.dtype == object
11
+ tm.assert_index_equal(index, result)
py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_reshape.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tests for ndarray-like methods on the base Index class
3
+ """
4
+ import numpy as np
5
+ import pytest
6
+
7
+ import pandas as pd
8
+ from pandas import Index
9
+ import pandas._testing as tm
10
+
11
+
12
+ class TestReshape:
13
+ def test_repeat(self):
14
+ repeats = 2
15
+ index = Index([1, 2, 3])
16
+ expected = Index([1, 1, 2, 2, 3, 3])
17
+
18
+ result = index.repeat(repeats)
19
+ tm.assert_index_equal(result, expected)
20
+
21
+ def test_insert(self):
22
+ # GH 7256
23
+ # validate neg/pos inserts
24
+ result = Index(["b", "c", "d"])
25
+
26
+ # test 0th element
27
+ tm.assert_index_equal(Index(["a", "b", "c", "d"]), result.insert(0, "a"))
28
+
29
+ # test Nth element that follows Python list behavior
30
+ tm.assert_index_equal(Index(["b", "c", "e", "d"]), result.insert(-1, "e"))
31
+
32
+ # test loc +/- neq (0, -1)
33
+ tm.assert_index_equal(result.insert(1, "z"), result.insert(-2, "z"))
34
+
35
+ # test empty
36
+ null_index = Index([])
37
+ tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a"))
38
+
39
+ def test_insert_missing(self, request, nulls_fixture, using_infer_string):
40
+ if using_infer_string and nulls_fixture is pd.NA:
41
+ request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
42
+ # GH#22295
43
+ # test there is no mangling of NA values
44
+ expected = Index(["a", nulls_fixture, "b", "c"], dtype=object)
45
+ result = Index(list("abc"), dtype=object).insert(
46
+ 1, Index([nulls_fixture], dtype=object)
47
+ )
48
+ tm.assert_index_equal(result, expected)
49
+
50
+ @pytest.mark.parametrize(
51
+ "val", [(1, 2), np.datetime64("2019-12-31"), np.timedelta64(1, "D")]
52
+ )
53
+ @pytest.mark.parametrize("loc", [-1, 2])
54
+ def test_insert_datetime_into_object(self, loc, val):
55
+ # GH#44509
56
+ idx = Index(["1", "2", "3"])
57
+ result = idx.insert(loc, val)
58
+ expected = Index(["1", "2", val, "3"])
59
+ tm.assert_index_equal(result, expected)
60
+ assert type(expected[2]) is type(val)
61
+
62
+ def test_insert_none_into_string_numpy(self, string_dtype_no_object):
63
+ # GH#55365
64
+ index = Index(["a", "b", "c"], dtype=string_dtype_no_object)
65
+ result = index.insert(-1, None)
66
+ expected = Index(["a", "b", None, "c"], dtype=string_dtype_no_object)
67
+ tm.assert_index_equal(result, expected)
68
+
69
+ @pytest.mark.parametrize(
70
+ "pos,expected",
71
+ [
72
+ (0, Index(["b", "c", "d"], name="index")),
73
+ (-1, Index(["a", "b", "c"], name="index")),
74
+ ],
75
+ )
76
+ def test_delete(self, pos, expected):
77
+ index = Index(["a", "b", "c", "d"], name="index")
78
+ result = index.delete(pos)
79
+ tm.assert_index_equal(result, expected)
80
+ assert result.name == expected.name
81
+
82
+ def test_delete_raises(self):
83
+ index = Index(["a", "b", "c", "d"], name="index")
84
+ msg = "index 5 is out of bounds for axis 0 with size 4"
85
+ with pytest.raises(IndexError, match=msg):
86
+ index.delete(5)
87
+
88
+ def test_append_multiple(self):
89
+ index = Index(["a", "b", "c", "d", "e", "f"])
90
+
91
+ foos = [index[:2], index[2:4], index[4:]]
92
+ result = foos[0].append(foos[1:])
93
+ tm.assert_index_equal(result, index)
94
+
95
+ # empty
96
+ result = index.append([])
97
+ tm.assert_index_equal(result, index)
py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_setops.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+ import pandas as pd
7
+ from pandas import (
8
+ Index,
9
+ Series,
10
+ )
11
+ import pandas._testing as tm
12
+ from pandas.core.algorithms import safe_sort
13
+
14
+
15
+ def equal_contents(arr1, arr2) -> bool:
16
+ """
17
+ Checks if the sets of unique elements of arr1 and arr2 are equivalent.
18
+ """
19
+ return frozenset(arr1) == frozenset(arr2)
20
+
21
+
22
+ class TestIndexSetOps:
23
+ @pytest.mark.parametrize(
24
+ "method", ["union", "intersection", "difference", "symmetric_difference"]
25
+ )
26
+ def test_setops_sort_validation(self, method):
27
+ idx1 = Index(["a", "b"])
28
+ idx2 = Index(["b", "c"])
29
+
30
+ with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
31
+ getattr(idx1, method)(idx2, sort=2)
32
+
33
+ # sort=True is supported as of GH#??
34
+ getattr(idx1, method)(idx2, sort=True)
35
+
36
+ def test_setops_preserve_object_dtype(self):
37
+ idx = Index([1, 2, 3], dtype=object)
38
+ result = idx.intersection(idx[1:])
39
+ expected = idx[1:]
40
+ tm.assert_index_equal(result, expected)
41
+
42
+ # if other is not monotonic increasing, intersection goes through
43
+ # a different route
44
+ result = idx.intersection(idx[1:][::-1])
45
+ tm.assert_index_equal(result, expected)
46
+
47
+ result = idx._union(idx[1:], sort=None)
48
+ expected = idx
49
+ tm.assert_numpy_array_equal(result, expected.values)
50
+
51
+ result = idx.union(idx[1:], sort=None)
52
+ tm.assert_index_equal(result, expected)
53
+
54
+ # if other is not monotonic increasing, _union goes through
55
+ # a different route
56
+ result = idx._union(idx[1:][::-1], sort=None)
57
+ tm.assert_numpy_array_equal(result, expected.values)
58
+
59
+ result = idx.union(idx[1:][::-1], sort=None)
60
+ tm.assert_index_equal(result, expected)
61
+
62
+ def test_union_base(self):
63
+ index = Index([0, "a", 1, "b", 2, "c"])
64
+ first = index[3:]
65
+ second = index[:5]
66
+
67
+ result = first.union(second)
68
+
69
+ expected = Index([0, 1, 2, "a", "b", "c"])
70
+ tm.assert_index_equal(result, expected)
71
+
72
+ @pytest.mark.parametrize("klass", [np.array, Series, list])
73
+ def test_union_different_type_base(self, klass):
74
+ # GH 10149
75
+ index = Index([0, "a", 1, "b", 2, "c"])
76
+ first = index[3:]
77
+ second = index[:5]
78
+
79
+ result = first.union(klass(second.values))
80
+
81
+ assert equal_contents(result, index)
82
+
83
+ def test_union_sort_other_incomparable(self):
84
+ # https://github.com/pandas-dev/pandas/issues/24959
85
+ idx = Index([1, pd.Timestamp("2000")])
86
+ # default (sort=None)
87
+ with tm.assert_produces_warning(RuntimeWarning):
88
+ result = idx.union(idx[:1])
89
+
90
+ tm.assert_index_equal(result, idx)
91
+
92
+ # sort=None
93
+ with tm.assert_produces_warning(RuntimeWarning):
94
+ result = idx.union(idx[:1], sort=None)
95
+ tm.assert_index_equal(result, idx)
96
+
97
+ # sort=False
98
+ result = idx.union(idx[:1], sort=False)
99
+ tm.assert_index_equal(result, idx)
100
+
101
+ def test_union_sort_other_incomparable_true(self):
102
+ idx = Index([1, pd.Timestamp("2000")])
103
+ with pytest.raises(TypeError, match=".*"):
104
+ idx.union(idx[:1], sort=True)
105
+
106
+ def test_intersection_equal_sort_true(self):
107
+ idx = Index(["c", "a", "b"])
108
+ sorted_ = Index(["a", "b", "c"])
109
+ tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_)
110
+
111
+ def test_intersection_base(self, sort):
112
+ # (same results for py2 and py3 but sortedness not tested elsewhere)
113
+ index = Index([0, "a", 1, "b", 2, "c"])
114
+ first = index[:5]
115
+ second = index[:3]
116
+
117
+ expected = Index([0, 1, "a"]) if sort is None else Index([0, "a", 1])
118
+ result = first.intersection(second, sort=sort)
119
+ tm.assert_index_equal(result, expected)
120
+
121
+ @pytest.mark.parametrize("klass", [np.array, Series, list])
122
+ def test_intersection_different_type_base(self, klass, sort):
123
+ # GH 10149
124
+ index = Index([0, "a", 1, "b", 2, "c"])
125
+ first = index[:5]
126
+ second = index[:3]
127
+
128
+ result = first.intersection(klass(second.values), sort=sort)
129
+ assert equal_contents(result, second)
130
+
131
+ def test_intersection_nosort(self):
132
+ result = Index(["c", "b", "a"]).intersection(["b", "a"])
133
+ expected = Index(["b", "a"])
134
+ tm.assert_index_equal(result, expected)
135
+
136
+ def test_intersection_equal_sort(self):
137
+ idx = Index(["c", "a", "b"])
138
+ tm.assert_index_equal(idx.intersection(idx, sort=False), idx)
139
+ tm.assert_index_equal(idx.intersection(idx, sort=None), idx)
140
+
141
+ def test_intersection_str_dates(self, sort):
142
+ dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]
143
+
144
+ i1 = Index(dt_dates, dtype=object)
145
+ i2 = Index(["aa"], dtype=object)
146
+ result = i2.intersection(i1, sort=sort)
147
+
148
+ assert len(result) == 0
149
+
150
+ @pytest.mark.parametrize(
151
+ "index2,expected_arr",
152
+ [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B"])],
153
+ )
154
+ def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort):
155
+ # non-monotonic non-unique
156
+ index1 = Index(["A", "B", "A", "C"])
157
+ expected = Index(expected_arr)
158
+ result = index1.intersection(index2, sort=sort)
159
+ if sort is None:
160
+ expected = expected.sort_values()
161
+ tm.assert_index_equal(result, expected)
162
+
163
+ def test_difference_base(self, sort):
164
+ # (same results for py2 and py3 but sortedness not tested elsewhere)
165
+ index = Index([0, "a", 1, "b", 2, "c"])
166
+ first = index[:4]
167
+ second = index[3:]
168
+
169
+ result = first.difference(second, sort)
170
+ expected = Index([0, "a", 1])
171
+ if sort is None:
172
+ expected = Index(safe_sort(expected))
173
+ tm.assert_index_equal(result, expected)
174
+
175
+ def test_symmetric_difference(self):
176
+ # (same results for py2 and py3 but sortedness not tested elsewhere)
177
+ index = Index([0, "a", 1, "b", 2, "c"])
178
+ first = index[:4]
179
+ second = index[3:]
180
+
181
+ result = first.symmetric_difference(second)
182
+ expected = Index([0, 1, 2, "a", "c"])
183
+ tm.assert_index_equal(result, expected)
184
+
185
+ @pytest.mark.parametrize(
186
+ "method,expected,sort",
187
+ [
188
+ (
189
+ "intersection",
190
+ np.array(
191
+ [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
192
+ dtype=[("num", int), ("let", "S1")],
193
+ ),
194
+ False,
195
+ ),
196
+ (
197
+ "intersection",
198
+ np.array(
199
+ [(1, "A"), (1, "B"), (2, "A"), (2, "B")],
200
+ dtype=[("num", int), ("let", "S1")],
201
+ ),
202
+ None,
203
+ ),
204
+ (
205
+ "union",
206
+ np.array(
207
+ [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")],
208
+ dtype=[("num", int), ("let", "S1")],
209
+ ),
210
+ None,
211
+ ),
212
+ ],
213
+ )
214
+ def test_tuple_union_bug(self, method, expected, sort):
215
+ index1 = Index(
216
+ np.array(
217
+ [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
218
+ dtype=[("num", int), ("let", "S1")],
219
+ )
220
+ )
221
+ index2 = Index(
222
+ np.array(
223
+ [(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")],
224
+ dtype=[("num", int), ("let", "S1")],
225
+ )
226
+ )
227
+
228
+ result = getattr(index1, method)(index2, sort=sort)
229
+ assert result.ndim == 1
230
+
231
+ expected = Index(expected)
232
+ tm.assert_index_equal(result, expected)
233
+
234
+ @pytest.mark.parametrize("first_list", [["b", "a"], []])
235
+ @pytest.mark.parametrize("second_list", [["a", "b"], []])
236
+ @pytest.mark.parametrize(
237
+ "first_name, second_name, expected_name",
238
+ [("A", "B", None), (None, "B", None), ("A", None, None)],
239
+ )
240
+ def test_union_name_preservation(
241
+ self, first_list, second_list, first_name, second_name, expected_name, sort
242
+ ):
243
+ first = Index(first_list, name=first_name)
244
+ second = Index(second_list, name=second_name)
245
+ union = first.union(second, sort=sort)
246
+
247
+ vals = set(first_list).union(second_list)
248
+
249
+ if sort is None and len(first_list) > 0 and len(second_list) > 0:
250
+ expected = Index(sorted(vals), name=expected_name)
251
+ tm.assert_index_equal(union, expected)
252
+ else:
253
+ expected = Index(vals, name=expected_name)
254
+ tm.assert_index_equal(union.sort_values(), expected.sort_values())
255
+
256
+ @pytest.mark.parametrize(
257
+ "diff_type, expected",
258
+ [["difference", [1, "B"]], ["symmetric_difference", [1, 2, "B", "C"]]],
259
+ )
260
+ def test_difference_object_type(self, diff_type, expected):
261
+ # GH 13432
262
+ idx1 = Index([0, 1, "A", "B"])
263
+ idx2 = Index([0, 2, "A", "C"])
264
+ result = getattr(idx1, diff_type)(idx2)
265
+ expected = Index(expected)
266
+ tm.assert_index_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_where.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ from pandas import Index
4
+ import pandas._testing as tm
5
+
6
+
7
+ class TestWhere:
8
+ def test_where_intlike_str_doesnt_cast_ints(self):
9
+ idx = Index(range(3))
10
+ mask = np.array([True, False, True])
11
+ res = idx.where(mask, "2")
12
+ expected = Index([0, "2", 2])
13
+ tm.assert_index_equal(res, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/__init__.py ADDED
File without changes
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas import (
5
+ PeriodIndex,
6
+ Series,
7
+ date_range,
8
+ period_range,
9
+ timedelta_range,
10
+ )
11
+ import pandas._testing as tm
12
+
13
+
14
+ class DropDuplicates:
15
+ def test_drop_duplicates_metadata(self, idx):
16
+ # GH#10115
17
+ result = idx.drop_duplicates()
18
+ tm.assert_index_equal(idx, result)
19
+ assert idx.freq == result.freq
20
+
21
+ idx_dup = idx.append(idx)
22
+ result = idx_dup.drop_duplicates()
23
+
24
+ expected = idx
25
+ if not isinstance(idx, PeriodIndex):
26
+ # freq is reset except for PeriodIndex
27
+ assert idx_dup.freq is None
28
+ assert result.freq is None
29
+ expected = idx._with_freq(None)
30
+ else:
31
+ assert result.freq == expected.freq
32
+
33
+ tm.assert_index_equal(result, expected)
34
+
35
+ @pytest.mark.parametrize(
36
+ "keep, expected, index",
37
+ [
38
+ (
39
+ "first",
40
+ np.concatenate(([False] * 10, [True] * 5)),
41
+ np.arange(0, 10, dtype=np.int64),
42
+ ),
43
+ (
44
+ "last",
45
+ np.concatenate(([True] * 5, [False] * 10)),
46
+ np.arange(5, 15, dtype=np.int64),
47
+ ),
48
+ (
49
+ False,
50
+ np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
51
+ np.arange(5, 10, dtype=np.int64),
52
+ ),
53
+ ],
54
+ )
55
+ def test_drop_duplicates(self, keep, expected, index, idx):
56
+ # to check Index/Series compat
57
+ idx = idx.append(idx[:5])
58
+
59
+ tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
60
+ expected = idx[~expected]
61
+
62
+ result = idx.drop_duplicates(keep=keep)
63
+ tm.assert_index_equal(result, expected)
64
+
65
+ result = Series(idx).drop_duplicates(keep=keep)
66
+ expected = Series(expected, index=index)
67
+ tm.assert_series_equal(result, expected)
68
+
69
+
70
+ class TestDropDuplicatesPeriodIndex(DropDuplicates):
71
+ @pytest.fixture(params=["D", "3D", "h", "2h", "min", "2min", "s", "3s"])
72
+ def freq(self, request):
73
+ return request.param
74
+
75
+ @pytest.fixture
76
+ def idx(self, freq):
77
+ return period_range("2011-01-01", periods=10, freq=freq, name="idx")
78
+
79
+
80
+ class TestDropDuplicatesDatetimeIndex(DropDuplicates):
81
+ @pytest.fixture
82
+ def idx(self, freq_sample):
83
+ return date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
84
+
85
+
86
+ class TestDropDuplicatesTimedeltaIndex(DropDuplicates):
87
+ @pytest.fixture
88
+ def idx(self, freq_sample):
89
+ return timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_equals.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tests shared for DatetimeIndex/TimedeltaIndex/PeriodIndex
3
+ """
4
+ from datetime import (
5
+ datetime,
6
+ timedelta,
7
+ )
8
+
9
+ import numpy as np
10
+ import pytest
11
+
12
+ import pandas as pd
13
+ from pandas import (
14
+ CategoricalIndex,
15
+ DatetimeIndex,
16
+ Index,
17
+ PeriodIndex,
18
+ TimedeltaIndex,
19
+ date_range,
20
+ period_range,
21
+ timedelta_range,
22
+ )
23
+ import pandas._testing as tm
24
+
25
+
26
+ class EqualsTests:
27
+ def test_not_equals_numeric(self, index):
28
+ assert not index.equals(Index(index.asi8))
29
+ assert not index.equals(Index(index.asi8.astype("u8")))
30
+ assert not index.equals(Index(index.asi8).astype("f8"))
31
+
32
+ def test_equals(self, index):
33
+ assert index.equals(index)
34
+ assert index.equals(index.astype(object))
35
+ assert index.equals(CategoricalIndex(index))
36
+ assert index.equals(CategoricalIndex(index.astype(object)))
37
+
38
+ def test_not_equals_non_arraylike(self, index):
39
+ assert not index.equals(list(index))
40
+
41
+ def test_not_equals_strings(self, index):
42
+ other = Index([str(x) for x in index], dtype=object)
43
+ assert not index.equals(other)
44
+ assert not index.equals(CategoricalIndex(other))
45
+
46
+ def test_not_equals_misc_strs(self, index):
47
+ other = Index(list("abc"))
48
+ assert not index.equals(other)
49
+
50
+
51
+ class TestPeriodIndexEquals(EqualsTests):
52
+ @pytest.fixture
53
+ def index(self):
54
+ return period_range("2013-01-01", periods=5, freq="D")
55
+
56
+ # TODO: de-duplicate with other test_equals2 methods
57
+ @pytest.mark.parametrize("freq", ["D", "M"])
58
+ def test_equals2(self, freq):
59
+ # GH#13107
60
+ idx = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq=freq)
61
+ assert idx.equals(idx)
62
+ assert idx.equals(idx.copy())
63
+ assert idx.equals(idx.astype(object))
64
+ assert idx.astype(object).equals(idx)
65
+ assert idx.astype(object).equals(idx.astype(object))
66
+ assert not idx.equals(list(idx))
67
+ assert not idx.equals(pd.Series(idx))
68
+
69
+ idx2 = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq="h")
70
+ assert not idx.equals(idx2)
71
+ assert not idx.equals(idx2.copy())
72
+ assert not idx.equals(idx2.astype(object))
73
+ assert not idx.astype(object).equals(idx2)
74
+ assert not idx.equals(list(idx2))
75
+ assert not idx.equals(pd.Series(idx2))
76
+
77
+ # same internal, different freq
78
+ idx3 = PeriodIndex._simple_new(
79
+ idx._values._simple_new(idx._values.asi8, dtype=pd.PeriodDtype("h"))
80
+ )
81
+ tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
82
+ assert not idx.equals(idx3)
83
+ assert not idx.equals(idx3.copy())
84
+ assert not idx.equals(idx3.astype(object))
85
+ assert not idx.astype(object).equals(idx3)
86
+ assert not idx.equals(list(idx3))
87
+ assert not idx.equals(pd.Series(idx3))
88
+
89
+
90
+ class TestDatetimeIndexEquals(EqualsTests):
91
+ @pytest.fixture
92
+ def index(self):
93
+ return date_range("2013-01-01", periods=5)
94
+
95
+ def test_equals2(self):
96
+ # GH#13107
97
+ idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
98
+ assert idx.equals(idx)
99
+ assert idx.equals(idx.copy())
100
+ assert idx.equals(idx.astype(object))
101
+ assert idx.astype(object).equals(idx)
102
+ assert idx.astype(object).equals(idx.astype(object))
103
+ assert not idx.equals(list(idx))
104
+ assert not idx.equals(pd.Series(idx))
105
+
106
+ idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific")
107
+ assert not idx.equals(idx2)
108
+ assert not idx.equals(idx2.copy())
109
+ assert not idx.equals(idx2.astype(object))
110
+ assert not idx.astype(object).equals(idx2)
111
+ assert not idx.equals(list(idx2))
112
+ assert not idx.equals(pd.Series(idx2))
113
+
114
+ # same internal, different tz
115
+ idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific")
116
+ tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
117
+ assert not idx.equals(idx3)
118
+ assert not idx.equals(idx3.copy())
119
+ assert not idx.equals(idx3.astype(object))
120
+ assert not idx.astype(object).equals(idx3)
121
+ assert not idx.equals(list(idx3))
122
+ assert not idx.equals(pd.Series(idx3))
123
+
124
+ # check that we do not raise when comparing with OutOfBounds objects
125
+ oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
126
+ assert not idx.equals(oob)
127
+ assert not idx2.equals(oob)
128
+ assert not idx3.equals(oob)
129
+
130
+ # check that we do not raise when comparing with OutOfBounds dt64
131
+ oob2 = oob.map(np.datetime64)
132
+ assert not idx.equals(oob2)
133
+ assert not idx2.equals(oob2)
134
+ assert not idx3.equals(oob2)
135
+
136
+ @pytest.mark.parametrize("freq", ["B", "C"])
137
+ def test_not_equals_bday(self, freq):
138
+ rng = date_range("2009-01-01", "2010-01-01", freq=freq)
139
+ assert not rng.equals(list(rng))
140
+
141
+
142
+ class TestTimedeltaIndexEquals(EqualsTests):
143
+ @pytest.fixture
144
+ def index(self):
145
+ return timedelta_range("1 day", periods=10)
146
+
147
+ def test_equals2(self):
148
+ # GH#13107
149
+ idx = TimedeltaIndex(["1 days", "2 days", "NaT"])
150
+ assert idx.equals(idx)
151
+ assert idx.equals(idx.copy())
152
+ assert idx.equals(idx.astype(object))
153
+ assert idx.astype(object).equals(idx)
154
+ assert idx.astype(object).equals(idx.astype(object))
155
+ assert not idx.equals(list(idx))
156
+ assert not idx.equals(pd.Series(idx))
157
+
158
+ idx2 = TimedeltaIndex(["2 days", "1 days", "NaT"])
159
+ assert not idx.equals(idx2)
160
+ assert not idx.equals(idx2.copy())
161
+ assert not idx.equals(idx2.astype(object))
162
+ assert not idx.astype(object).equals(idx2)
163
+ assert not idx.astype(object).equals(idx2.astype(object))
164
+ assert not idx.equals(list(idx2))
165
+ assert not idx.equals(pd.Series(idx2))
166
+
167
+ # Check that we don't raise OverflowError on comparisons outside the
168
+ # implementation range GH#28532
169
+ oob = Index([timedelta(days=10**6)] * 3, dtype=object)
170
+ assert not idx.equals(oob)
171
+ assert not idx2.equals(oob)
172
+
173
+ oob2 = Index([np.timedelta64(x) for x in oob], dtype=object)
174
+ assert (oob == oob2).all()
175
+ assert not idx.equals(oob2)
176
+ assert not idx2.equals(oob2)
177
+
178
+ oob3 = oob.map(np.timedelta64)
179
+ assert (oob3 == oob).all()
180
+ assert not idx.equals(oob3)
181
+ assert not idx2.equals(oob3)
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_indexing.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ import pandas as pd
5
+ from pandas import (
6
+ DatetimeIndex,
7
+ Index,
8
+ )
9
+ import pandas._testing as tm
10
+
11
+ dtlike_dtypes = [
12
+ np.dtype("timedelta64[ns]"),
13
+ np.dtype("datetime64[ns]"),
14
+ pd.DatetimeTZDtype("ns", "Asia/Tokyo"),
15
+ pd.PeriodDtype("ns"),
16
+ ]
17
+
18
+
19
+ @pytest.mark.parametrize("ldtype", dtlike_dtypes)
20
+ @pytest.mark.parametrize("rdtype", dtlike_dtypes)
21
+ def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype):
22
+ vals = np.tile(3600 * 10**9 * np.arange(3, dtype=np.int64), 2)
23
+
24
+ def construct(dtype):
25
+ if dtype is dtlike_dtypes[-1]:
26
+ # PeriodArray will try to cast ints to strings
27
+ return DatetimeIndex(vals).astype(dtype)
28
+ return Index(vals, dtype=dtype)
29
+
30
+ left = construct(ldtype)
31
+ right = construct(rdtype)
32
+
33
+ result = left.get_indexer_non_unique(right)
34
+
35
+ if ldtype is rdtype:
36
+ ex1 = np.array([0, 3, 1, 4, 2, 5] * 2, dtype=np.intp)
37
+ ex2 = np.array([], dtype=np.intp)
38
+ tm.assert_numpy_array_equal(result[0], ex1)
39
+ tm.assert_numpy_array_equal(result[1], ex2)
40
+
41
+ else:
42
+ no_matches = np.array([-1] * 6, dtype=np.intp)
43
+ missing = np.arange(6, dtype=np.intp)
44
+ tm.assert_numpy_array_equal(result[0], no_matches)
45
+ tm.assert_numpy_array_equal(result[1], missing)
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_is_monotonic.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pandas import (
2
+ Index,
3
+ NaT,
4
+ date_range,
5
+ )
6
+
7
+
8
+ def test_is_monotonic_with_nat():
9
+ # GH#31437
10
+ # PeriodIndex.is_monotonic_increasing should behave analogously to DatetimeIndex,
11
+ # in particular never be monotonic when we have NaT
12
+ dti = date_range("2016-01-01", periods=3)
13
+ pi = dti.to_period("D")
14
+ tdi = Index(dti.view("timedelta64[ns]"))
15
+
16
+ for obj in [pi, pi._engine, dti, dti._engine, tdi, tdi._engine]:
17
+ if isinstance(obj, Index):
18
+ # i.e. not Engines
19
+ assert obj.is_monotonic_increasing
20
+ assert obj.is_monotonic_increasing
21
+ assert not obj.is_monotonic_decreasing
22
+ assert obj.is_unique
23
+
24
+ dti1 = dti.insert(0, NaT)
25
+ pi1 = dti1.to_period("D")
26
+ tdi1 = Index(dti1.view("timedelta64[ns]"))
27
+
28
+ for obj in [pi1, pi1._engine, dti1, dti1._engine, tdi1, tdi1._engine]:
29
+ if isinstance(obj, Index):
30
+ # i.e. not Engines
31
+ assert not obj.is_monotonic_increasing
32
+ assert not obj.is_monotonic_increasing
33
+ assert not obj.is_monotonic_decreasing
34
+ assert obj.is_unique
35
+
36
+ dti2 = dti.insert(3, NaT)
37
+ pi2 = dti2.to_period("h")
38
+ tdi2 = Index(dti2.view("timedelta64[ns]"))
39
+
40
+ for obj in [pi2, pi2._engine, dti2, dti2._engine, tdi2, tdi2._engine]:
41
+ if isinstance(obj, Index):
42
+ # i.e. not Engines
43
+ assert not obj.is_monotonic_increasing
44
+ assert not obj.is_monotonic_increasing
45
+ assert not obj.is_monotonic_decreasing
46
+ assert obj.is_unique
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_nat.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas import (
5
+ DatetimeIndex,
6
+ NaT,
7
+ PeriodIndex,
8
+ TimedeltaIndex,
9
+ )
10
+ import pandas._testing as tm
11
+
12
+
13
+ class NATests:
14
+ def test_nat(self, index_without_na):
15
+ empty_index = index_without_na[:0]
16
+
17
+ index_with_na = index_without_na.copy(deep=True)
18
+ index_with_na._data[1] = NaT
19
+
20
+ assert empty_index._na_value is NaT
21
+ assert index_with_na._na_value is NaT
22
+ assert index_without_na._na_value is NaT
23
+
24
+ idx = index_without_na
25
+ assert idx._can_hold_na
26
+
27
+ tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
28
+ assert idx.hasnans is False
29
+
30
+ idx = index_with_na
31
+ assert idx._can_hold_na
32
+
33
+ tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
34
+ assert idx.hasnans is True
35
+
36
+
37
+ class TestDatetimeIndexNA(NATests):
38
+ @pytest.fixture
39
+ def index_without_na(self, tz_naive_fixture):
40
+ tz = tz_naive_fixture
41
+ return DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
42
+
43
+
44
+ class TestTimedeltaIndexNA(NATests):
45
+ @pytest.fixture
46
+ def index_without_na(self):
47
+ return TimedeltaIndex(["1 days", "2 days"])
48
+
49
+
50
+ class TestPeriodIndexNA(NATests):
51
+ @pytest.fixture
52
+ def index_without_na(self):
53
+ return PeriodIndex(["2011-01-01", "2011-01-02"], freq="D")
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_sort_values.py ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas import (
5
+ DatetimeIndex,
6
+ Index,
7
+ NaT,
8
+ PeriodIndex,
9
+ TimedeltaIndex,
10
+ timedelta_range,
11
+ )
12
+ import pandas._testing as tm
13
+
14
+
15
+ def check_freq_ascending(ordered, orig, ascending):
16
+ """
17
+ Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex
18
+ when the original index is generated (or generate-able) with
19
+ period_range/date_range/timedelta_range.
20
+ """
21
+ if isinstance(ordered, PeriodIndex):
22
+ assert ordered.freq == orig.freq
23
+ elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
24
+ if ascending:
25
+ assert ordered.freq.n == orig.freq.n
26
+ else:
27
+ assert ordered.freq.n == -1 * orig.freq.n
28
+
29
+
30
+ def check_freq_nonmonotonic(ordered, orig):
31
+ """
32
+ Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex
33
+ when the original index is _not_ generated (or generate-able) with
34
+ period_range/date_range/timedelta_range.
35
+ """
36
+ if isinstance(ordered, PeriodIndex):
37
+ assert ordered.freq == orig.freq
38
+ elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
39
+ assert ordered.freq is None
40
+
41
+
42
+ class TestSortValues:
43
+ @pytest.fixture(params=[DatetimeIndex, TimedeltaIndex, PeriodIndex])
44
+ def non_monotonic_idx(self, request):
45
+ if request.param is DatetimeIndex:
46
+ return DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
47
+ elif request.param is PeriodIndex:
48
+ dti = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
49
+ return dti.to_period("D")
50
+ else:
51
+ return TimedeltaIndex(
52
+ ["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"]
53
+ )
54
+
55
+ def test_argmin_argmax(self, non_monotonic_idx):
56
+ assert non_monotonic_idx.argmin() == 1
57
+ assert non_monotonic_idx.argmax() == 0
58
+
59
+ def test_sort_values(self, non_monotonic_idx):
60
+ idx = non_monotonic_idx
61
+ ordered = idx.sort_values()
62
+ assert ordered.is_monotonic_increasing
63
+ ordered = idx.sort_values(ascending=False)
64
+ assert ordered[::-1].is_monotonic_increasing
65
+
66
+ ordered, dexer = idx.sort_values(return_indexer=True)
67
+ assert ordered.is_monotonic_increasing
68
+ tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp))
69
+
70
+ ordered, dexer = idx.sort_values(return_indexer=True, ascending=False)
71
+ assert ordered[::-1].is_monotonic_increasing
72
+ tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp))
73
+
74
+ def check_sort_values_with_freq(self, idx):
75
+ ordered = idx.sort_values()
76
+ tm.assert_index_equal(ordered, idx)
77
+ check_freq_ascending(ordered, idx, True)
78
+
79
+ ordered = idx.sort_values(ascending=False)
80
+ expected = idx[::-1]
81
+ tm.assert_index_equal(ordered, expected)
82
+ check_freq_ascending(ordered, idx, False)
83
+
84
+ ordered, indexer = idx.sort_values(return_indexer=True)
85
+ tm.assert_index_equal(ordered, idx)
86
+ tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2], dtype=np.intp))
87
+ check_freq_ascending(ordered, idx, True)
88
+
89
+ ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
90
+ expected = idx[::-1]
91
+ tm.assert_index_equal(ordered, expected)
92
+ tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0], dtype=np.intp))
93
+ check_freq_ascending(ordered, idx, False)
94
+
95
+ @pytest.mark.parametrize("freq", ["D", "h"])
96
+ def test_sort_values_with_freq_timedeltaindex(self, freq):
97
+ # GH#10295
98
+ idx = timedelta_range(start=f"1{freq}", periods=3, freq=freq).rename("idx")
99
+
100
+ self.check_sort_values_with_freq(idx)
101
+
102
+ @pytest.mark.parametrize(
103
+ "idx",
104
+ [
105
+ DatetimeIndex(
106
+ ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
107
+ ),
108
+ DatetimeIndex(
109
+ ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
110
+ freq="h",
111
+ name="tzidx",
112
+ tz="Asia/Tokyo",
113
+ ),
114
+ ],
115
+ )
116
+ def test_sort_values_with_freq_datetimeindex(self, idx):
117
+ self.check_sort_values_with_freq(idx)
118
+
119
+ @pytest.mark.parametrize("freq", ["D", "2D", "4D"])
120
+ def test_sort_values_with_freq_periodindex(self, freq):
121
+ # here with_freq refers to being period_range-like
122
+ idx = PeriodIndex(
123
+ ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx"
124
+ )
125
+ self.check_sort_values_with_freq(idx)
126
+
127
+ @pytest.mark.parametrize(
128
+ "idx",
129
+ [
130
+ PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="Y"),
131
+ Index([2011, 2012, 2013], name="idx"), # for compatibility check
132
+ ],
133
+ )
134
+ def test_sort_values_with_freq_periodindex2(self, idx):
135
+ # here with_freq indicates this is period_range-like
136
+ self.check_sort_values_with_freq(idx)
137
+
138
+ def check_sort_values_without_freq(self, idx, expected):
139
+ ordered = idx.sort_values(na_position="first")
140
+ tm.assert_index_equal(ordered, expected)
141
+ check_freq_nonmonotonic(ordered, idx)
142
+
143
+ if not idx.isna().any():
144
+ ordered = idx.sort_values()
145
+ tm.assert_index_equal(ordered, expected)
146
+ check_freq_nonmonotonic(ordered, idx)
147
+
148
+ ordered = idx.sort_values(ascending=False)
149
+ tm.assert_index_equal(ordered, expected[::-1])
150
+ check_freq_nonmonotonic(ordered, idx)
151
+
152
+ ordered, indexer = idx.sort_values(return_indexer=True, na_position="first")
153
+ tm.assert_index_equal(ordered, expected)
154
+
155
+ exp = np.array([0, 4, 3, 1, 2], dtype=np.intp)
156
+ tm.assert_numpy_array_equal(indexer, exp)
157
+ check_freq_nonmonotonic(ordered, idx)
158
+
159
+ if not idx.isna().any():
160
+ ordered, indexer = idx.sort_values(return_indexer=True)
161
+ tm.assert_index_equal(ordered, expected)
162
+
163
+ exp = np.array([0, 4, 3, 1, 2], dtype=np.intp)
164
+ tm.assert_numpy_array_equal(indexer, exp)
165
+ check_freq_nonmonotonic(ordered, idx)
166
+
167
+ ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
168
+ tm.assert_index_equal(ordered, expected[::-1])
169
+
170
+ exp = np.array([2, 1, 3, 0, 4], dtype=np.intp)
171
+ tm.assert_numpy_array_equal(indexer, exp)
172
+ check_freq_nonmonotonic(ordered, idx)
173
+
174
+ def test_sort_values_without_freq_timedeltaindex(self):
175
+ # GH#10295
176
+
177
+ idx = TimedeltaIndex(
178
+ ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1"
179
+ )
180
+ expected = TimedeltaIndex(
181
+ ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1"
182
+ )
183
+ self.check_sort_values_without_freq(idx, expected)
184
+
185
+ @pytest.mark.parametrize(
186
+ "index_dates,expected_dates",
187
+ [
188
+ (
189
+ ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
190
+ ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
191
+ ),
192
+ (
193
+ ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
194
+ ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
195
+ ),
196
+ (
197
+ [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
198
+ [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
199
+ ),
200
+ ],
201
+ )
202
+ def test_sort_values_without_freq_datetimeindex(
203
+ self, index_dates, expected_dates, tz_naive_fixture
204
+ ):
205
+ tz = tz_naive_fixture
206
+
207
+ # without freq
208
+ idx = DatetimeIndex(index_dates, tz=tz, name="idx")
209
+ expected = DatetimeIndex(expected_dates, tz=tz, name="idx")
210
+
211
+ self.check_sort_values_without_freq(idx, expected)
212
+
213
+ @pytest.mark.parametrize(
214
+ "idx,expected",
215
+ [
216
+ (
217
+ PeriodIndex(
218
+ [
219
+ "2011-01-01",
220
+ "2011-01-03",
221
+ "2011-01-05",
222
+ "2011-01-02",
223
+ "2011-01-01",
224
+ ],
225
+ freq="D",
226
+ name="idx1",
227
+ ),
228
+ PeriodIndex(
229
+ [
230
+ "2011-01-01",
231
+ "2011-01-01",
232
+ "2011-01-02",
233
+ "2011-01-03",
234
+ "2011-01-05",
235
+ ],
236
+ freq="D",
237
+ name="idx1",
238
+ ),
239
+ ),
240
+ (
241
+ PeriodIndex(
242
+ [
243
+ "2011-01-01",
244
+ "2011-01-03",
245
+ "2011-01-05",
246
+ "2011-01-02",
247
+ "2011-01-01",
248
+ ],
249
+ freq="D",
250
+ name="idx2",
251
+ ),
252
+ PeriodIndex(
253
+ [
254
+ "2011-01-01",
255
+ "2011-01-01",
256
+ "2011-01-02",
257
+ "2011-01-03",
258
+ "2011-01-05",
259
+ ],
260
+ freq="D",
261
+ name="idx2",
262
+ ),
263
+ ),
264
+ (
265
+ PeriodIndex(
266
+ [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
267
+ freq="D",
268
+ name="idx3",
269
+ ),
270
+ PeriodIndex(
271
+ [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
272
+ freq="D",
273
+ name="idx3",
274
+ ),
275
+ ),
276
+ (
277
+ PeriodIndex(
278
+ ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="Y"
279
+ ),
280
+ PeriodIndex(
281
+ ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="Y"
282
+ ),
283
+ ),
284
+ (
285
+ # For compatibility check
286
+ Index([2011, 2013, 2015, 2012, 2011], name="idx"),
287
+ Index([2011, 2011, 2012, 2013, 2015], name="idx"),
288
+ ),
289
+ ],
290
+ )
291
+ def test_sort_values_without_freq_periodindex(self, idx, expected):
292
+ # here without_freq means not generate-able by period_range
293
+ self.check_sort_values_without_freq(idx, expected)
294
+
295
+ def test_sort_values_without_freq_periodindex_nat(self):
296
+ # doesn't quite fit into check_sort_values_without_freq
297
+ idx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D")
298
+ expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D")
299
+
300
+ ordered = idx.sort_values(na_position="first")
301
+ tm.assert_index_equal(ordered, expected)
302
+ check_freq_nonmonotonic(ordered, idx)
303
+
304
+ ordered = idx.sort_values(ascending=False)
305
+ tm.assert_index_equal(ordered, expected[::-1])
306
+ check_freq_nonmonotonic(ordered, idx)
307
+
308
+
309
+ def test_order_stability_compat():
310
+ # GH#35922. sort_values is stable both for normal and datetime-like Index
311
+ pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="Y")
312
+ iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx")
313
+ ordered1, indexer1 = pidx.sort_values(return_indexer=True, ascending=False)
314
+ ordered2, indexer2 = iidx.sort_values(return_indexer=True, ascending=False)
315
+ tm.assert_numpy_array_equal(indexer1, indexer2)
py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_value_counts.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ from pandas import (
4
+ DatetimeIndex,
5
+ NaT,
6
+ PeriodIndex,
7
+ Series,
8
+ TimedeltaIndex,
9
+ date_range,
10
+ period_range,
11
+ timedelta_range,
12
+ )
13
+ import pandas._testing as tm
14
+
15
+
16
+ class TestValueCounts:
17
+ # GH#7735
18
+
19
+ def test_value_counts_unique_datetimeindex(self, tz_naive_fixture):
20
+ tz = tz_naive_fixture
21
+ orig = date_range("2011-01-01 09:00", freq="h", periods=10, tz=tz)
22
+ self._check_value_counts_with_repeats(orig)
23
+
24
+ def test_value_counts_unique_timedeltaindex(self):
25
+ orig = timedelta_range("1 days 09:00:00", freq="h", periods=10)
26
+ self._check_value_counts_with_repeats(orig)
27
+
28
+ def test_value_counts_unique_periodindex(self):
29
+ orig = period_range("2011-01-01 09:00", freq="h", periods=10)
30
+ self._check_value_counts_with_repeats(orig)
31
+
32
+ def _check_value_counts_with_repeats(self, orig):
33
+ # create repeated values, 'n'th element is repeated by n+1 times
34
+ idx = type(orig)(
35
+ np.repeat(orig._values, range(1, len(orig) + 1)), dtype=orig.dtype
36
+ )
37
+
38
+ exp_idx = orig[::-1]
39
+ if not isinstance(exp_idx, PeriodIndex):
40
+ exp_idx = exp_idx._with_freq(None)
41
+ expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64", name="count")
42
+
43
+ for obj in [idx, Series(idx)]:
44
+ tm.assert_series_equal(obj.value_counts(), expected)
45
+
46
+ tm.assert_index_equal(idx.unique(), orig)
47
+
48
+ def test_value_counts_unique_datetimeindex2(self, tz_naive_fixture):
49
+ tz = tz_naive_fixture
50
+ idx = DatetimeIndex(
51
+ [
52
+ "2013-01-01 09:00",
53
+ "2013-01-01 09:00",
54
+ "2013-01-01 09:00",
55
+ "2013-01-01 08:00",
56
+ "2013-01-01 08:00",
57
+ NaT,
58
+ ],
59
+ tz=tz,
60
+ )
61
+ self._check_value_counts_dropna(idx)
62
+
63
+ def test_value_counts_unique_timedeltaindex2(self):
64
+ idx = TimedeltaIndex(
65
+ [
66
+ "1 days 09:00:00",
67
+ "1 days 09:00:00",
68
+ "1 days 09:00:00",
69
+ "1 days 08:00:00",
70
+ "1 days 08:00:00",
71
+ NaT,
72
+ ]
73
+ )
74
+ self._check_value_counts_dropna(idx)
75
+
76
+ def test_value_counts_unique_periodindex2(self):
77
+ idx = PeriodIndex(
78
+ [
79
+ "2013-01-01 09:00",
80
+ "2013-01-01 09:00",
81
+ "2013-01-01 09:00",
82
+ "2013-01-01 08:00",
83
+ "2013-01-01 08:00",
84
+ NaT,
85
+ ],
86
+ freq="h",
87
+ )
88
+ self._check_value_counts_dropna(idx)
89
+
90
+ def _check_value_counts_dropna(self, idx):
91
+ exp_idx = idx[[2, 3]]
92
+ expected = Series([3, 2], index=exp_idx, name="count")
93
+
94
+ for obj in [idx, Series(idx)]:
95
+ tm.assert_series_equal(obj.value_counts(), expected)
96
+
97
+ exp_idx = idx[[2, 3, -1]]
98
+ expected = Series([3, 2, 1], index=exp_idx, name="count")
99
+
100
+ for obj in [idx, Series(idx)]:
101
+ tm.assert_series_equal(obj.value_counts(dropna=False), expected)
102
+
103
+ tm.assert_index_equal(idx.unique(), exp_idx)
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/__init__.py ADDED
File without changes
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_astype.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+ from pandas.core.dtypes.dtypes import (
7
+ CategoricalDtype,
8
+ IntervalDtype,
9
+ )
10
+
11
+ from pandas import (
12
+ CategoricalIndex,
13
+ Index,
14
+ IntervalIndex,
15
+ NaT,
16
+ Timedelta,
17
+ Timestamp,
18
+ interval_range,
19
+ )
20
+ import pandas._testing as tm
21
+
22
+
23
+ class AstypeTests:
24
+ """Tests common to IntervalIndex with any subtype"""
25
+
26
+ def test_astype_idempotent(self, index):
27
+ result = index.astype("interval")
28
+ tm.assert_index_equal(result, index)
29
+
30
+ result = index.astype(index.dtype)
31
+ tm.assert_index_equal(result, index)
32
+
33
+ def test_astype_object(self, index):
34
+ result = index.astype(object)
35
+ expected = Index(index.values, dtype="object")
36
+ tm.assert_index_equal(result, expected)
37
+ assert not result.equals(index)
38
+
39
+ def test_astype_category(self, index):
40
+ result = index.astype("category")
41
+ expected = CategoricalIndex(index.values)
42
+ tm.assert_index_equal(result, expected)
43
+
44
+ result = index.astype(CategoricalDtype())
45
+ tm.assert_index_equal(result, expected)
46
+
47
+ # non-default params
48
+ categories = index.dropna().unique().values[:-1]
49
+ dtype = CategoricalDtype(categories=categories, ordered=True)
50
+ result = index.astype(dtype)
51
+ expected = CategoricalIndex(index.values, categories=categories, ordered=True)
52
+ tm.assert_index_equal(result, expected)
53
+
54
+ @pytest.mark.parametrize(
55
+ "dtype",
56
+ [
57
+ "int64",
58
+ "uint64",
59
+ "float64",
60
+ "complex128",
61
+ "period[M]",
62
+ "timedelta64",
63
+ "timedelta64[ns]",
64
+ "datetime64",
65
+ "datetime64[ns]",
66
+ "datetime64[ns, US/Eastern]",
67
+ ],
68
+ )
69
+ def test_astype_cannot_cast(self, index, dtype):
70
+ msg = "Cannot cast IntervalIndex to dtype"
71
+ with pytest.raises(TypeError, match=msg):
72
+ index.astype(dtype)
73
+
74
+ def test_astype_invalid_dtype(self, index):
75
+ msg = "data type [\"']fake_dtype[\"'] not understood"
76
+ with pytest.raises(TypeError, match=msg):
77
+ index.astype("fake_dtype")
78
+
79
+
80
+ class TestIntSubtype(AstypeTests):
81
+ """Tests specific to IntervalIndex with integer-like subtype"""
82
+
83
+ indexes = [
84
+ IntervalIndex.from_breaks(np.arange(-10, 11, dtype="int64")),
85
+ IntervalIndex.from_breaks(np.arange(100, dtype="uint64"), closed="left"),
86
+ ]
87
+
88
+ @pytest.fixture(params=indexes)
89
+ def index(self, request):
90
+ return request.param
91
+
92
+ @pytest.mark.parametrize(
93
+ "subtype", ["float64", "datetime64[ns]", "timedelta64[ns]"]
94
+ )
95
+ def test_subtype_conversion(self, index, subtype):
96
+ dtype = IntervalDtype(subtype, index.closed)
97
+ result = index.astype(dtype)
98
+ expected = IntervalIndex.from_arrays(
99
+ index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
100
+ )
101
+ tm.assert_index_equal(result, expected)
102
+
103
+ @pytest.mark.parametrize(
104
+ "subtype_start, subtype_end", [("int64", "uint64"), ("uint64", "int64")]
105
+ )
106
+ def test_subtype_integer(self, subtype_start, subtype_end):
107
+ index = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start))
108
+ dtype = IntervalDtype(subtype_end, index.closed)
109
+ result = index.astype(dtype)
110
+ expected = IntervalIndex.from_arrays(
111
+ index.left.astype(subtype_end),
112
+ index.right.astype(subtype_end),
113
+ closed=index.closed,
114
+ )
115
+ tm.assert_index_equal(result, expected)
116
+
117
+ @pytest.mark.xfail(reason="GH#15832")
118
+ def test_subtype_integer_errors(self):
119
+ # int64 -> uint64 fails with negative values
120
+ index = interval_range(-10, 10)
121
+ dtype = IntervalDtype("uint64", "right")
122
+
123
+ # Until we decide what the exception message _should_ be, we
124
+ # assert something that it should _not_ be.
125
+ # We should _not_ be getting a message suggesting that the -10
126
+ # has been wrapped around to a large-positive integer
127
+ msg = "^(?!(left side of interval must be <= right side))"
128
+ with pytest.raises(ValueError, match=msg):
129
+ index.astype(dtype)
130
+
131
+
132
+ class TestFloatSubtype(AstypeTests):
133
+ """Tests specific to IntervalIndex with float subtype"""
134
+
135
+ indexes = [
136
+ interval_range(-10.0, 10.0, closed="neither"),
137
+ IntervalIndex.from_arrays(
138
+ [-1.5, np.nan, 0.0, 0.0, 1.5], [-0.5, np.nan, 1.0, 1.0, 3.0], closed="both"
139
+ ),
140
+ ]
141
+
142
+ @pytest.fixture(params=indexes)
143
+ def index(self, request):
144
+ return request.param
145
+
146
+ @pytest.mark.parametrize("subtype", ["int64", "uint64"])
147
+ def test_subtype_integer(self, subtype):
148
+ index = interval_range(0.0, 10.0)
149
+ dtype = IntervalDtype(subtype, "right")
150
+ result = index.astype(dtype)
151
+ expected = IntervalIndex.from_arrays(
152
+ index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
153
+ )
154
+ tm.assert_index_equal(result, expected)
155
+
156
+ # raises with NA
157
+ msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
158
+ with pytest.raises(ValueError, match=msg):
159
+ index.insert(0, np.nan).astype(dtype)
160
+
161
+ @pytest.mark.parametrize("subtype", ["int64", "uint64"])
162
+ def test_subtype_integer_with_non_integer_borders(self, subtype):
163
+ index = interval_range(0.0, 3.0, freq=0.25)
164
+ dtype = IntervalDtype(subtype, "right")
165
+ result = index.astype(dtype)
166
+ expected = IntervalIndex.from_arrays(
167
+ index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
168
+ )
169
+ tm.assert_index_equal(result, expected)
170
+
171
+ def test_subtype_integer_errors(self):
172
+ # float64 -> uint64 fails with negative values
173
+ index = interval_range(-10.0, 10.0)
174
+ dtype = IntervalDtype("uint64", "right")
175
+ msg = re.escape(
176
+ "Cannot convert interval[float64, right] to interval[uint64, right]; "
177
+ "subtypes are incompatible"
178
+ )
179
+ with pytest.raises(TypeError, match=msg):
180
+ index.astype(dtype)
181
+
182
+ @pytest.mark.parametrize("subtype", ["datetime64[ns]", "timedelta64[ns]"])
183
+ def test_subtype_datetimelike(self, index, subtype):
184
+ dtype = IntervalDtype(subtype, "right")
185
+ msg = "Cannot convert .* to .*; subtypes are incompatible"
186
+ with pytest.raises(TypeError, match=msg):
187
+ index.astype(dtype)
188
+
189
+ @pytest.mark.filterwarnings(
190
+ "ignore:invalid value encountered in cast:RuntimeWarning"
191
+ )
192
+ def test_astype_category(self, index):
193
+ super().test_astype_category(index)
194
+
195
+
196
+ class TestDatetimelikeSubtype(AstypeTests):
197
+ """Tests specific to IntervalIndex with datetime-like subtype"""
198
+
199
+ indexes = [
200
+ interval_range(Timestamp("2018-01-01"), periods=10, closed="neither"),
201
+ interval_range(Timestamp("2018-01-01"), periods=10).insert(2, NaT),
202
+ interval_range(Timestamp("2018-01-01", tz="US/Eastern"), periods=10),
203
+ interval_range(Timedelta("0 days"), periods=10, closed="both"),
204
+ interval_range(Timedelta("0 days"), periods=10).insert(2, NaT),
205
+ ]
206
+
207
+ @pytest.fixture(params=indexes)
208
+ def index(self, request):
209
+ return request.param
210
+
211
+ @pytest.mark.parametrize("subtype", ["int64", "uint64"])
212
+ def test_subtype_integer(self, index, subtype):
213
+ dtype = IntervalDtype(subtype, "right")
214
+
215
+ if subtype != "int64":
216
+ msg = (
217
+ r"Cannot convert interval\[(timedelta64|datetime64)\[ns.*\], .*\] "
218
+ r"to interval\[uint64, .*\]"
219
+ )
220
+ with pytest.raises(TypeError, match=msg):
221
+ index.astype(dtype)
222
+ return
223
+
224
+ result = index.astype(dtype)
225
+ new_left = index.left.astype(subtype)
226
+ new_right = index.right.astype(subtype)
227
+
228
+ expected = IntervalIndex.from_arrays(new_left, new_right, closed=index.closed)
229
+ tm.assert_index_equal(result, expected)
230
+
231
+ def test_subtype_float(self, index):
232
+ dtype = IntervalDtype("float64", "right")
233
+ msg = "Cannot convert .* to .*; subtypes are incompatible"
234
+ with pytest.raises(TypeError, match=msg):
235
+ index.astype(dtype)
236
+
237
+ def test_subtype_datetimelike(self):
238
+ # datetime -> timedelta raises
239
+ dtype = IntervalDtype("timedelta64[ns]", "right")
240
+ msg = "Cannot convert .* to .*; subtypes are incompatible"
241
+
242
+ index = interval_range(Timestamp("2018-01-01"), periods=10)
243
+ with pytest.raises(TypeError, match=msg):
244
+ index.astype(dtype)
245
+
246
+ index = interval_range(Timestamp("2018-01-01", tz="CET"), periods=10)
247
+ with pytest.raises(TypeError, match=msg):
248
+ index.astype(dtype)
249
+
250
+ # timedelta -> datetime raises
251
+ dtype = IntervalDtype("datetime64[ns]", "right")
252
+ index = interval_range(Timedelta("0 days"), periods=10)
253
+ with pytest.raises(TypeError, match=msg):
254
+ index.astype(dtype)
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_constructors.py ADDED
@@ -0,0 +1,535 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import partial
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+ import pandas.util._test_decorators as td
7
+
8
+ from pandas.core.dtypes.common import is_unsigned_integer_dtype
9
+ from pandas.core.dtypes.dtypes import IntervalDtype
10
+
11
+ from pandas import (
12
+ Categorical,
13
+ CategoricalDtype,
14
+ CategoricalIndex,
15
+ Index,
16
+ Interval,
17
+ IntervalIndex,
18
+ date_range,
19
+ notna,
20
+ period_range,
21
+ timedelta_range,
22
+ )
23
+ import pandas._testing as tm
24
+ from pandas.core.arrays import IntervalArray
25
+ import pandas.core.common as com
26
+
27
+
28
+ @pytest.fixture(params=[None, "foo"])
29
+ def name(request):
30
+ return request.param
31
+
32
+
33
+ class ConstructorTests:
34
+ """
35
+ Common tests for all variations of IntervalIndex construction. Input data
36
+ to be supplied in breaks format, then converted by the subclass method
37
+ get_kwargs_from_breaks to the expected format.
38
+ """
39
+
40
+ @pytest.fixture(
41
+ params=[
42
+ ([3, 14, 15, 92, 653], np.int64),
43
+ (np.arange(10, dtype="int64"), np.int64),
44
+ (Index(np.arange(-10, 11, dtype=np.int64)), np.int64),
45
+ (Index(np.arange(10, 31, dtype=np.uint64)), np.uint64),
46
+ (Index(np.arange(20, 30, 0.5), dtype=np.float64), np.float64),
47
+ (date_range("20180101", periods=10), "<M8[ns]"),
48
+ (
49
+ date_range("20180101", periods=10, tz="US/Eastern"),
50
+ "datetime64[ns, US/Eastern]",
51
+ ),
52
+ (timedelta_range("1 day", periods=10), "<m8[ns]"),
53
+ ]
54
+ )
55
+ def breaks_and_expected_subtype(self, request):
56
+ return request.param
57
+
58
+ def test_constructor(self, constructor, breaks_and_expected_subtype, closed, name):
59
+ breaks, expected_subtype = breaks_and_expected_subtype
60
+
61
+ result_kwargs = self.get_kwargs_from_breaks(breaks, closed)
62
+
63
+ result = constructor(closed=closed, name=name, **result_kwargs)
64
+
65
+ assert result.closed == closed
66
+ assert result.name == name
67
+ assert result.dtype.subtype == expected_subtype
68
+ tm.assert_index_equal(result.left, Index(breaks[:-1], dtype=expected_subtype))
69
+ tm.assert_index_equal(result.right, Index(breaks[1:], dtype=expected_subtype))
70
+
71
+ @pytest.mark.parametrize(
72
+ "breaks, subtype",
73
+ [
74
+ (Index([0, 1, 2, 3, 4], dtype=np.int64), "float64"),
75
+ (Index([0, 1, 2, 3, 4], dtype=np.int64), "datetime64[ns]"),
76
+ (Index([0, 1, 2, 3, 4], dtype=np.int64), "timedelta64[ns]"),
77
+ (Index([0, 1, 2, 3, 4], dtype=np.float64), "int64"),
78
+ (date_range("2017-01-01", periods=5), "int64"),
79
+ (timedelta_range("1 day", periods=5), "int64"),
80
+ ],
81
+ )
82
+ def test_constructor_dtype(self, constructor, breaks, subtype):
83
+ # GH 19262: conversion via dtype parameter
84
+ expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype))
85
+ expected = constructor(**expected_kwargs)
86
+
87
+ result_kwargs = self.get_kwargs_from_breaks(breaks)
88
+ iv_dtype = IntervalDtype(subtype, "right")
89
+ for dtype in (iv_dtype, str(iv_dtype)):
90
+ result = constructor(dtype=dtype, **result_kwargs)
91
+ tm.assert_index_equal(result, expected)
92
+
93
+ @pytest.mark.parametrize(
94
+ "breaks",
95
+ [
96
+ Index([0, 1, 2, 3, 4], dtype=np.int64),
97
+ Index([0, 1, 2, 3, 4], dtype=np.uint64),
98
+ Index([0, 1, 2, 3, 4], dtype=np.float64),
99
+ date_range("2017-01-01", periods=5),
100
+ timedelta_range("1 day", periods=5),
101
+ ],
102
+ )
103
+ def test_constructor_pass_closed(self, constructor, breaks):
104
+ # not passing closed to IntervalDtype, but to IntervalArray constructor
105
+ iv_dtype = IntervalDtype(breaks.dtype)
106
+
107
+ result_kwargs = self.get_kwargs_from_breaks(breaks)
108
+
109
+ for dtype in (iv_dtype, str(iv_dtype)):
110
+ with tm.assert_produces_warning(None):
111
+ result = constructor(dtype=dtype, closed="left", **result_kwargs)
112
+ assert result.dtype.closed == "left"
113
+
114
+ @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
115
+ def test_constructor_nan(self, constructor, breaks, closed):
116
+ # GH 18421
117
+ result_kwargs = self.get_kwargs_from_breaks(breaks)
118
+ result = constructor(closed=closed, **result_kwargs)
119
+
120
+ expected_subtype = np.float64
121
+ expected_values = np.array(breaks[:-1], dtype=object)
122
+
123
+ assert result.closed == closed
124
+ assert result.dtype.subtype == expected_subtype
125
+ tm.assert_numpy_array_equal(np.array(result), expected_values)
126
+
127
+ @pytest.mark.parametrize(
128
+ "breaks",
129
+ [
130
+ [],
131
+ np.array([], dtype="int64"),
132
+ np.array([], dtype="uint64"),
133
+ np.array([], dtype="float64"),
134
+ np.array([], dtype="datetime64[ns]"),
135
+ np.array([], dtype="timedelta64[ns]"),
136
+ ],
137
+ )
138
+ def test_constructor_empty(self, constructor, breaks, closed):
139
+ # GH 18421
140
+ result_kwargs = self.get_kwargs_from_breaks(breaks)
141
+ result = constructor(closed=closed, **result_kwargs)
142
+
143
+ expected_values = np.array([], dtype=object)
144
+ expected_subtype = getattr(breaks, "dtype", np.int64)
145
+
146
+ assert result.empty
147
+ assert result.closed == closed
148
+ assert result.dtype.subtype == expected_subtype
149
+ tm.assert_numpy_array_equal(np.array(result), expected_values)
150
+
151
+ @pytest.mark.parametrize(
152
+ "breaks",
153
+ [
154
+ tuple("0123456789"),
155
+ list("abcdefghij"),
156
+ np.array(list("abcdefghij"), dtype=object),
157
+ np.array(list("abcdefghij"), dtype="<U1"),
158
+ ],
159
+ )
160
+ def test_constructor_string(self, constructor, breaks):
161
+ # GH 19016
162
+ msg = (
163
+ "category, object, and string subtypes are not supported "
164
+ "for IntervalIndex"
165
+ )
166
+ with pytest.raises(TypeError, match=msg):
167
+ constructor(**self.get_kwargs_from_breaks(breaks))
168
+
169
+ @pytest.mark.parametrize("cat_constructor", [Categorical, CategoricalIndex])
170
+ def test_constructor_categorical_valid(self, constructor, cat_constructor):
171
+ # GH 21243/21253
172
+
173
+ breaks = np.arange(10, dtype="int64")
174
+ expected = IntervalIndex.from_breaks(breaks)
175
+
176
+ cat_breaks = cat_constructor(breaks)
177
+ result_kwargs = self.get_kwargs_from_breaks(cat_breaks)
178
+ result = constructor(**result_kwargs)
179
+ tm.assert_index_equal(result, expected)
180
+
181
+ def test_generic_errors(self, constructor):
182
+ # filler input data to be used when supplying invalid kwargs
183
+ filler = self.get_kwargs_from_breaks(range(10))
184
+
185
+ # invalid closed
186
+ msg = "closed must be one of 'right', 'left', 'both', 'neither'"
187
+ with pytest.raises(ValueError, match=msg):
188
+ constructor(closed="invalid", **filler)
189
+
190
+ # unsupported dtype
191
+ msg = "dtype must be an IntervalDtype, got int64"
192
+ with pytest.raises(TypeError, match=msg):
193
+ constructor(dtype="int64", **filler)
194
+
195
+ # invalid dtype
196
+ msg = "data type [\"']invalid[\"'] not understood"
197
+ with pytest.raises(TypeError, match=msg):
198
+ constructor(dtype="invalid", **filler)
199
+
200
+ # no point in nesting periods in an IntervalIndex
201
+ periods = period_range("2000-01-01", periods=10)
202
+ periods_kwargs = self.get_kwargs_from_breaks(periods)
203
+ msg = "Period dtypes are not supported, use a PeriodIndex instead"
204
+ with pytest.raises(ValueError, match=msg):
205
+ constructor(**periods_kwargs)
206
+
207
+ # decreasing values
208
+ decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1))
209
+ msg = "left side of interval must be <= right side"
210
+ with pytest.raises(ValueError, match=msg):
211
+ constructor(**decreasing_kwargs)
212
+
213
+
214
+ class TestFromArrays(ConstructorTests):
215
+ """Tests specific to IntervalIndex.from_arrays"""
216
+
217
+ @pytest.fixture
218
+ def constructor(self):
219
+ return IntervalIndex.from_arrays
220
+
221
+ def get_kwargs_from_breaks(self, breaks, closed="right"):
222
+ """
223
+ converts intervals in breaks format to a dictionary of kwargs to
224
+ specific to the format expected by IntervalIndex.from_arrays
225
+ """
226
+ return {"left": breaks[:-1], "right": breaks[1:]}
227
+
228
+ def test_constructor_errors(self):
229
+ # GH 19016: categorical data
230
+ data = Categorical(list("01234abcde"), ordered=True)
231
+ msg = (
232
+ "category, object, and string subtypes are not supported "
233
+ "for IntervalIndex"
234
+ )
235
+ with pytest.raises(TypeError, match=msg):
236
+ IntervalIndex.from_arrays(data[:-1], data[1:])
237
+
238
+ # unequal length
239
+ left = [0, 1, 2]
240
+ right = [2, 3]
241
+ msg = "left and right must have the same length"
242
+ with pytest.raises(ValueError, match=msg):
243
+ IntervalIndex.from_arrays(left, right)
244
+
245
+ @pytest.mark.parametrize(
246
+ "left_subtype, right_subtype", [(np.int64, np.float64), (np.float64, np.int64)]
247
+ )
248
+ def test_mixed_float_int(self, left_subtype, right_subtype):
249
+ """mixed int/float left/right results in float for both sides"""
250
+ left = np.arange(9, dtype=left_subtype)
251
+ right = np.arange(1, 10, dtype=right_subtype)
252
+ result = IntervalIndex.from_arrays(left, right)
253
+
254
+ expected_left = Index(left, dtype=np.float64)
255
+ expected_right = Index(right, dtype=np.float64)
256
+ expected_subtype = np.float64
257
+
258
+ tm.assert_index_equal(result.left, expected_left)
259
+ tm.assert_index_equal(result.right, expected_right)
260
+ assert result.dtype.subtype == expected_subtype
261
+
262
+ @pytest.mark.parametrize("interval_cls", [IntervalArray, IntervalIndex])
263
+ def test_from_arrays_mismatched_datetimelike_resos(self, interval_cls):
264
+ # GH#55714
265
+ left = date_range("2016-01-01", periods=3, unit="s")
266
+ right = date_range("2017-01-01", periods=3, unit="ms")
267
+ result = interval_cls.from_arrays(left, right)
268
+ expected = interval_cls.from_arrays(left.as_unit("ms"), right)
269
+ tm.assert_equal(result, expected)
270
+
271
+ # td64
272
+ left2 = left - left[0]
273
+ right2 = right - left[0]
274
+ result2 = interval_cls.from_arrays(left2, right2)
275
+ expected2 = interval_cls.from_arrays(left2.as_unit("ms"), right2)
276
+ tm.assert_equal(result2, expected2)
277
+
278
+ # dt64tz
279
+ left3 = left.tz_localize("UTC")
280
+ right3 = right.tz_localize("UTC")
281
+ result3 = interval_cls.from_arrays(left3, right3)
282
+ expected3 = interval_cls.from_arrays(left3.as_unit("ms"), right3)
283
+ tm.assert_equal(result3, expected3)
284
+
285
+
286
+ class TestFromBreaks(ConstructorTests):
287
+ """Tests specific to IntervalIndex.from_breaks"""
288
+
289
+ @pytest.fixture
290
+ def constructor(self):
291
+ return IntervalIndex.from_breaks
292
+
293
+ def get_kwargs_from_breaks(self, breaks, closed="right"):
294
+ """
295
+ converts intervals in breaks format to a dictionary of kwargs to
296
+ specific to the format expected by IntervalIndex.from_breaks
297
+ """
298
+ return {"breaks": breaks}
299
+
300
+ def test_constructor_errors(self):
301
+ # GH 19016: categorical data
302
+ data = Categorical(list("01234abcde"), ordered=True)
303
+ msg = (
304
+ "category, object, and string subtypes are not supported "
305
+ "for IntervalIndex"
306
+ )
307
+ with pytest.raises(TypeError, match=msg):
308
+ IntervalIndex.from_breaks(data)
309
+
310
+ def test_length_one(self):
311
+ """breaks of length one produce an empty IntervalIndex"""
312
+ breaks = [0]
313
+ result = IntervalIndex.from_breaks(breaks)
314
+ expected = IntervalIndex.from_breaks([])
315
+ tm.assert_index_equal(result, expected)
316
+
317
+ def test_left_right_dont_share_data(self):
318
+ # GH#36310
319
+ breaks = np.arange(5)
320
+ result = IntervalIndex.from_breaks(breaks)._data
321
+ assert result._left.base is None or result._left.base is not result._right.base
322
+
323
+
324
+ class TestFromTuples(ConstructorTests):
325
+ """Tests specific to IntervalIndex.from_tuples"""
326
+
327
+ @pytest.fixture
328
+ def constructor(self):
329
+ return IntervalIndex.from_tuples
330
+
331
+ def get_kwargs_from_breaks(self, breaks, closed="right"):
332
+ """
333
+ converts intervals in breaks format to a dictionary of kwargs to
334
+ specific to the format expected by IntervalIndex.from_tuples
335
+ """
336
+ if is_unsigned_integer_dtype(breaks):
337
+ pytest.skip(f"{breaks.dtype} not relevant IntervalIndex.from_tuples tests")
338
+
339
+ if len(breaks) == 0:
340
+ return {"data": breaks}
341
+
342
+ tuples = list(zip(breaks[:-1], breaks[1:]))
343
+ if isinstance(breaks, (list, tuple)):
344
+ return {"data": tuples}
345
+ elif isinstance(getattr(breaks, "dtype", None), CategoricalDtype):
346
+ return {"data": breaks._constructor(tuples)}
347
+ return {"data": com.asarray_tuplesafe(tuples)}
348
+
349
+ def test_constructor_errors(self):
350
+ # non-tuple
351
+ tuples = [(0, 1), 2, (3, 4)]
352
+ msg = "IntervalIndex.from_tuples received an invalid item, 2"
353
+ with pytest.raises(TypeError, match=msg.format(t=tuples)):
354
+ IntervalIndex.from_tuples(tuples)
355
+
356
+ # too few/many items
357
+ tuples = [(0, 1), (2,), (3, 4)]
358
+ msg = "IntervalIndex.from_tuples requires tuples of length 2, got {t}"
359
+ with pytest.raises(ValueError, match=msg.format(t=tuples)):
360
+ IntervalIndex.from_tuples(tuples)
361
+
362
+ tuples = [(0, 1), (2, 3, 4), (5, 6)]
363
+ with pytest.raises(ValueError, match=msg.format(t=tuples)):
364
+ IntervalIndex.from_tuples(tuples)
365
+
366
+ def test_na_tuples(self):
367
+ # tuple (NA, NA) evaluates the same as NA as an element
368
+ na_tuple = [(0, 1), (np.nan, np.nan), (2, 3)]
369
+ idx_na_tuple = IntervalIndex.from_tuples(na_tuple)
370
+ idx_na_element = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
371
+ tm.assert_index_equal(idx_na_tuple, idx_na_element)
372
+
373
+
374
+ class TestClassConstructors(ConstructorTests):
375
+ """Tests specific to the IntervalIndex/Index constructors"""
376
+
377
+ @pytest.fixture(
378
+ params=[IntervalIndex, partial(Index, dtype="interval")],
379
+ ids=["IntervalIndex", "Index"],
380
+ )
381
+ def klass(self, request):
382
+ # We use a separate fixture here to include Index.__new__ with dtype kwarg
383
+ return request.param
384
+
385
+ @pytest.fixture
386
+ def constructor(self):
387
+ return IntervalIndex
388
+
389
+ def get_kwargs_from_breaks(self, breaks, closed="right"):
390
+ """
391
+ converts intervals in breaks format to a dictionary of kwargs to
392
+ specific to the format expected by the IntervalIndex/Index constructors
393
+ """
394
+ if is_unsigned_integer_dtype(breaks):
395
+ pytest.skip(f"{breaks.dtype} not relevant for class constructor tests")
396
+
397
+ if len(breaks) == 0:
398
+ return {"data": breaks}
399
+
400
+ ivs = [
401
+ Interval(left, right, closed) if notna(left) else left
402
+ for left, right in zip(breaks[:-1], breaks[1:])
403
+ ]
404
+
405
+ if isinstance(breaks, list):
406
+ return {"data": ivs}
407
+ elif isinstance(getattr(breaks, "dtype", None), CategoricalDtype):
408
+ return {"data": breaks._constructor(ivs)}
409
+ return {"data": np.array(ivs, dtype=object)}
410
+
411
+ def test_generic_errors(self, constructor):
412
+ """
413
+ override the base class implementation since errors are handled
414
+ differently; checks unnecessary since caught at the Interval level
415
+ """
416
+
417
+ def test_constructor_string(self):
418
+ # GH23013
419
+ # When forming the interval from breaks,
420
+ # the interval of strings is already forbidden.
421
+ pass
422
+
423
+ def test_constructor_errors(self, klass):
424
+ # mismatched closed within intervals with no constructor override
425
+ ivs = [Interval(0, 1, closed="right"), Interval(2, 3, closed="left")]
426
+ msg = "intervals must all be closed on the same side"
427
+ with pytest.raises(ValueError, match=msg):
428
+ klass(ivs)
429
+
430
+ # scalar
431
+ msg = (
432
+ r"(IntervalIndex|Index)\(...\) must be called with a collection of "
433
+ "some kind, 5 was passed"
434
+ )
435
+ with pytest.raises(TypeError, match=msg):
436
+ klass(5)
437
+
438
+ # not an interval; dtype depends on 32bit/windows builds
439
+ msg = "type <class 'numpy.int(32|64)'> with value 0 is not an interval"
440
+ with pytest.raises(TypeError, match=msg):
441
+ klass([0, 1])
442
+
443
+ @pytest.mark.parametrize(
444
+ "data, closed",
445
+ [
446
+ ([], "both"),
447
+ ([np.nan, np.nan], "neither"),
448
+ (
449
+ [Interval(0, 3, closed="neither"), Interval(2, 5, closed="neither")],
450
+ "left",
451
+ ),
452
+ (
453
+ [Interval(0, 3, closed="left"), Interval(2, 5, closed="right")],
454
+ "neither",
455
+ ),
456
+ (IntervalIndex.from_breaks(range(5), closed="both"), "right"),
457
+ ],
458
+ )
459
+ def test_override_inferred_closed(self, constructor, data, closed):
460
+ # GH 19370
461
+ if isinstance(data, IntervalIndex):
462
+ tuples = data.to_tuples()
463
+ else:
464
+ tuples = [(iv.left, iv.right) if notna(iv) else iv for iv in data]
465
+ expected = IntervalIndex.from_tuples(tuples, closed=closed)
466
+ result = constructor(data, closed=closed)
467
+ tm.assert_index_equal(result, expected)
468
+
469
+ @pytest.mark.parametrize(
470
+ "values_constructor", [list, np.array, IntervalIndex, IntervalArray]
471
+ )
472
+ def test_index_object_dtype(self, values_constructor):
473
+ # Index(intervals, dtype=object) is an Index (not an IntervalIndex)
474
+ intervals = [Interval(0, 1), Interval(1, 2), Interval(2, 3)]
475
+ values = values_constructor(intervals)
476
+ result = Index(values, dtype=object)
477
+
478
+ assert type(result) is Index
479
+ tm.assert_numpy_array_equal(result.values, np.array(values))
480
+
481
+ def test_index_mixed_closed(self):
482
+ # GH27172
483
+ intervals = [
484
+ Interval(0, 1, closed="left"),
485
+ Interval(1, 2, closed="right"),
486
+ Interval(2, 3, closed="neither"),
487
+ Interval(3, 4, closed="both"),
488
+ ]
489
+ result = Index(intervals)
490
+ expected = Index(intervals, dtype=object)
491
+ tm.assert_index_equal(result, expected)
492
+
493
+
494
+ @pytest.mark.parametrize("timezone", ["UTC", "US/Pacific", "GMT"])
495
+ def test_interval_index_subtype(timezone, inclusive_endpoints_fixture):
496
+ # GH#46999
497
+ dates = date_range("2022", periods=3, tz=timezone)
498
+ dtype = f"interval[datetime64[ns, {timezone}], {inclusive_endpoints_fixture}]"
499
+ result = IntervalIndex.from_arrays(
500
+ ["2022-01-01", "2022-01-02"],
501
+ ["2022-01-02", "2022-01-03"],
502
+ closed=inclusive_endpoints_fixture,
503
+ dtype=dtype,
504
+ )
505
+ expected = IntervalIndex.from_arrays(
506
+ dates[:-1], dates[1:], closed=inclusive_endpoints_fixture
507
+ )
508
+ tm.assert_index_equal(result, expected)
509
+
510
+
511
+ def test_dtype_closed_mismatch():
512
+ # GH#38394 closed specified in both dtype and IntervalIndex constructor
513
+
514
+ dtype = IntervalDtype(np.int64, "left")
515
+
516
+ msg = "closed keyword does not match dtype.closed"
517
+ with pytest.raises(ValueError, match=msg):
518
+ IntervalIndex([], dtype=dtype, closed="neither")
519
+
520
+ with pytest.raises(ValueError, match=msg):
521
+ IntervalArray([], dtype=dtype, closed="neither")
522
+
523
+
524
+ @pytest.mark.parametrize(
525
+ "dtype",
526
+ ["Float64", pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow"))],
527
+ )
528
+ def test_ea_dtype(dtype):
529
+ # GH#56765
530
+ bins = [(0.0, 0.4), (0.4, 0.6)]
531
+ interval_dtype = IntervalDtype(subtype=dtype, closed="left")
532
+ result = IntervalIndex.from_tuples(bins, closed="left", dtype=interval_dtype)
533
+ assert result.dtype == interval_dtype
534
+ expected = IntervalIndex.from_tuples(bins, closed="left").astype(interval_dtype)
535
+ tm.assert_index_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_equals.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ from pandas import (
4
+ IntervalIndex,
5
+ date_range,
6
+ )
7
+
8
+
9
+ class TestEquals:
10
+ def test_equals(self, closed):
11
+ expected = IntervalIndex.from_breaks(np.arange(5), closed=closed)
12
+ assert expected.equals(expected)
13
+ assert expected.equals(expected.copy())
14
+
15
+ assert not expected.equals(expected.astype(object))
16
+ assert not expected.equals(np.array(expected))
17
+ assert not expected.equals(list(expected))
18
+
19
+ assert not expected.equals([1, 2])
20
+ assert not expected.equals(np.array([1, 2]))
21
+ assert not expected.equals(date_range("20130101", periods=2))
22
+
23
+ expected_name1 = IntervalIndex.from_breaks(
24
+ np.arange(5), closed=closed, name="foo"
25
+ )
26
+ expected_name2 = IntervalIndex.from_breaks(
27
+ np.arange(5), closed=closed, name="bar"
28
+ )
29
+ assert expected.equals(expected_name1)
30
+ assert expected_name1.equals(expected_name2)
31
+
32
+ for other_closed in {"left", "right", "both", "neither"} - {closed}:
33
+ expected_other_closed = IntervalIndex.from_breaks(
34
+ np.arange(5), closed=other_closed
35
+ )
36
+ assert not expected.equals(expected_other_closed)
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_formats.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas import (
5
+ DataFrame,
6
+ DatetimeIndex,
7
+ Index,
8
+ Interval,
9
+ IntervalIndex,
10
+ Series,
11
+ Timedelta,
12
+ Timestamp,
13
+ )
14
+ import pandas._testing as tm
15
+
16
+
17
+ class TestIntervalIndexRendering:
18
+ # TODO: this is a test for DataFrame/Series, not IntervalIndex
19
+ @pytest.mark.parametrize(
20
+ "constructor,expected",
21
+ [
22
+ (
23
+ Series,
24
+ (
25
+ "(0.0, 1.0] a\n"
26
+ "NaN b\n"
27
+ "(2.0, 3.0] c\n"
28
+ "dtype: object"
29
+ ),
30
+ ),
31
+ (DataFrame, (" 0\n(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c")),
32
+ ],
33
+ )
34
+ def test_repr_missing(self, constructor, expected, using_infer_string, request):
35
+ # GH 25984
36
+ if using_infer_string and constructor is Series:
37
+ request.applymarker(pytest.mark.xfail(reason="repr different"))
38
+ index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
39
+ obj = constructor(list("abc"), index=index)
40
+ result = repr(obj)
41
+ assert result == expected
42
+
43
+ def test_repr_floats(self):
44
+ # GH 32553
45
+
46
+ markers = Series(
47
+ [1, 2],
48
+ index=IntervalIndex(
49
+ [
50
+ Interval(left, right)
51
+ for left, right in zip(
52
+ Index([329.973, 345.137], dtype="float64"),
53
+ Index([345.137, 360.191], dtype="float64"),
54
+ )
55
+ ]
56
+ ),
57
+ )
58
+ result = str(markers)
59
+ expected = "(329.973, 345.137] 1\n(345.137, 360.191] 2\ndtype: int64"
60
+ assert result == expected
61
+
62
+ @pytest.mark.filterwarnings(
63
+ "ignore:invalid value encountered in cast:RuntimeWarning"
64
+ )
65
+ @pytest.mark.parametrize(
66
+ "tuples, closed, expected_data",
67
+ [
68
+ ([(0, 1), (1, 2), (2, 3)], "left", ["[0, 1)", "[1, 2)", "[2, 3)"]),
69
+ (
70
+ [(0.5, 1.0), np.nan, (2.0, 3.0)],
71
+ "right",
72
+ ["(0.5, 1.0]", "NaN", "(2.0, 3.0]"],
73
+ ),
74
+ (
75
+ [
76
+ (Timestamp("20180101"), Timestamp("20180102")),
77
+ np.nan,
78
+ ((Timestamp("20180102"), Timestamp("20180103"))),
79
+ ],
80
+ "both",
81
+ [
82
+ "[2018-01-01 00:00:00, 2018-01-02 00:00:00]",
83
+ "NaN",
84
+ "[2018-01-02 00:00:00, 2018-01-03 00:00:00]",
85
+ ],
86
+ ),
87
+ (
88
+ [
89
+ (Timedelta("0 days"), Timedelta("1 days")),
90
+ (Timedelta("1 days"), Timedelta("2 days")),
91
+ np.nan,
92
+ ],
93
+ "neither",
94
+ [
95
+ "(0 days 00:00:00, 1 days 00:00:00)",
96
+ "(1 days 00:00:00, 2 days 00:00:00)",
97
+ "NaN",
98
+ ],
99
+ ),
100
+ ],
101
+ )
102
+ def test_get_values_for_csv(self, tuples, closed, expected_data):
103
+ # GH 28210
104
+ index = IntervalIndex.from_tuples(tuples, closed=closed)
105
+ result = index._get_values_for_csv(na_rep="NaN")
106
+ expected = np.array(expected_data)
107
+ tm.assert_numpy_array_equal(result, expected)
108
+
109
+ def test_timestamp_with_timezone(self, unit):
110
+ # GH 55035
111
+ left = DatetimeIndex(["2020-01-01"], dtype=f"M8[{unit}, UTC]")
112
+ right = DatetimeIndex(["2020-01-02"], dtype=f"M8[{unit}, UTC]")
113
+ index = IntervalIndex.from_arrays(left, right)
114
+ result = repr(index)
115
+ expected = (
116
+ "IntervalIndex([(2020-01-01 00:00:00+00:00, 2020-01-02 00:00:00+00:00]], "
117
+ f"dtype='interval[datetime64[{unit}, UTC], right]')"
118
+ )
119
+ assert result == expected
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_indexing.py ADDED
@@ -0,0 +1,674 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+ from pandas.errors import InvalidIndexError
7
+
8
+ from pandas import (
9
+ NA,
10
+ CategoricalIndex,
11
+ DatetimeIndex,
12
+ Index,
13
+ Interval,
14
+ IntervalIndex,
15
+ MultiIndex,
16
+ NaT,
17
+ Timedelta,
18
+ Timestamp,
19
+ array,
20
+ date_range,
21
+ interval_range,
22
+ isna,
23
+ period_range,
24
+ timedelta_range,
25
+ )
26
+ import pandas._testing as tm
27
+
28
+
29
+ class TestGetItem:
30
+ def test_getitem(self, closed):
31
+ idx = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed)
32
+ assert idx[0] == Interval(0.0, 1.0, closed=closed)
33
+ assert idx[1] == Interval(1.0, 2.0, closed=closed)
34
+ assert isna(idx[2])
35
+
36
+ result = idx[0:1]
37
+ expected = IntervalIndex.from_arrays((0.0,), (1.0,), closed=closed)
38
+ tm.assert_index_equal(result, expected)
39
+
40
+ result = idx[0:2]
41
+ expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0), closed=closed)
42
+ tm.assert_index_equal(result, expected)
43
+
44
+ result = idx[1:3]
45
+ expected = IntervalIndex.from_arrays(
46
+ (1.0, np.nan), (2.0, np.nan), closed=closed
47
+ )
48
+ tm.assert_index_equal(result, expected)
49
+
50
+ def test_getitem_2d_deprecated(self):
51
+ # GH#30588 multi-dim indexing is deprecated, but raising is also acceptable
52
+ idx = IntervalIndex.from_breaks(range(11), closed="right")
53
+ with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"):
54
+ idx[:, None]
55
+ with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"):
56
+ # GH#44051
57
+ idx[True]
58
+ with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"):
59
+ # GH#44051
60
+ idx[False]
61
+
62
+
63
+ class TestWhere:
64
+ def test_where(self, listlike_box):
65
+ klass = listlike_box
66
+
67
+ idx = IntervalIndex.from_breaks(range(11), closed="right")
68
+ cond = [True] * len(idx)
69
+ expected = idx
70
+ result = expected.where(klass(cond))
71
+ tm.assert_index_equal(result, expected)
72
+
73
+ cond = [False] + [True] * len(idx[1:])
74
+ expected = IntervalIndex([np.nan] + idx[1:].tolist())
75
+ result = idx.where(klass(cond))
76
+ tm.assert_index_equal(result, expected)
77
+
78
+
79
+ class TestTake:
80
+ def test_take(self, closed):
81
+ index = IntervalIndex.from_breaks(range(11), closed=closed)
82
+
83
+ result = index.take(range(10))
84
+ tm.assert_index_equal(result, index)
85
+
86
+ result = index.take([0, 0, 1])
87
+ expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2], closed=closed)
88
+ tm.assert_index_equal(result, expected)
89
+
90
+
91
+ class TestGetLoc:
92
+ @pytest.mark.parametrize("side", ["right", "left", "both", "neither"])
93
+ def test_get_loc_interval(self, closed, side):
94
+ idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
95
+
96
+ for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3], [-1, 4]]:
97
+ # if get_loc is supplied an interval, it should only search
98
+ # for exact matches, not overlaps or covers, else KeyError.
99
+ msg = re.escape(f"Interval({bound[0]}, {bound[1]}, closed='{side}')")
100
+ if closed == side:
101
+ if bound == [0, 1]:
102
+ assert idx.get_loc(Interval(0, 1, closed=side)) == 0
103
+ elif bound == [2, 3]:
104
+ assert idx.get_loc(Interval(2, 3, closed=side)) == 1
105
+ else:
106
+ with pytest.raises(KeyError, match=msg):
107
+ idx.get_loc(Interval(*bound, closed=side))
108
+ else:
109
+ with pytest.raises(KeyError, match=msg):
110
+ idx.get_loc(Interval(*bound, closed=side))
111
+
112
+ @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5])
113
+ def test_get_loc_scalar(self, closed, scalar):
114
+ # correct = {side: {query: answer}}.
115
+ # If query is not in the dict, that query should raise a KeyError
116
+ correct = {
117
+ "right": {0.5: 0, 1: 0, 2.5: 1, 3: 1},
118
+ "left": {0: 0, 0.5: 0, 2: 1, 2.5: 1},
119
+ "both": {0: 0, 0.5: 0, 1: 0, 2: 1, 2.5: 1, 3: 1},
120
+ "neither": {0.5: 0, 2.5: 1},
121
+ }
122
+
123
+ idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
124
+
125
+ # if get_loc is supplied a scalar, it should return the index of
126
+ # the interval which contains the scalar, or KeyError.
127
+ if scalar in correct[closed].keys():
128
+ assert idx.get_loc(scalar) == correct[closed][scalar]
129
+ else:
130
+ with pytest.raises(KeyError, match=str(scalar)):
131
+ idx.get_loc(scalar)
132
+
133
+ @pytest.mark.parametrize("scalar", [-1, 0, 0.5, 3, 4.5, 5, 6])
134
+ def test_get_loc_length_one_scalar(self, scalar, closed):
135
+ # GH 20921
136
+ index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
137
+ if scalar in index[0]:
138
+ result = index.get_loc(scalar)
139
+ assert result == 0
140
+ else:
141
+ with pytest.raises(KeyError, match=str(scalar)):
142
+ index.get_loc(scalar)
143
+
144
@pytest.mark.parametrize("other_closed", ["left", "right", "both", "neither"])
@pytest.mark.parametrize("left, right", [(0, 5), (-1, 4), (-1, 6), (6, 7)])
def test_get_loc_length_one_interval(self, left, right, closed, other_closed):
    """An Interval key is found only when it equals the index's single interval."""
    # GH 20921
    index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
    interval = Interval(left, right, closed=other_closed)

    is_exact_match = interval == index[0]
    if not is_exact_match:
        # the KeyError message is the repr of the missing interval
        msg = re.escape(f"Interval({left}, {right}, closed='{other_closed}')")
        with pytest.raises(KeyError, match=msg):
            index.get_loc(interval)
        return

    assert index.get_loc(interval) == 0
159
+
160
# Make consistent with test_interval_new.py (see #16316, #16386)
@pytest.mark.parametrize(
    "breaks",
    [
        date_range("20180101", periods=4),
        date_range("20180101", periods=4, tz="US/Eastern"),
        timedelta_range("0 days", periods=4),
    ],
    ids=lambda x: str(x.dtype),
)
def test_get_loc_datetimelike_nonoverlapping(self, breaks):
    """Scalar and Interval lookups both find position 0 in a non-overlapping index."""
    # GH 20636
    # nonoverlapping = IntervalIndex method and no i8 conversion
    index = IntervalIndex.from_breaks(breaks)
    first = index[0]

    # a point inside the first interval
    assert index.get_loc(first.mid) == 0

    # an interval equal to the first one
    assert index.get_loc(Interval(first.left, first.right)) == 0
184
+
185
@pytest.mark.parametrize(
    "arrays",
    [
        (date_range("20180101", periods=4), date_range("20180103", periods=4)),
        (
            date_range("20180101", periods=4, tz="US/Eastern"),
            date_range("20180103", periods=4, tz="US/Eastern"),
        ),
        (
            timedelta_range("0 days", periods=4),
            timedelta_range("2 days", periods=4),
        ),
    ],
    ids=lambda x: str(x[0].dtype),
)
def test_get_loc_datetimelike_overlapping(self, arrays):
    """With overlapping intervals a scalar maps to a slice, an Interval to one position."""
    # GH 20636
    left, right = arrays
    index = IntervalIndex.from_arrays(left, right)
    first = index[0]

    # this point is expected to resolve to the slice covering positions 0 and 1
    point = first.mid + Timedelta("12 hours")
    assert index.get_loc(point) == slice(0, 2, None)

    # an exact Interval key still resolves to a single position
    assert index.get_loc(Interval(first.left, first.right)) == 0
213
+
214
@pytest.mark.parametrize(
    "values",
    [
        date_range("2018-01-04", periods=4, freq="-1D"),
        date_range("2018-01-04", periods=4, freq="-1D", tz="US/Eastern"),
        timedelta_range("3 days", periods=4, freq="-1D"),
        np.arange(3.0, -1.0, -1.0),
        np.arange(3, -1, -1),
    ],
    ids=lambda x: str(x.dtype),
)
def test_get_loc_decreasing(self, values):
    """The first interval of an index built from decreasing values is found at 0."""
    # GH 25860
    lefts, rights = values[1:], values[:-1]
    index = IntervalIndex.from_arrays(lefts, rights)
    assert index.get_loc(index[0]) == 0
231
+
232
@pytest.mark.parametrize("key", [[5], (2, 3)])
def test_get_loc_non_scalar_errors(self, key):
    """get_loc with a non-scalar key (list or tuple) raises InvalidIndexError."""
    # GH 31117
    idx = IntervalIndex.from_tuples([(1, 3), (2, 4), (3, 5), (7, 10), (3, 10)])

    # pytest applies ``match`` as a regex: unescaped, "[5]" is a character
    # class and "(2, 3)" a group, so escape to match the literal key text.
    msg = re.escape(str(key))
    with pytest.raises(InvalidIndexError, match=msg):
        idx.get_loc(key)
240
+
241
def test_get_indexer_with_nans(self):
    """
    NA-like keys are contained in an index with missing intervals and
    get_loc returns a boolean mask for them; NaT-like keys raise KeyError.

    Despite its name, this test calls ``get_loc``.
    """
    # GH#41831
    index = IntervalIndex([np.nan, Interval(1, 2), np.nan])
    na_mask = np.array([True, False, True])

    for na_key in (None, np.nan, NA):
        assert na_key in index
        tm.assert_numpy_array_equal(index.get_loc(na_key), na_mask)

    nat_keys = (NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns"))
    for nat_key in nat_keys:
        with pytest.raises(KeyError, match=str(nat_key)):
            index.get_loc(nat_key)
254
+
255
+
256
class TestGetIndexer:
    """Tests for get_indexer, get_indexer_non_unique and get_indexer_for."""

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([Interval(2, 4, closed="right")], [1]),
            ([Interval(2, 4, closed="left")], [-1]),
            ([Interval(2, 4, closed="both")], [-1]),
            ([Interval(2, 4, closed="neither")], [-1]),
            ([Interval(1, 4, closed="right")], [-1]),
            ([Interval(0, 4, closed="right")], [-1]),
            ([Interval(0.5, 1.5, closed="right")], [-1]),
            ([Interval(2, 4, closed="right"), Interval(0, 1, closed="right")], [1, -1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="right")], [1, 1]),
            ([Interval(5, 7, closed="right"), Interval(2, 4, closed="right")], [2, 1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="left")], [1, -1]),
        ],
    )
    def test_get_indexer_with_interval(self, query, expected):
        # Interval queries: only exact matches get a position, the rest -1
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (5, 7)], closed="right")

        tm.assert_numpy_array_equal(
            index.get_indexer(query), np.array(expected, dtype="intp")
        )

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], [-1]),
            ([0], [-1]),
            ([0.5], [0]),
            ([1], [0]),
            ([1.5], [1]),
            ([2], [1]),
            ([2.5], [-1]),
            ([3], [-1]),
            ([3.5], [2]),
            ([4], [2]),
            ([4.5], [-1]),
            ([1, 2], [0, 1]),
            ([1, 2, 3], [0, 1, -1]),
            ([1, 2, 3, 4], [0, 1, -1, 2]),
            ([1, 2, 3, 4, 2], [0, 1, -1, 2, 1]),
        ],
    )
    def test_get_indexer_with_int_and_float(self, query, expected):
        # scalar queries map to the position of the containing interval
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed="right")

        tm.assert_numpy_array_equal(
            index.get_indexer(query), np.array(expected, dtype="intp")
        )

    @pytest.mark.parametrize("item", [[3], np.arange(0.5, 5, 0.5)])
    def test_get_indexer_length_one(self, item, closed):
        # GH 17284
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)

        # every queried point lies in the single interval at position 0
        all_first = np.array([0] * len(item), dtype="intp")
        tm.assert_numpy_array_equal(index.get_indexer(item), all_first)

    @pytest.mark.parametrize("size", [1, 5])
    def test_get_indexer_length_one_interval(self, size, closed):
        # GH 17284
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        query = [Interval(0, 5, closed)] * size

        all_first = np.array([0] * size, dtype="intp")
        tm.assert_numpy_array_equal(index.get_indexer(query), all_first)

    @pytest.mark.parametrize(
        "target",
        [
            IntervalIndex.from_tuples([(7, 8), (1, 2), (3, 4), (0, 1)]),
            IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4), np.nan]),
            IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed="both"),
            [-1, 0, 0.5, 1, 2, 2.5, np.nan],
            ["foo", "foo", "bar", "baz"],
        ],
    )
    def test_get_indexer_categorical(self, target, ordered):
        # GH 30063: categorical and non-categorical results should be consistent
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])
        as_categorical = CategoricalIndex(target, ordered=ordered)

        from_categorical = index.get_indexer(as_categorical)
        from_plain = index.get_indexer(target)
        tm.assert_numpy_array_equal(from_categorical, from_plain)

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    def test_get_indexer_categorical_with_nans(self):
        # GH#41934 nans in both index and in target
        ii = IntervalIndex.from_breaks(range(5))
        ii2 = ii.append(IntervalIndex([np.nan]))
        ci2 = CategoricalIndex(ii2)

        tm.assert_numpy_array_equal(
            ii2.get_indexer(ci2), np.arange(5, dtype=np.intp)
        )

        # not-all-matches
        tm.assert_numpy_array_equal(
            ii2[1:].get_indexer(ci2[::-1]),
            np.array([3, 2, 1, 0, -1], dtype=np.intp),
        )

        # non-unique target, non-unique nans
        tm.assert_numpy_array_equal(
            ii2.get_indexer(ci2.append(ci2)),
            np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=np.intp),
        )

    def test_get_indexer_datetime(self):
        ii = IntervalIndex.from_breaks(date_range("2018-01-01", periods=4))
        # TODO: with mismatched resolution get_indexer currently raises;
        # this should probably coerce?
        target = DatetimeIndex(["2018-01-02"], dtype="M8[ns]")
        found_first = np.array([0], dtype=np.intp)

        tm.assert_numpy_array_equal(ii.get_indexer(target), found_first)

        # string form of the same timestamp gives the same answer
        tm.assert_numpy_array_equal(ii.get_indexer(target.astype(str)), found_first)

        # https://github.com/pandas-dev/pandas/issues/47772
        not_found = np.array([-1], dtype=np.intp)
        tm.assert_numpy_array_equal(ii.get_indexer(target.asi8), not_found)

    @pytest.mark.parametrize(
        "tuples, closed",
        [
            ([(0, 2), (1, 3), (3, 4)], "neither"),
            ([(0, 5), (1, 4), (6, 7)], "left"),
            ([(0, 1), (0, 1), (1, 2)], "right"),
            ([(0, 1), (2, 3), (3, 4)], "both"),
        ],
    )
    def test_get_indexer_errors(self, tuples, closed):
        # IntervalIndex needs non-overlapping for uniqueness when querying
        index = IntervalIndex.from_tuples(tuples, closed=closed)

        msg = (
            "cannot handle overlapping indices; use "
            "IntervalIndex.get_indexer_non_unique"
        )
        with pytest.raises(InvalidIndexError, match=msg):
            index.get_indexer([0, 2])

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], ([-1], [0])),
            ([0], ([0], [])),
            ([0.5], ([0], [])),
            ([1], ([0, 1], [])),
            ([1.5], ([0, 1], [])),
            ([2], ([0, 1, 2], [])),
            ([2.5], ([1, 2], [])),
            ([3], ([2], [])),
            ([3.5], ([2], [])),
            ([4], ([-1], [0])),
            ([4.5], ([-1], [0])),
            ([1, 2], ([0, 1, 0, 1, 2], [])),
            ([1, 2, 3], ([0, 1, 0, 1, 2, 2], [])),
            ([1, 2, 3, 4], ([0, 1, 0, 1, 2, 2, -1], [3])),
            ([1, 2, 3, 4, 2], ([0, 1, 0, 1, 2, 2, -1, 0, 1, 2], [3])),
        ],
    )
    def test_get_indexer_non_unique_with_int_and_float(self, query, expected):
        index = IntervalIndex.from_tuples([(0, 2.5), (1, 3), (2, 4)], closed="left")

        # expected = (indexer positions, positions in the query with no match)
        expected_indexer = np.array(expected[0], dtype="intp")
        expected_missing = np.array(expected[1], dtype="intp")

        result_indexer, result_missing = index.get_indexer_non_unique(query)

        tm.assert_numpy_array_equal(result_indexer, expected_indexer)
        tm.assert_numpy_array_equal(result_missing, expected_missing)

        # TODO we may also want to test get_indexer for the case when
        # the intervals are duplicated, decreasing, non-monotonic, etc..

    def test_get_indexer_non_monotonic(self):
        # GH 16410
        idx1 = IntervalIndex.from_tuples([(2, 3), (4, 5), (0, 1)])
        idx2 = IntervalIndex.from_tuples([(0, 1), (2, 3), (6, 7), (8, 9)])

        tm.assert_numpy_array_equal(
            idx1.get_indexer(idx2), np.array([2, 0, -1, -1], dtype=np.intp)
        )
        tm.assert_numpy_array_equal(
            idx1.get_indexer(idx1[1:]), np.array([1, 2], dtype=np.intp)
        )

    def test_get_indexer_with_nans(self):
        # GH#41831
        index = IntervalIndex([np.nan, np.nan])
        other = IntervalIndex([np.nan])

        assert not index._index_as_unique

        # both missing entries of ``index`` match the single NaN in ``other``
        tm.assert_numpy_array_equal(
            index.get_indexer_for(other), np.array([0, 1], dtype=np.intp)
        )

    def test_get_index_non_unique_non_monotonic(self):
        # GH#44084 (root cause)
        index = IntervalIndex.from_tuples(
            [(0.0, 1.0), (1.0, 2.0), (0.0, 1.0), (1.0, 2.0)]
        )

        indexer, _ = index.get_indexer_non_unique([Interval(1.0, 2.0)])
        tm.assert_numpy_array_equal(indexer, np.array([1, 3], dtype=np.intp))

    def test_get_indexer_multiindex_with_intervals(self):
        # GH#44084 (MultiIndex case as reported)
        interval_index = IntervalIndex.from_tuples(
            [(2.0, 3.0), (0.0, 1.0), (1.0, 2.0)], name="interval"
        )
        foo_index = Index([1, 2, 3], name="foo")
        multi_index = MultiIndex.from_product([foo_index, interval_index])

        interval_level = multi_index.get_level_values("interval")
        result = interval_level.get_indexer_for([Interval(0.0, 1.0)])

        tm.assert_numpy_array_equal(result, np.array([1, 4, 7], dtype=np.intp))

    @pytest.mark.parametrize("box", [IntervalIndex, array, list])
    def test_get_indexer_interval_index(self, box):
        # GH#30178
        rng = period_range("2022-07-01", freq="D", periods=3)
        idx = box(interval_range(Timestamp("2022-07-01"), freq="3D", periods=3))

        # the period index is expected to find none of the intervals
        tm.assert_numpy_array_equal(
            rng.get_indexer(idx), np.array([-1, -1, -1], dtype=np.intp)
        )

    def test_get_indexer_read_only(self):
        idx = interval_range(start=0, end=5)
        arr = np.array([1, 2])
        arr.flags.writeable = False  # the target array is read-only
        expected = np.array([0, 1])

        tm.assert_numpy_array_equal(
            idx.get_indexer(arr), expected, check_dtype=False
        )
        tm.assert_numpy_array_equal(
            idx.get_indexer_non_unique(arr)[0], expected, check_dtype=False
        )
506
+
507
+
508
class TestSliceLocs:
    """Tests for IntervalIndex.slice_locs with Interval and scalar bounds."""

    def test_slice_locs_with_interval(self):
        lo = Interval(0, 2)
        hi = Interval(2, 4)

        # the same five queries are run against each index, in this order
        queries = [
            {"start": lo, "end": hi},
            {"start": lo},
            {"end": hi},
            {"end": lo},
            {"start": hi, "end": lo},
        ]

        def check(index, expected_locs):
            for kwargs, expected in zip(queries, expected_locs):
                assert index.slice_locs(**kwargs) == expected

        # increasing monotonically
        index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)])
        check(index, [(0, 3), (0, 3), (0, 3), (0, 1), (2, 1)])

        # decreasing monotonically
        index = IntervalIndex.from_tuples([(2, 4), (1, 3), (0, 2)])
        check(index, [(2, 1), (2, 3), (0, 1), (0, 3), (0, 3)])

        # sorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)])
        check(index, [(0, 3), (0, 3), (0, 3), (0, 2), (2, 2)])

        # unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])
        left_msg = re.escape(
            '"Cannot get left slice bound for non-unique label: '
            "Interval(0, 2, closed='right')\""
        )
        right_msg = re.escape(
            '"Cannot get right slice bound for non-unique label: '
            "Interval(0, 2, closed='right')\""
        )

        with pytest.raises(KeyError, match=left_msg):
            index.slice_locs(start=Interval(0, 2), end=Interval(2, 4))

        with pytest.raises(KeyError, match=left_msg):
            index.slice_locs(start=Interval(0, 2))

        assert index.slice_locs(end=Interval(2, 4)) == (0, 2)

        with pytest.raises(KeyError, match=right_msg):
            index.slice_locs(end=Interval(0, 2))

        with pytest.raises(KeyError, match=right_msg):
            index.slice_locs(start=Interval(2, 4), end=Interval(0, 2))

        # another unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4), (1, 3)])
        check(index, [(0, 3), (0, 4), (0, 3), (0, 2), (2, 2)])

    def test_slice_locs_with_ints_and_floats_succeeds(self):
        bounds = [(0, 1), (0, 2), (0, 3), (3, 1), (3, 4), (0, 4)]

        # increasing non-overlapping
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])
        increasing_locs = [(0, 1), (0, 2), (0, 2), (2, 1), (2, 3), (0, 3)]
        for (start, stop), expected in zip(bounds, increasing_locs):
            assert index.slice_locs(start, stop) == expected

        # decreasing non-overlapping
        index = IntervalIndex.from_tuples([(3, 4), (1, 2), (0, 1)])
        decreasing_locs = [(3, 3), (3, 2), (3, 1), (1, 3), (1, 1), (3, 1)]
        for (start, stop), expected in zip(bounds, decreasing_locs):
            assert index.slice_locs(start, stop) == expected

    @pytest.mark.parametrize("query", [[0, 1], [0, 2], [0, 3], [0, 4]])
    @pytest.mark.parametrize(
        "tuples",
        [
            [(0, 2), (1, 3), (2, 4)],
            [(2, 4), (1, 3), (0, 2)],
            [(0, 2), (0, 2), (2, 4)],
            [(0, 2), (2, 4), (0, 2)],
            [(0, 2), (0, 2), (2, 4), (1, 3)],
        ],
    )
    def test_slice_locs_with_ints_and_floats_errors(self, tuples, query):
        # scalar bounds are rejected for each of these index layouts
        start, stop = query
        index = IntervalIndex.from_tuples(tuples)
        msg = (
            "'can only get slices from an IntervalIndex if bounds are "
            "non-overlapping and all monotonic increasing or decreasing'"
        )
        with pytest.raises(KeyError, match=msg):
            index.slice_locs(start, stop)
629
+
630
+
631
class TestPutmask:
    """Tests for IntervalIndex.putmask with datetime64 and timedelta64 breaks."""

    @pytest.mark.parametrize("tz", ["US/Pacific", None])
    def test_putmask_dt64(self, tz):
        # GH#37968
        breaks = date_range("2016-01-01", periods=9, tz=tz)
        idx = IntervalIndex.from_breaks(breaks)

        # mask selects the first three positions
        mask = np.zeros(idx.shape, dtype=bool)
        mask[0:3] = True

        last = idx[-1]
        expected = IntervalIndex([last] * 3 + list(idx[3:]))
        tm.assert_index_equal(idx.putmask(mask, last), expected)

    def test_putmask_td64(self):
        # GH#37968
        dti = date_range("2016-01-01", periods=9)
        breaks = dti - dti[0]
        idx = IntervalIndex.from_breaks(breaks)

        # mask selects the first three positions
        mask = np.zeros(idx.shape, dtype=bool)
        mask[0:3] = True

        last = idx[-1]
        expected = IntervalIndex([last] * 3 + list(idx[3:]))
        tm.assert_index_equal(idx.putmask(mask, last), expected)
655
+
656
+
657
class TestContains:
    # .__contains__, not .contains

    def test_contains_dunder(self):
        """``in`` is True only for an Interval equal to one stored in the index."""
        index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right")

        # __contains__ requires perfect matches to intervals.
        for scalar in (0, 1, 2):
            assert scalar not in index

        assert Interval(0, 1, closed="right") in index

        # different bounds, or same bounds with a different closed side
        non_members = [
            Interval(0, 2, closed="right"),
            Interval(0, 0.5, closed="right"),
            Interval(3, 5, closed="right"),
            Interval(-1, 0, closed="left"),
            Interval(0, 1, closed="left"),
            Interval(0, 1, closed="both"),
        ]
        for interval in non_members:
            assert interval not in index
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval.py ADDED
@@ -0,0 +1,918 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from itertools import permutations
2
+ import re
3
+
4
+ import numpy as np
5
+ import pytest
6
+
7
+ import pandas as pd
8
+ from pandas import (
9
+ Index,
10
+ Interval,
11
+ IntervalIndex,
12
+ Timedelta,
13
+ Timestamp,
14
+ date_range,
15
+ interval_range,
16
+ isna,
17
+ notna,
18
+ timedelta_range,
19
+ )
20
+ import pandas._testing as tm
21
+ import pandas.core.common as com
22
+
23
+
24
@pytest.fixture(params=[None, "foo"])
def name(request):
    """Parametrized fixture yielding None and the string "foo"."""
    chosen = request.param
    return chosen
27
+
28
+
29
+ class TestIntervalIndex:
30
+ index = IntervalIndex.from_arrays([0, 1], [1, 2])
31
+
32
def create_index(self, closed="right"):
    """Return an IntervalIndex built from the integer breaks 0 through 10."""
    breaks = range(11)
    return IntervalIndex.from_breaks(breaks, closed=closed)
34
+
35
def create_index_with_nan(self, closed="right"):
    """Return ten intervals (i, i + 1) for i in 0..9, with position 1 set to NaN."""
    # False marks the single position whose endpoints become NaN
    keep = [True, False] + [True] * 8
    left = np.where(keep, np.arange(10), np.nan)
    right = np.where(keep, np.arange(1, 11), np.nan)
    return IntervalIndex.from_arrays(left, right, closed=closed)
42
+
43
def test_properties(self, closed):
    """Check len/size/shape, left/right/mid, closed and array conversion."""
    index = self.create_index(closed=closed)
    assert len(index) == 10
    assert index.size == 10
    assert index.shape == (10,)

    lefts = Index(np.arange(10, dtype=np.int64))
    rights = Index(np.arange(1, 11, dtype=np.int64))
    mids = Index(np.arange(0.5, 10.5, dtype=np.float64))
    tm.assert_index_equal(index.left, lefts)
    tm.assert_index_equal(index.right, rights)
    tm.assert_index_equal(index.mid, mids)

    assert index.closed == closed

    # converting to an ndarray yields an object array of Interval scalars
    expected = np.array(
        [Interval(lo, lo + 1, closed) for lo in range(10)], dtype=object
    )
    tm.assert_numpy_array_equal(np.asarray(index), expected)

    # with nans
    index = self.create_index_with_nan(closed=closed)
    assert len(index) == 10
    assert index.size == 10
    assert index.shape == (10,)

    expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
    expected_right = expected_left + 1
    expected_mid = expected_left + 0.5
    tm.assert_index_equal(index.left, expected_left)
    tm.assert_index_equal(index.right, expected_right)
    tm.assert_index_equal(index.mid, expected_mid)

    assert index.closed == closed

    # a missing left endpoint corresponds to a NaN element
    with_nan = []
    for left, right in zip(expected_left, expected_right):
        with_nan.append(Interval(left, right, closed) if notna(left) else np.nan)
    expected = np.array(with_nan, dtype=object)
    tm.assert_numpy_array_equal(np.asarray(index), expected)
83
+
84
@pytest.mark.parametrize(
    "breaks",
    [
        [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
        [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
        date_range("2017-01-01", "2017-01-04"),
        pytest.param(
            date_range("2017-01-01", "2017-01-04", unit="s"),
            marks=pytest.mark.xfail(reason="mismatched result unit"),
        ),
        pd.to_timedelta(["1ns", "2ms", "3s", "4min", "5h", "6D"]),
    ],
)
def test_length(self, closed, breaks):
    """IntervalIndex.length equals the per-interval lengths, with and without NA."""
    # GH 18789
    index = IntervalIndex.from_breaks(breaks, closed=closed)
    per_interval = Index(iv.length for iv in index)
    tm.assert_index_equal(index.length, per_interval)

    # with NA
    index = index.insert(1, np.nan)
    # a missing interval is passed through unchanged as its own "length"
    per_interval = Index(iv.length if notna(iv) else iv for iv in index)
    tm.assert_index_equal(index.length, per_interval)
109
+
110
def test_with_nans(self, closed):
    """hasnans, isna and notna for an index without and with a missing interval."""
    index = self.create_index(closed=closed)
    assert index.hasnans is False

    size = len(index)
    tm.assert_numpy_array_equal(index.isna(), np.zeros(size, dtype=bool))
    tm.assert_numpy_array_equal(index.notna(), np.ones(size, dtype=bool))

    index = self.create_index_with_nan(closed=closed)
    assert index.hasnans is True

    # only position 1 is missing
    remaining = len(index) - 2
    tm.assert_numpy_array_equal(
        index.isna(), np.array([False, True] + [False] * remaining)
    )
    tm.assert_numpy_array_equal(
        index.notna(), np.array([True, False] + [True] * remaining)
    )
132
+
133
def test_copy(self, closed):
    """copy() returns an equal index; a deep copy has a distinct ``left`` object."""
    original = self.create_index(closed=closed)

    shallow = original.copy()
    assert shallow.equals(original)

    deep = original.copy(deep=True)
    assert deep.equals(original)
    assert deep.left is not original.left
142
+
143
def test_ensure_copied_data(self, closed):
    """Exercise the ``copy`` flag of the IntervalIndex constructor."""
    index = self.create_index(closed=closed)

    # not copying
    shared = IntervalIndex(index, copy=False)
    tm.assert_numpy_array_equal(
        index.left.values, shared.left.values, check_same="same"
    )
    tm.assert_numpy_array_equal(
        index.right.values, shared.right.values, check_same="same"
    )

    # by-definition make a copy
    rebuilt = IntervalIndex(np.array(index), copy=False)
    tm.assert_numpy_array_equal(
        index.left.values, rebuilt.left.values, check_same="copy"
    )
    tm.assert_numpy_array_equal(
        index.right.values, rebuilt.right.values, check_same="copy"
    )
164
+
165
def test_delete(self, closed):
    """Deleting position 0 leaves the intervals built from breaks 1..10."""
    remaining_breaks = np.arange(1, 11, dtype=np.int64)
    expected = IntervalIndex.from_breaks(remaining_breaks, closed=closed)

    index = self.create_index(closed=closed)
    tm.assert_index_equal(index.delete(0), expected)
170
+
171
@pytest.mark.parametrize(
    "data",
    [
        interval_range(0, periods=10, closed="neither"),
        interval_range(1.7, periods=8, freq=2.5, closed="both"),
        interval_range(Timestamp("20170101"), periods=12, closed="left"),
        interval_range(Timedelta("1 day"), periods=6, closed="right"),
    ],
)
def test_insert(self, data):
    """
    Check IntervalIndex.insert for valid items, invalid items and NA values.

    Valid intervals are inserted in place. Items the interval dtype cannot
    hold (a string, an interval with a different ``closed``) give the same
    result as inserting into the object-dtype index, while the underlying
    array (``data._data``) raises for them.
    """
    item = data[0]
    idx_item = IntervalIndex([item])

    # start
    expected = idx_item.append(data)
    result = data.insert(0, item)
    tm.assert_index_equal(result, expected)

    # end
    expected = data.append(idx_item)
    result = data.insert(len(data), item)
    tm.assert_index_equal(result, expected)

    # mid
    expected = data[:3].append(idx_item).append(data[3:])
    result = data.insert(3, item)
    tm.assert_index_equal(result, expected)

    # invalid type
    result = data.insert(1, "foo")
    expected = data.astype(object).insert(1, "foo")
    tm.assert_index_equal(result, expected)

    msg = "can only insert Interval objects and NA into an IntervalArray"
    with pytest.raises(TypeError, match=msg):
        data._data.insert(1, "foo")

    # invalid closed
    for closed in {"left", "right", "both", "neither"} - {item.closed}:
        msg = f"'value.closed' is '{closed}', expected '{item.closed}'."
        bad_item = Interval(item.left, item.right, closed=closed)
        result = data.insert(1, bad_item)
        expected = data.astype(object).insert(1, bad_item)
        tm.assert_index_equal(result, expected)
        with pytest.raises(ValueError, match=msg):
            data._data.insert(1, bad_item)

    # GH 18295 (test missing)
    na_idx = IntervalIndex([np.nan], closed=data.closed)
    expected_with_na = data[:1].append(na_idx).append(data[1:])
    for na in [np.nan, None, pd.NA]:
        result = data.insert(1, na)
        tm.assert_index_equal(result, expected_with_na)

    if data.left.dtype.kind not in ["m", "M"]:
        # trying to insert pd.NaT into a numeric-dtyped Index should cast
        expected = data.astype(object).insert(1, pd.NaT)

        msg = "can only insert Interval objects and NA into an IntervalArray"
        with pytest.raises(TypeError, match=msg):
            data._data.insert(1, pd.NaT)
    else:
        # for datetimelike endpoints pd.NaT is expected to behave like the
        # other NA values and produce a missing interval
        expected = expected_with_na

    result = data.insert(1, pd.NaT)
    tm.assert_index_equal(result, expected)
236
+
237
+ def test_is_unique_interval(self, closed):
238
+ """
239
+ Interval specific tests for is_unique in addition to base class tests
240
+ """
241
+ # unique overlapping - distinct endpoints
242
+ idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
243
+ assert idx.is_unique is True
244
+
245
+ # unique overlapping - shared endpoints
246
+ idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
247
+ assert idx.is_unique is True
248
+
249
+ # unique nested
250
+ idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
251
+ assert idx.is_unique is True
252
+
253
+ # unique NaN
254
+ idx = IntervalIndex.from_tuples([(np.nan, np.nan)], closed=closed)
255
+ assert idx.is_unique is True
256
+
257
+ # non-unique NaN
258
+ idx = IntervalIndex.from_tuples(
259
+ [(np.nan, np.nan), (np.nan, np.nan)], closed=closed
260
+ )
261
+ assert idx.is_unique is False
262
+
263
+ def test_monotonic(self, closed):
264
+ # increasing non-overlapping
265
+ idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed)
266
+ assert idx.is_monotonic_increasing is True
267
+ assert idx._is_strictly_monotonic_increasing is True
268
+ assert idx.is_monotonic_decreasing is False
269
+ assert idx._is_strictly_monotonic_decreasing is False
270
+
271
+ # decreasing non-overlapping
272
+ idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed)
273
+ assert idx.is_monotonic_increasing is False
274
+ assert idx._is_strictly_monotonic_increasing is False
275
+ assert idx.is_monotonic_decreasing is True
276
+ assert idx._is_strictly_monotonic_decreasing is True
277
+
278
+ # unordered non-overlapping
279
+ idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed)
280
+ assert idx.is_monotonic_increasing is False
281
+ assert idx._is_strictly_monotonic_increasing is False
282
+ assert idx.is_monotonic_decreasing is False
283
+ assert idx._is_strictly_monotonic_decreasing is False
284
+
285
+ # increasing overlapping
286
+ idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed)
287
+ assert idx.is_monotonic_increasing is True
288
+ assert idx._is_strictly_monotonic_increasing is True
289
+ assert idx.is_monotonic_decreasing is False
290
+ assert idx._is_strictly_monotonic_decreasing is False
291
+
292
+ # decreasing overlapping
293
+ idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed)
294
+ assert idx.is_monotonic_increasing is False
295
+ assert idx._is_strictly_monotonic_increasing is False
296
+ assert idx.is_monotonic_decreasing is True
297
+ assert idx._is_strictly_monotonic_decreasing is True
298
+
299
+ # unordered overlapping
300
+ idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed)
301
+ assert idx.is_monotonic_increasing is False
302
+ assert idx._is_strictly_monotonic_increasing is False
303
+ assert idx.is_monotonic_decreasing is False
304
+ assert idx._is_strictly_monotonic_decreasing is False
305
+
306
+ # increasing overlapping shared endpoints
307
+ idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
308
+ assert idx.is_monotonic_increasing is True
309
+ assert idx._is_strictly_monotonic_increasing is True
310
+ assert idx.is_monotonic_decreasing is False
311
+ assert idx._is_strictly_monotonic_decreasing is False
312
+
313
+ # decreasing overlapping shared endpoints
314
+ idx = IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed)
315
+ assert idx.is_monotonic_increasing is False
316
+ assert idx._is_strictly_monotonic_increasing is False
317
+ assert idx.is_monotonic_decreasing is True
318
+ assert idx._is_strictly_monotonic_decreasing is True
319
+
320
+ # stationary
321
+ idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed)
322
+ assert idx.is_monotonic_increasing is True
323
+ assert idx._is_strictly_monotonic_increasing is False
324
+ assert idx.is_monotonic_decreasing is True
325
+ assert idx._is_strictly_monotonic_decreasing is False
326
+
327
+ # empty
328
+ idx = IntervalIndex([], closed=closed)
329
+ assert idx.is_monotonic_increasing is True
330
+ assert idx._is_strictly_monotonic_increasing is True
331
+ assert idx.is_monotonic_decreasing is True
332
+ assert idx._is_strictly_monotonic_decreasing is True
333
+
334
+ def test_is_monotonic_with_nans(self):
335
+ # GH#41831
336
+ index = IntervalIndex([np.nan, np.nan])
337
+
338
+ assert not index.is_monotonic_increasing
339
+ assert not index._is_strictly_monotonic_increasing
340
+ assert not index.is_monotonic_increasing
341
+ assert not index._is_strictly_monotonic_decreasing
342
+ assert not index.is_monotonic_decreasing
343
+
344
@pytest.mark.parametrize(
    "breaks",
    [
        date_range("20180101", periods=4),
        date_range("20180101", periods=4, tz="US/Eastern"),
        timedelta_range("0 days", periods=4),
    ],
    ids=lambda x: str(x.dtype),
)
def test_maybe_convert_i8(self, breaks):
    """``_maybe_convert_i8`` turns datetime-like keys into integer form (GH 20636)."""
    index = IntervalIndex.from_breaks(breaks)
    int_breaks_index = Index(breaks.asi8)

    # intervalindex
    converted = index._maybe_convert_i8(index)
    tm.assert_index_equal(converted, IntervalIndex.from_breaks(breaks.asi8))

    # interval
    converted = index._maybe_convert_i8(Interval(breaks[0], breaks[1]))
    assert converted == Interval(breaks[0]._value, breaks[1]._value)

    # datetimelike index
    converted = index._maybe_convert_i8(breaks)
    tm.assert_index_equal(converted, int_breaks_index)

    # datetimelike scalar
    converted = index._maybe_convert_i8(breaks[0])
    assert converted == breaks[0]._value

    # list-like of datetimelike scalars
    converted = index._maybe_convert_i8(list(breaks))
    tm.assert_index_equal(converted, int_breaks_index)
382
+
383
@pytest.mark.parametrize(
    "breaks",
    [date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5)],
)
def test_maybe_convert_i8_nat(self, breaks):
    """NaT entries are expected to convert to NaN in a float64 Index (GH 20636)."""
    index = IntervalIndex.from_breaks(breaks)

    # all-NaT input
    all_nat = breaks._constructor([pd.NaT] * 3).as_unit("ns")
    all_nan = Index([np.nan] * 3, dtype=np.float64)
    tm.assert_index_equal(index._maybe_convert_i8(all_nat), all_nan)

    # one real value followed by the NaT entries
    mixed = all_nat.insert(0, breaks[0])
    mixed_expected = all_nan.insert(0, float(breaks[0]._value))
    tm.assert_index_equal(index._maybe_convert_i8(mixed), mixed_expected)
400
+
401
@pytest.mark.parametrize(
    "make_key",
    [lambda breaks: breaks, list],
    ids=["lambda", "list"],
)
def test_maybe_convert_i8_numeric(self, make_key, any_real_numpy_dtype):
    """Numeric keys come back as an Index of the 64-bit dtype of their kind (GH 20636)."""
    breaks = np.arange(5, dtype=any_real_numpy_dtype)
    index = IntervalIndex.from_breaks(breaks)
    key = make_key(breaks)

    result = index._maybe_convert_i8(key)

    # expected dtype is looked up from the numpy kind code of the input
    dtype_by_kind = {"i": np.int64, "u": np.uint64, "f": np.float64}
    expected = Index(key, dtype=dtype_by_kind[breaks.dtype.kind])
    tm.assert_index_equal(result, expected)
417
+
418
@pytest.mark.parametrize(
    "make_key",
    [
        IntervalIndex.from_breaks,
        lambda breaks: Interval(breaks[0], breaks[1]),
        lambda breaks: breaks[0],
    ],
    ids=["IntervalIndex", "Interval", "scalar"],
)
def test_maybe_convert_i8_numeric_identical(self, make_key, any_real_numpy_dtype):
    """Numeric IntervalIndex/Interval/scalar keys are returned as the same object.

    GH 20636
    """
    numeric_breaks = np.arange(5, dtype=any_real_numpy_dtype)
    index = IntervalIndex.from_breaks(numeric_breaks)
    key = make_key(numeric_breaks)

    # identity, not just equality: the key must be passed through untouched
    assert index._maybe_convert_i8(key) is key
436
+
437
@pytest.mark.parametrize(
    "breaks1, breaks2",
    permutations(
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        2,
    ),
    ids=lambda x: str(x.dtype),
)
@pytest.mark.parametrize(
    "make_key",
    [
        IntervalIndex.from_breaks,
        lambda breaks: Interval(breaks[0], breaks[1]),
        lambda breaks: breaks,
        lambda breaks: breaks[0],
        list,
    ],
    ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
)
def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key):
    """Keys built from a different datetime-like dtype raise ValueError (GH 20636)."""
    index = IntervalIndex.from_breaks(breaks1)
    key = make_key(breaks2)

    # the dtype reprs may contain regex metacharacters, hence the escape
    pattern = re.escape(
        f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with "
        f"values of dtype {breaks2.dtype}"
    )
    with pytest.raises(ValueError, match=pattern):
        index._maybe_convert_i8(key)
472
+
473
def test_contains_method(self):
    """``IntervalIndex.contains`` tests point membership per interval."""
    # can select values that are IN the range of a value
    i = IntervalIndex.from_arrays([0, 1], [1, 2])

    # points that are expected to fall in neither interval
    in_neither = np.array([False, False], dtype="bool")
    tm.assert_numpy_array_equal(i.contains(0), in_neither)
    tm.assert_numpy_array_equal(i.contains(3), in_neither)

    # points that are expected to fall in the first interval only
    in_first = np.array([True, False], dtype="bool")
    tm.assert_numpy_array_equal(i.contains(0.5), in_first)
    tm.assert_numpy_array_equal(i.contains(1), in_first)

    # __contains__ not implemented for "interval in interval", follow
    # that for the contains method for now
    expected_msg = "contains not implemented for two"
    with pytest.raises(NotImplementedError, match=expected_msg):
        i.contains(Interval(0, 1))
495
+
496
+ def test_dropna(self, closed):
497
+ expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed)
498
+
499
+ ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed)
500
+ result = ii.dropna()
501
+ tm.assert_index_equal(result, expected)
502
+
503
+ ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed)
504
+ result = ii.dropna()
505
+ tm.assert_index_equal(result, expected)
506
+
507
+ def test_non_contiguous(self, closed):
508
+ index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
509
+ target = [0.5, 1.5, 2.5]
510
+ actual = index.get_indexer(target)
511
+ expected = np.array([0, -1, 1], dtype="intp")
512
+ tm.assert_numpy_array_equal(actual, expected)
513
+
514
+ assert 1.5 not in index
515
+
516
def test_isin(self, closed):
    """``isin`` matches whole intervals, including their ``closed`` side."""
    index = self.create_index(closed=closed)
    size = len(index)

    # only the first element is present in the candidates
    first_only = np.array([True] + [False] * (size - 1))
    tm.assert_numpy_array_equal(index.isin(index[:1]), first_only)
    tm.assert_numpy_array_equal(index.isin([index[0]]), first_only)

    # every element but the last is expected to be found in ``other``
    other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
    all_but_last = np.array([True] * (size - 1) + [False])
    tm.assert_numpy_array_equal(index.isin(other), all_but_last)
    tm.assert_numpy_array_equal(index.isin(other.tolist()), all_but_last)

    # a different ``closed`` value means nothing matches
    for other_closed in ["right", "left", "both", "neither"]:
        other = self.create_index(closed=other_closed)
        expected = np.repeat(closed == other_closed, size)
        tm.assert_numpy_array_equal(index.isin(other), expected)
        tm.assert_numpy_array_equal(index.isin(other.tolist()), expected)
542
+
543
def test_comparison(self):
    """Comparison operators between ``self.index`` and various operands."""
    both_true = np.array([True, True])
    both_false = np.array([False, False])
    second_only = np.array([False, True])

    # scalar Interval against the index
    tm.assert_numpy_array_equal(Interval(0, 1) < self.index, second_only)
    tm.assert_numpy_array_equal(Interval(0.5, 1.5) < self.index, second_only)
    tm.assert_numpy_array_equal(self.index > Interval(0.5, 1.5), second_only)

    # index against itself
    tm.assert_numpy_array_equal(self.index == self.index, both_true)
    tm.assert_numpy_array_equal(self.index <= self.index, both_true)
    tm.assert_numpy_array_equal(self.index >= self.index, both_true)
    tm.assert_numpy_array_equal(self.index < self.index, both_false)
    tm.assert_numpy_array_equal(self.index > self.index, both_false)

    # same breaks but closed on the other side
    left_closed = IntervalIndex.from_breaks([0, 1, 2], "left")
    tm.assert_numpy_array_equal(self.index == left_closed, both_false)

    # index against its own ``.values``
    tm.assert_numpy_array_equal(self.index == self.index.values, both_true)
    tm.assert_numpy_array_equal(self.index.values == self.index, both_true)
    tm.assert_numpy_array_equal(self.index <= self.index.values, both_true)
    tm.assert_numpy_array_equal(self.index != self.index.values, both_false)
    tm.assert_numpy_array_equal(self.index > self.index.values, both_false)
    tm.assert_numpy_array_equal(self.index.values > self.index, both_false)

    # invalid comparisons
    tm.assert_numpy_array_equal(self.index == 0, both_false)
    tm.assert_numpy_array_equal(self.index == self.index.left, both_false)

    # ordering against non-interval operands raises; either message is accepted
    type_error_msg = "|".join(
        [
            "not supported between instances of 'int' and '.*.Interval'",
            r"Invalid comparison between dtype=interval\[int64, right\] and ",
        ]
    )
    with pytest.raises(TypeError, match=type_error_msg):
        self.index > 0
    with pytest.raises(TypeError, match=type_error_msg):
        self.index <= 0
    with pytest.raises(TypeError, match=type_error_msg):
        self.index > np.arange(2)

    length_msg = "Lengths must match to compare"
    with pytest.raises(ValueError, match=length_msg):
        self.index > np.arange(3)
606
+
607
def test_missing_values(self, closed):
    """NaN entries: equality across constructors, validation, and ``isna``."""
    from_objects = Index(
        [np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed)]
    )
    from_arrays = IntervalIndex.from_arrays(
        [np.nan, 0, 1], [np.nan, 1, 2], closed=closed
    )
    assert from_objects.equals(from_arrays)

    # NaN on the left side only must be rejected
    mismatch_msg = (
        "missing values must be missing in the same location both left "
        "and right sides"
    )
    with pytest.raises(ValueError, match=mismatch_msg):
        IntervalIndex.from_arrays([np.nan, 0, 1], np.array([0, 1, 2]), closed=closed)

    na_mask = np.array([True, False, False])
    tm.assert_numpy_array_equal(isna(from_objects), na_mask)
624
+
625
def test_sort_values(self, closed):
    """``sort_values`` in both directions, with and without a missing entry."""
    index = self.create_index(closed=closed)

    # the created index is expected to be sorted already
    tm.assert_index_equal(index.sort_values(), index)
    tm.assert_index_equal(index.sort_values(ascending=False), index[::-1])

    # with nan
    with_nan = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)])

    ascending = with_nan.sort_values()
    tm.assert_index_equal(
        ascending, IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan])
    )

    descending = with_nan.sort_values(ascending=False, na_position="first")
    tm.assert_index_equal(
        descending, IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)])
    )
644
+
645
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
def test_datetime(self, tz):
    """Datetime-based IntervalIndex: ``mid``, membership and ``get_indexer``."""

    def stamp(text):
        # every timestamp in this test shares the parametrized timezone
        return Timestamp(text, tz=tz)

    dates = date_range(start=stamp("2000-01-01"), periods=10)
    index = IntervalIndex.from_breaks(dates)

    # test mid
    expected_mid = date_range(start=stamp("2000-01-01T12:00"), periods=9)
    tm.assert_index_equal(index.mid, expected_mid)

    # __contains__ doesn't check individual points
    assert stamp("2000-01-01") not in index
    assert stamp("2000-01-01T12") not in index
    assert stamp("2000-01-02") not in index
    iv_true = Interval(stamp("2000-01-02"), stamp("2000-01-03"))
    iv_false = Interval(stamp("1999-12-31"), stamp("2000-01-01"))
    assert iv_true in index
    assert iv_false not in index

    # .contains does check individual points
    assert not index.contains(stamp("2000-01-01")).any()
    assert index.contains(stamp("2000-01-01T12")).any()
    assert index.contains(stamp("2000-01-02")).any()

    # test get_indexer
    target = date_range(start=stamp("1999-12-31T12:00"), periods=7, freq="12h")
    positions = index.get_indexer(target)
    tm.assert_numpy_array_equal(
        positions, np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp")
    )

    target = date_range(start=stamp("2000-01-08T18:00"), periods=7, freq="6h")
    positions = index.get_indexer(target)
    tm.assert_numpy_array_equal(
        positions, np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp")
    )
686
+
687
+ def test_append(self, closed):
688
+ index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed)
689
+ index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed)
690
+
691
+ result = index1.append(index2)
692
+ expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed)
693
+ tm.assert_index_equal(result, expected)
694
+
695
+ result = index1.append([index1, index2])
696
+ expected = IntervalIndex.from_arrays(
697
+ [0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed
698
+ )
699
+ tm.assert_index_equal(result, expected)
700
+
701
+ for other_closed in {"left", "right", "both", "neither"} - {closed}:
702
+ index_other_closed = IntervalIndex.from_arrays(
703
+ [0, 1], [1, 2], closed=other_closed
704
+ )
705
+ result = index1.append(index_other_closed)
706
+ expected = index1.astype(object).append(index_other_closed.astype(object))
707
+ tm.assert_index_equal(result, expected)
708
+
709
+ def test_is_non_overlapping_monotonic(self, closed):
710
+ # Should be True in all cases
711
+ tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
712
+ idx = IntervalIndex.from_tuples(tpls, closed=closed)
713
+ assert idx.is_non_overlapping_monotonic is True
714
+
715
+ idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
716
+ assert idx.is_non_overlapping_monotonic is True
717
+
718
+ # Should be False in all cases (overlapping)
719
+ tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
720
+ idx = IntervalIndex.from_tuples(tpls, closed=closed)
721
+ assert idx.is_non_overlapping_monotonic is False
722
+
723
+ idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
724
+ assert idx.is_non_overlapping_monotonic is False
725
+
726
+ # Should be False in all cases (non-monotonic)
727
+ tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
728
+ idx = IntervalIndex.from_tuples(tpls, closed=closed)
729
+ assert idx.is_non_overlapping_monotonic is False
730
+
731
+ idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
732
+ assert idx.is_non_overlapping_monotonic is False
733
+
734
+ # Should be False for closed='both', otherwise True (GH16560)
735
+ if closed == "both":
736
+ idx = IntervalIndex.from_breaks(range(4), closed=closed)
737
+ assert idx.is_non_overlapping_monotonic is False
738
+ else:
739
+ idx = IntervalIndex.from_breaks(range(4), closed=closed)
740
+ assert idx.is_non_overlapping_monotonic is True
741
+
742
@pytest.mark.parametrize(
    "start, shift, na_value",
    [
        (0, 1, np.nan),
        (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT),
        (Timedelta("0 days"), Timedelta("1 day"), pd.NaT),
    ],
)
def test_is_overlapping(self, start, shift, na_value, closed):
    """Interface-level checks of ``IntervalIndex.is_overlapping`` (GH 23309)."""
    # see test_interval_tree.py for extensive tests; interface tests here
    na_pair = [(na_value, na_value)]

    def spans(offsets, width):
        # intervals of ``width`` shifts, starting ``n`` shifts after ``start``
        return [(start + n * shift, start + (n + width) * shift) for n in offsets]

    def overlapping(tuples):
        return IntervalIndex.from_tuples(tuples, closed=closed).is_overlapping

    # non-overlapping
    tuples = spans((0, 2, 4), 1)
    assert overlapping(tuples) is False

    # non-overlapping with NA
    assert overlapping(na_pair + tuples + na_pair) is False

    # overlapping
    tuples = spans(range(3), 2)
    assert overlapping(tuples) is True

    # overlapping with NA
    assert overlapping(na_pair + tuples + na_pair) is True

    # common endpoints
    tuples = spans(range(3), 1)
    expected = closed == "both"
    assert overlapping(tuples) is expected

    # common endpoints with NA
    assert overlapping(na_pair + tuples + na_pair) is expected

    # intervals with duplicate left values
    a = [10, 15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85]
    b = [15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90]
    assert IntervalIndex.from_arrays(a, b, closed="right").is_overlapping is False
793
+
794
@pytest.mark.parametrize(
    "tuples",
    [
        list(zip(range(10), range(1, 11))),
        list(
            zip(
                date_range("20170101", periods=10),
                date_range("20170101", periods=10),
            )
        ),
        list(
            zip(
                timedelta_range("0 days", periods=10),
                timedelta_range("1 day", periods=10),
            )
        ),
    ],
)
def test_to_tuples(self, tuples):
    """``to_tuples`` round-trips the tuples the index was built from (GH 18756)."""
    round_tripped = IntervalIndex.from_tuples(tuples).to_tuples()
    tm.assert_index_equal(round_tripped, Index(com.asarray_tuplesafe(tuples)))
818
+
819
@pytest.mark.parametrize(
    "tuples",
    [
        list(zip(range(10), range(1, 11))) + [np.nan],
        list(
            zip(
                date_range("20170101", periods=10),
                date_range("20170101", periods=10),
            )
        )
        + [np.nan],
        list(
            zip(
                timedelta_range("0 days", periods=10),
                timedelta_range("1 day", periods=10),
            )
        )
        + [np.nan],
    ],
)
@pytest.mark.parametrize("na_tuple", [True, False])
def test_to_tuples_na(self, tuples, na_tuple):
    """``to_tuples(na_tuple=...)`` controls how the missing entry is rendered.

    GH 18756
    """
    result = IntervalIndex.from_tuples(tuples).to_tuples(na_tuple=na_tuple)

    # check the non-NA portion
    tm.assert_index_equal(result[:-1], Index(com.asarray_tuplesafe(tuples[:-1])))

    # check the NA portion
    last = result[-1]
    if not na_tuple:
        assert isna(last)
    else:
        assert isinstance(last, tuple)
        assert len(last) == 2
        assert all(isna(x) for x in last)
858
+
859
+ def test_nbytes(self):
860
+ # GH 19209
861
+ left = np.arange(0, 4, dtype="i8")
862
+ right = np.arange(1, 5, dtype="i8")
863
+
864
+ result = IntervalIndex.from_arrays(left, right).nbytes
865
+ expected = 64 # 4 * 8 * 2
866
+ assert result == expected
867
+
868
@pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"])
def test_set_closed(self, name, closed, new_closed):
    """``set_closed`` gives the same index as building with the new side (GH 21670)."""
    original = interval_range(0, 5, closed=closed, name=name)
    expected = interval_range(0, 5, closed=new_closed, name=name)
    tm.assert_index_equal(original.set_closed(new_closed), expected)
875
+
876
@pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False])
def test_set_closed_errors(self, bad_closed):
    """``set_closed`` rejects values outside the accepted options (GH 21670)."""
    index = interval_range(0, 5)
    expected_msg = f"invalid option for 'closed': {bad_closed}"
    with pytest.raises(ValueError, match=expected_msg):
        index.set_closed(bad_closed)
883
+
884
+ def test_is_all_dates(self):
885
+ # GH 23576
886
+ year_2017 = Interval(
887
+ Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00")
888
+ )
889
+ year_2017_index = IntervalIndex([year_2017])
890
+ assert not year_2017_index._is_all_dates
891
+
892
+
893
def test_dir():
    """``dir()`` on an IntervalIndex works and does not list ``str`` (GH#27571)."""
    # GH#27571 dir(interval_index) should not raise
    index = IntervalIndex.from_arrays([0, 1], [1, 2])
    attribute_names = dir(index)
    assert "str" not in attribute_names
898
+
899
+
900
+ def test_searchsorted_different_argument_classes(listlike_box):
901
+ # https://github.com/pandas-dev/pandas/issues/32762
902
+ values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
903
+ result = values.searchsorted(listlike_box(values))
904
+ expected = np.array([0, 1], dtype=result.dtype)
905
+ tm.assert_numpy_array_equal(result, expected)
906
+
907
+ result = values._data.searchsorted(listlike_box(values))
908
+ tm.assert_numpy_array_equal(result, expected)
909
+
910
+
911
@pytest.mark.parametrize(
    "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2]
)
def test_searchsorted_invalid_argument(arg):
    """``searchsorted`` with non-interval values raises a TypeError.

    The expected message is escaped before being used as a regex, so the dots
    in the dotted class path are matched literally rather than as wildcards.
    """
    values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
    msg = re.escape(
        "'<' not supported between instances of 'pandas._libs.interval.Interval' and "
    )
    with pytest.raises(TypeError, match=msg):
        values.searchsorted(arg)
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval_range.py ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import timedelta
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+ from pandas.core.dtypes.common import is_integer
7
+
8
+ from pandas import (
9
+ DateOffset,
10
+ Interval,
11
+ IntervalIndex,
12
+ Timedelta,
13
+ Timestamp,
14
+ date_range,
15
+ interval_range,
16
+ timedelta_range,
17
+ )
18
+ import pandas._testing as tm
19
+
20
+ from pandas.tseries.offsets import Day
21
+
22
+
23
@pytest.fixture(params=[None, "foo"])
def name(request):
    """Index name used by the tests: either ``None`` or ``"foo"``."""
    chosen = request.param
    return chosen
26
+
27
+
28
+ class TestIntervalRange:
29
@pytest.mark.parametrize("freq, periods", [(1, 100), (2.5, 40), (5, 20), (25, 4)])
def test_constructor_numeric(self, closed, name, freq, periods):
    """Every valid three-argument combination builds the same numeric index."""
    start, end = 0, 100
    expected = IntervalIndex.from_breaks(
        np.arange(101, step=freq), name=name, closed=closed
    )

    specifications = [
        # defined from start/end/freq
        {"start": start, "end": end, "freq": freq},
        # defined from start/periods/freq
        {"start": start, "periods": periods, "freq": freq},
        # defined from end/periods/freq
        {"end": end, "periods": periods, "freq": freq},
        # GH 20976: linspace behavior defined from start/end/periods
        {"start": start, "end": end, "periods": periods},
    ]
    for spec in specifications:
        result = interval_range(name=name, closed=closed, **spec)
        tm.assert_index_equal(result, expected)
58
+
59
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
@pytest.mark.parametrize(
    "freq, periods", [("D", 364), ("2D", 182), ("22D18h", 16), ("ME", 11)]
)
def test_constructor_timestamp(self, closed, name, freq, periods, tz):
    """Timestamp endpoints: each argument combination matches ``date_range`` breaks."""
    start, end = Timestamp("20180101", tz=tz), Timestamp("20181231", tz=tz)
    breaks = date_range(start=start, end=end, freq=freq)
    expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

    specifications = [
        # defined from start/end/freq
        {"start": start, "end": end, "freq": freq},
        # defined from start/periods/freq
        {"start": start, "periods": periods, "freq": freq},
        # defined from end/periods/freq
        {"end": end, "periods": periods, "freq": freq},
    ]
    for spec in specifications:
        result = interval_range(name=name, closed=closed, **spec)
        tm.assert_index_equal(result, expected)

    # GH 20976: linspace behavior defined from start/end/periods
    # only checked for a frequency multiple other than 1 and naive timestamps
    if breaks.freq.n != 1 and tz is None:
        result = interval_range(
            start=start, end=end, periods=periods, name=name, closed=closed
        )
        tm.assert_index_equal(result, expected)
92
+
93
@pytest.mark.parametrize(
    "freq, periods", [("D", 100), ("2D12h", 40), ("5D", 20), ("25D", 4)]
)
def test_constructor_timedelta(self, closed, name, freq, periods):
    """Timedelta endpoints: each argument combination matches the same breaks."""
    start, end = Timedelta("0 days"), Timedelta("100 days")
    expected = IntervalIndex.from_breaks(
        timedelta_range(start=start, end=end, freq=freq), name=name, closed=closed
    )

    specifications = [
        # defined from start/end/freq
        {"start": start, "end": end, "freq": freq},
        # defined from start/periods/freq
        {"start": start, "periods": periods, "freq": freq},
        # defined from end/periods/freq
        {"end": end, "periods": periods, "freq": freq},
        # GH 20976: linspace behavior defined from start/end/periods
        {"start": start, "end": end, "periods": periods},
    ]
    for spec in specifications:
        result = interval_range(name=name, closed=closed, **spec)
        tm.assert_index_equal(result, expected)
124
+
125
@pytest.mark.parametrize(
    "start, end, freq, expected_endpoint",
    [
        (0, 10, 3, 9),
        (0, 10, 1.5, 9),
        (0.5, 10, 3, 9.5),
        (Timedelta("0D"), Timedelta("10D"), "2D4h", Timedelta("8D16h")),
        (
            Timestamp("2018-01-01"),
            Timestamp("2018-02-09"),
            "MS",
            Timestamp("2018-02-01"),
        ),
        (
            Timestamp("2018-01-01", tz="US/Eastern"),
            Timestamp("2018-01-20", tz="US/Eastern"),
            "5D12h",
            Timestamp("2018-01-17 12:00:00", tz="US/Eastern"),
        ),
    ],
)
def test_early_truncation(self, start, end, freq, expected_endpoint):
    """The last right edge stops short of ``end`` when ``freq`` steps over it."""
    # index truncates early if freq causes end to be skipped
    truncated = interval_range(start=start, end=end, freq=freq)
    assert truncated.right[-1] == expected_endpoint
151
+
152
@pytest.mark.parametrize(
    "start, end, freq",
    [(0.5, None, None), (None, 4.5, None), (0.5, None, 1.5), (None, 6.5, 1.5)],
)
def test_no_invalid_float_truncation(self, start, end, freq):
    """Fractional start/end/freq values are kept, not truncated (GH 21161)."""
    # breaks used when freq is omitted vs. when freq=1.5 is given
    breaks = [0.5, 1.5, 2.5, 3.5, 4.5] if freq is None else [0.5, 2.0, 3.5, 5.0, 6.5]
    expected = IntervalIndex.from_breaks(breaks)

    result = interval_range(start=start, end=end, periods=4, freq=freq)
    tm.assert_index_equal(result, expected)
166
+
167
@pytest.mark.parametrize(
    "start, mid, end",
    [
        (
            Timestamp("2018-03-10", tz="US/Eastern"),
            Timestamp("2018-03-10 23:30:00", tz="US/Eastern"),
            Timestamp("2018-03-12", tz="US/Eastern"),
        ),
        (
            Timestamp("2018-11-03", tz="US/Eastern"),
            Timestamp("2018-11-04 00:30:00", tz="US/Eastern"),
            Timestamp("2018-11-05", tz="US/Eastern"),
        ),
    ],
)
def test_linspace_dst_transition(self, start, mid, end):
    """Two-period linspace across a DST change splits at the expected midpoint."""
    # GH 20976: linspace behavior defined from start/end/periods
    # accounts for the hour gained/lost during DST transition
    start, mid, end = (stamp.as_unit("ns") for stamp in (start, mid, end))
    expected = IntervalIndex.from_breaks([start, mid, end])
    result = interval_range(start=start, end=end, periods=2)
    tm.assert_index_equal(result, expected)
191
+
192
@pytest.mark.parametrize("freq", [2, 2.0])
@pytest.mark.parametrize("end", [10, 10.0])
@pytest.mark.parametrize("start", [0, 0.0])
def test_float_subtype(self, start, end, freq):
    """The subtype is float64 as soon as any supplied argument is a float."""
    # Has float subtype if any of start/end/freq are float, even if all
    # resulting endpoints can safely be upcast to integers
    # each case: interval_range kwargs and the sum of the numeric arguments used,
    # which is an integer only when all of them are integers
    cases = [
        # defined from start/end/freq
        ({"start": start, "end": end, "freq": freq}, start + end + freq),
        # defined from start/periods/freq
        ({"start": start, "periods": 5, "freq": freq}, start + freq),
        # defined from end/periods/freq
        ({"end": end, "periods": 5, "freq": freq}, end + freq),
        # GH 20976: linspace behavior defined from start/end/periods
        ({"start": start, "end": end, "periods": 5}, start + end),
    ]
    for kwargs, total in cases:
        subtype = interval_range(**kwargs).dtype.subtype
        expected = "int64" if is_integer(total) else "float64"
        assert subtype == expected
222
+
223
+ def test_interval_range_fractional_period(self):
224
+ # float value for periods
225
+ expected = interval_range(start=0, periods=10)
226
+ msg = "Non-integer 'periods' in pd.date_range, .* pd.interval_range"
227
+ with tm.assert_produces_warning(FutureWarning, match=msg):
228
+ result = interval_range(start=0, periods=10.5)
229
+ tm.assert_index_equal(result, expected)
230
+
231
+ def test_constructor_coverage(self):
232
+ # equivalent timestamp-like start/end
233
+ start, end = Timestamp("2017-01-01"), Timestamp("2017-01-15")
234
+ expected = interval_range(start=start, end=end)
235
+
236
+ result = interval_range(start=start.to_pydatetime(), end=end.to_pydatetime())
237
+ tm.assert_index_equal(result, expected)
238
+
239
+ result = interval_range(start=start.asm8, end=end.asm8)
240
+ tm.assert_index_equal(result, expected)
241
+
242
+ # equivalent freq with timestamp
243
+ equiv_freq = [
244
+ "D",
245
+ Day(),
246
+ Timedelta(days=1),
247
+ timedelta(days=1),
248
+ DateOffset(days=1),
249
+ ]
250
+ for freq in equiv_freq:
251
+ result = interval_range(start=start, end=end, freq=freq)
252
+ tm.assert_index_equal(result, expected)
253
+
254
+ # equivalent timedelta-like start/end
255
+ start, end = Timedelta(days=1), Timedelta(days=10)
256
+ expected = interval_range(start=start, end=end)
257
+
258
+ result = interval_range(start=start.to_pytimedelta(), end=end.to_pytimedelta())
259
+ tm.assert_index_equal(result, expected)
260
+
261
+ result = interval_range(start=start.asm8, end=end.asm8)
262
+ tm.assert_index_equal(result, expected)
263
+
264
+ # equivalent freq with timedelta
265
+ equiv_freq = ["D", Day(), Timedelta(days=1), timedelta(days=1)]
266
+ for freq in equiv_freq:
267
+ result = interval_range(start=start, end=end, freq=freq)
268
+ tm.assert_index_equal(result, expected)
269
+
270
+ def test_errors(self):
271
+ # not enough params
272
+ msg = (
273
+ "Of the four parameters: start, end, periods, and freq, "
274
+ "exactly three must be specified"
275
+ )
276
+
277
+ with pytest.raises(ValueError, match=msg):
278
+ interval_range(start=0)
279
+
280
+ with pytest.raises(ValueError, match=msg):
281
+ interval_range(end=5)
282
+
283
+ with pytest.raises(ValueError, match=msg):
284
+ interval_range(periods=2)
285
+
286
+ with pytest.raises(ValueError, match=msg):
287
+ interval_range()
288
+
289
+ # too many params
290
+ with pytest.raises(ValueError, match=msg):
291
+ interval_range(start=0, end=5, periods=6, freq=1.5)
292
+
293
+ # mixed units
294
+ msg = "start, end, freq need to be type compatible"
295
+ with pytest.raises(TypeError, match=msg):
296
+ interval_range(start=0, end=Timestamp("20130101"), freq=2)
297
+
298
+ with pytest.raises(TypeError, match=msg):
299
+ interval_range(start=0, end=Timedelta("1 day"), freq=2)
300
+
301
+ with pytest.raises(TypeError, match=msg):
302
+ interval_range(start=0, end=10, freq="D")
303
+
304
+ with pytest.raises(TypeError, match=msg):
305
+ interval_range(start=Timestamp("20130101"), end=10, freq="D")
306
+
307
+ with pytest.raises(TypeError, match=msg):
308
+ interval_range(
309
+ start=Timestamp("20130101"), end=Timedelta("1 day"), freq="D"
310
+ )
311
+
312
+ with pytest.raises(TypeError, match=msg):
313
+ interval_range(
314
+ start=Timestamp("20130101"), end=Timestamp("20130110"), freq=2
315
+ )
316
+
317
+ with pytest.raises(TypeError, match=msg):
318
+ interval_range(start=Timedelta("1 day"), end=10, freq="D")
319
+
320
+ with pytest.raises(TypeError, match=msg):
321
+ interval_range(
322
+ start=Timedelta("1 day"), end=Timestamp("20130110"), freq="D"
323
+ )
324
+
325
+ with pytest.raises(TypeError, match=msg):
326
+ interval_range(start=Timedelta("1 day"), end=Timedelta("10 days"), freq=2)
327
+
328
+ # invalid periods
329
+ msg = "periods must be a number, got foo"
330
+ with pytest.raises(TypeError, match=msg):
331
+ interval_range(start=0, periods="foo")
332
+
333
+ # invalid start
334
+ msg = "start must be numeric or datetime-like, got foo"
335
+ with pytest.raises(ValueError, match=msg):
336
+ interval_range(start="foo", periods=10)
337
+
338
+ # invalid end
339
+ msg = r"end must be numeric or datetime-like, got \(0, 1\]"
340
+ with pytest.raises(ValueError, match=msg):
341
+ interval_range(end=Interval(0, 1), periods=10)
342
+
343
+ # invalid freq for datetime-like
344
+ msg = "freq must be numeric or convertible to DateOffset, got foo"
345
+ with pytest.raises(ValueError, match=msg):
346
+ interval_range(start=0, end=10, freq="foo")
347
+
348
+ with pytest.raises(ValueError, match=msg):
349
+ interval_range(start=Timestamp("20130101"), periods=10, freq="foo")
350
+
351
+ with pytest.raises(ValueError, match=msg):
352
+ interval_range(end=Timedelta("1 day"), periods=10, freq="foo")
353
+
354
+ # mixed tz
355
+ start = Timestamp("2017-01-01", tz="US/Eastern")
356
+ end = Timestamp("2017-01-07", tz="US/Pacific")
357
+ msg = "Start and end cannot both be tz-aware with different timezones"
358
+ with pytest.raises(TypeError, match=msg):
359
+ interval_range(start=start, end=end)
360
+
361
+ def test_float_freq(self):
362
+ # GH 54477
363
+ result = interval_range(0, 1, freq=0.1)
364
+ expected = IntervalIndex.from_breaks([0 + 0.1 * n for n in range(11)])
365
+ tm.assert_index_equal(result, expected)
366
+
367
+ result = interval_range(0, 1, freq=0.6)
368
+ expected = IntervalIndex.from_breaks([0, 0.6])
369
+ tm.assert_index_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval_tree.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from itertools import permutations
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+ from pandas._libs.interval import IntervalTree
7
+ from pandas.compat import IS64
8
+
9
+ import pandas._testing as tm
10
+
11
+
12
def skipif_32bit(param):
    """
    Wrap ``param`` so that it is skipped when parametrized on 32bit systems.

    Used in this module for the leaf_size parameters affected by GH 23440.
    """
    return pytest.param(
        param,
        marks=pytest.mark.skipif(
            not IS64, reason="GH 23440: int type mismatch on 32bit"
        ),
    )
19
+
20
+
21
@pytest.fixture(params=["int64", "float64", "uint64"])
def dtype(request):
    """Fixture yielding each dtype name listed in ``params``."""
    return request.param
24
+
25
+
26
@pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10])
def leaf_size(request):
    """
    Fixture supplying the IntervalTree ``leaf_size`` argument.

    Intended to be combined with the ``tree`` fixture.
    """
    return request.param
33
+
34
+
35
@pytest.fixture(
    params=[
        np.arange(5, dtype="int64"),
        np.arange(5, dtype="uint64"),
        np.arange(5, dtype="float64"),
        np.array([0, 1, 2, 3, 4, np.nan], dtype="float64"),
    ]
)
def tree(request, leaf_size):
    """IntervalTree whose intervals are ``[left, left + 2]`` for each param array."""
    left_bounds = request.param
    right_bounds = left_bounds + 2
    return IntervalTree(left_bounds, right_bounds, leaf_size=leaf_size)
46
+
47
+
48
class TestIntervalTree:
    """Tests for the low-level ``IntervalTree`` lookup structure."""

    def test_get_indexer(self, tree):
        """Unique lookups return positions; an ambiguous target raises KeyError."""
        positions = tree.get_indexer(np.array([1.0, 5.5, 6.5]))
        tm.assert_numpy_array_equal(positions, np.array([0, 4, -1], dtype="intp"))

        msg = "'indexer does not intersect a unique set of intervals'"
        with pytest.raises(KeyError, match=msg):
            tree.get_indexer(np.array([3.0]))

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_overflow(self, dtype, target_value, target_dtype):
        """A target outside the tree dtype's range is reported as not found."""
        lefts = np.array([0, 1], dtype=dtype)
        rights = np.array([1, 2], dtype=dtype)
        target = np.array([target_value], dtype=target_dtype)

        positions = IntervalTree(lefts, rights).get_indexer(target)
        tm.assert_numpy_array_equal(positions, np.array([-1], dtype="intp"))

    def test_get_indexer_non_unique(self, tree):
        """Non-unique lookup returns all matches plus the missing target positions."""
        indexer, missing = tree.get_indexer_non_unique(np.array([1.0, 2.0, 6.5]))

        # Matches for one target may come back in any order, hence the sorts.
        checks = [
            (indexer[:1], [0]),
            (np.sort(indexer[1:3]), [0, 1]),
            (np.sort(indexer[3:]), [-1]),
            (missing, [2]),
        ]
        for actual, values in checks:
            tm.assert_numpy_array_equal(actual, np.array(values, dtype="intp"))

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_non_unique_overflow(self, dtype, target_value, target_dtype):
        """Out-of-range targets are flagged as missing by the non-unique lookup."""
        lefts = np.array([0, 2], dtype=dtype)
        rights = np.array([1, 3], dtype=dtype)
        target = np.array([target_value], dtype=target_dtype)

        indexer, missing = IntervalTree(lefts, rights).get_indexer_non_unique(target)
        tm.assert_numpy_array_equal(indexer, np.array([-1], dtype="intp"))
        tm.assert_numpy_array_equal(missing, np.array([0], dtype="intp"))

    def test_duplicates(self, dtype):
        """Duplicate intervals break unique lookup but all appear in non-unique lookup."""
        lefts = np.array([0, 0, 0], dtype=dtype)
        tree = IntervalTree(lefts, lefts + 1)
        target = np.array([0.5])

        msg = "'indexer does not intersect a unique set of intervals'"
        with pytest.raises(KeyError, match=msg):
            tree.get_indexer(target)

        indexer, missing = tree.get_indexer_non_unique(target)
        tm.assert_numpy_array_equal(
            np.sort(indexer), np.array([0, 1, 2], dtype="intp")
        )
        tm.assert_numpy_array_equal(missing, np.array([], dtype="intp"))

    @pytest.mark.parametrize(
        "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000]
    )
    def test_get_indexer_closed(self, closed, leaf_size):
        """Endpoint membership follows the ``closed`` side of the tree."""
        lefts = np.arange(1000, dtype="float64")
        found = lefts.astype("intp")
        not_found = np.full(1000, -1, dtype="intp")

        tree = IntervalTree(lefts, lefts + 0.5, closed=closed, leaf_size=leaf_size)

        # interior points are always found
        tm.assert_numpy_array_equal(found, tree.get_indexer(lefts + 0.25))

        # left endpoints are found only when the left side is closed
        on_left = found if tree.closed_left else not_found
        tm.assert_numpy_array_equal(on_left, tree.get_indexer(lefts + 0.0))

        # right endpoints are found only when the right side is closed
        on_right = found if tree.closed_right else not_found
        tm.assert_numpy_array_equal(on_right, tree.get_indexer(lefts + 0.5))

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            (np.array([0, 1, 4], dtype="int64"), np.array([2, 3, 5]), True),
            (np.array([0, 1, 2], dtype="int64"), np.array([5, 4, 3]), True),
            (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
            (np.array([0, 2, 4], dtype="int64"), np.array([1, 3, 5]), False),
            (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False),
        ],
    )
    @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3))))
    def test_is_overlapping(self, closed, order, left, right, expected):
        """``is_overlapping`` does not depend on the order of the intervals."""
        # GH 23309
        shuffled = IntervalTree(left[order], right[order], closed=closed)
        assert shuffled.is_overlapping is expected

    @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3))))
    def test_is_overlapping_endpoints(self, closed, order):
        """shared endpoints are marked as overlapping"""
        # GH 23309
        lefts = np.arange(3, dtype="int64")
        rights = np.arange(1, 4)
        shuffled = IntervalTree(lefts[order], rights[order], closed=closed)

        # only intervals closed on both sides share their endpoints
        should_overlap = closed == "both"
        assert shuffled.is_overlapping is should_overlap

    @pytest.mark.parametrize(
        "left, right",
        [
            (np.array([], dtype="int64"), np.array([], dtype="int64")),
            (np.array([0], dtype="int64"), np.array([1], dtype="int64")),
            (np.array([np.nan]), np.array([np.nan])),
            (np.array([np.nan] * 3), np.array([np.nan] * 3)),
        ],
    )
    def test_is_overlapping_trivial(self, closed, left, right):
        """Empty, single-interval and all-NaN trees never overlap."""
        # GH 23309
        trivial = IntervalTree(left, right, closed=closed)
        assert trivial.is_overlapping is False

    @pytest.mark.skipif(not IS64, reason="GH 23440")
    def test_construction_overflow(self):
        """Pivot computation must not overflow with int64-max right bounds."""
        # GH 25485
        int64_max = np.iinfo(np.int64).max
        lefts = np.arange(101, dtype="int64")
        rights = [int64_max] * 101
        tree = IntervalTree(lefts, rights)

        # pivot should be average of left/right medians
        assert tree.root.pivot == (50 + int64_max) / 2

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            ([-np.inf, 1.0], [1.0, 2.0], 0.0),
            ([-np.inf, -2.0], [-2.0, -1.0], -2.0),
            ([-2.0, -1.0], [-1.0, np.inf], 0.0),
            ([1.0, 2.0], [2.0, np.inf], 2.0),
        ],
    )
    def test_inf_bound_infinite_recursion(self, left, right, expected):
        """Infinite bounds still yield a finite root pivot (GH 46658)."""
        # GH 46658
        # Repeat the bound lists 101 times to build the tree input.
        repeated = IntervalTree(left * 101, right * 101)
        assert repeated.root.pivot == expected
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_join.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ from pandas import (
4
+ IntervalIndex,
5
+ MultiIndex,
6
+ RangeIndex,
7
+ )
8
+ import pandas._testing as tm
9
+
10
+
11
@pytest.fixture
def range_index():
    """RangeIndex of length 3 named ``range_index``."""
    return RangeIndex(3, name="range_index")
14
+
15
+
16
@pytest.fixture
def interval_index():
    """IntervalIndex named ``interval_index`` whose last two intervals overlap."""
    spans = [(0.0, 1.0), (1.0, 2.0), (1.5, 2.5)]
    return IntervalIndex.from_tuples(spans, name="interval_index")
21
+
22
+
23
+ def test_join_overlapping_in_mi_to_same_intervalindex(range_index, interval_index):
24
+ # GH-45661
25
+ multi_index = MultiIndex.from_product([interval_index, range_index])
26
+ result = multi_index.join(interval_index)
27
+
28
+ tm.assert_index_equal(result, multi_index)
29
+
30
+
31
+ def test_join_overlapping_to_multiindex_with_same_interval(range_index, interval_index):
32
+ # GH-45661
33
+ multi_index = MultiIndex.from_product([interval_index, range_index])
34
+ result = interval_index.join(multi_index)
35
+
36
+ tm.assert_index_equal(result, multi_index)
37
+
38
+
39
+ def test_join_overlapping_interval_to_another_intervalindex(interval_index):
40
+ # GH-45661
41
+ flipped_interval_index = interval_index[::-1]
42
+ result = interval_index.join(flipped_interval_index)
43
+
44
+ tm.assert_index_equal(result, interval_index)
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_pickle.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ from pandas import IntervalIndex
4
+ import pandas._testing as tm
5
+
6
+
7
class TestPickle:
    """Pickle round-trip tests for IntervalIndex."""

    @pytest.mark.parametrize("closed", ["left", "right", "both"])
    def test_pickle_round_trip_closed(self, closed):
        """The ``closed`` side survives a pickle round trip."""
        # https://github.com/pandas-dev/pandas/issues/35658
        original = IntervalIndex.from_tuples([(1, 2), (2, 3)], closed=closed)
        restored = tm.round_trip_pickle(original)
        tm.assert_index_equal(restored, original)
py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_setops.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas import (
5
+ Index,
6
+ IntervalIndex,
7
+ Timestamp,
8
+ interval_range,
9
+ )
10
+ import pandas._testing as tm
11
+
12
+
13
def monotonic_index(start, end, dtype="int64", closed="right"):
    """Build an IntervalIndex from the unit-step breaks ``start .. end - 1``."""
    breaks = np.arange(start, end, dtype=dtype)
    return IntervalIndex.from_breaks(breaks, closed=closed)
15
+
16
+
17
def empty_index(dtype="int64", closed="right"):
    """Build an empty IntervalIndex from an empty array of the given dtype."""
    no_values = np.array([], dtype=dtype)
    return IntervalIndex(no_values, closed=closed)
19
+
20
+
21
class TestIntervalIndex:
    """Set-operation tests (union, intersection, difference, ...) for IntervalIndex."""

    def test_union(self, closed, sort):
        """Union of two overlapping monotonic indexes covers the combined range."""
        index = monotonic_index(0, 11, closed=closed)
        other = monotonic_index(5, 13, closed=closed)

        expected = monotonic_index(0, 13, closed=closed)
        result = index[::-1].union(other, sort=sort)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            # unsorted result: compare after sorting
            tm.assert_index_equal(result.sort_values(), expected)

        result = other[::-1].union(index, sort=sort)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        tm.assert_index_equal(index.union(index, sort=sort), index)
        tm.assert_index_equal(index.union(index[:1], sort=sort), index)

    def test_union_empty_result(self, closed, sort):
        """Union of empty indexes is empty with the common dtype."""
        # GH 19101: empty result, same dtype
        index = empty_index(dtype="int64", closed=closed)
        result = index.union(index, sort=sort)
        tm.assert_index_equal(result, index)

        # GH 19101: empty result, different numeric dtypes -> common dtype is f8
        other = empty_index(dtype="float64", closed=closed)
        result = index.union(other, sort=sort)
        expected = other
        tm.assert_index_equal(result, expected)

        # Same check with the operands swapped. The original re-asserted the
        # previous ``result`` here, so this direction was never exercised.
        result = other.union(index, sort=sort)
        tm.assert_index_equal(result, expected)

        other = empty_index(dtype="uint64", closed=closed)
        result = index.union(other, sort=sort)
        tm.assert_index_equal(result, expected)

        result = other.union(index, sort=sort)
        tm.assert_index_equal(result, expected)

    def test_intersection(self, closed, sort):
        """Intersection keeps only intervals present in both indexes."""
        index = monotonic_index(0, 11, closed=closed)
        other = monotonic_index(5, 13, closed=closed)

        expected = monotonic_index(5, 11, closed=closed)
        result = index[::-1].intersection(other, sort=sort)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        result = other[::-1].intersection(index, sort=sort)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        tm.assert_index_equal(index.intersection(index, sort=sort), index)

        # GH 26225: nested intervals
        index = IntervalIndex.from_tuples([(1, 2), (1, 3), (1, 4), (0, 2)])
        other = IntervalIndex.from_tuples([(1, 2), (1, 3)])
        expected = IntervalIndex.from_tuples([(1, 2), (1, 3)])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

        # GH 26225
        index = IntervalIndex.from_tuples([(0, 3), (0, 2)])
        other = IntervalIndex.from_tuples([(0, 2), (1, 3)])
        expected = IntervalIndex.from_tuples([(0, 2)])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

        # GH 26225: duplicate nan element
        index = IntervalIndex([np.nan, np.nan])
        other = IntervalIndex([np.nan])
        expected = IntervalIndex([np.nan])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

    def test_intersection_empty_result(self, closed, sort):
        """Disjoint indexes intersect to an empty index of the common dtype."""
        index = monotonic_index(0, 11, closed=closed)

        # GH 19101: empty result, same dtype
        other = monotonic_index(300, 314, closed=closed)
        expected = empty_index(dtype="int64", closed=closed)
        result = index.intersection(other, sort=sort)
        tm.assert_index_equal(result, expected)

        # GH 19101: empty result, different numeric dtypes -> common dtype is float64
        other = monotonic_index(300, 314, dtype="float64", closed=closed)
        result = index.intersection(other, sort=sort)
        expected = other[:0]
        tm.assert_index_equal(result, expected)

        other = monotonic_index(300, 314, dtype="uint64", closed=closed)
        result = index.intersection(other, sort=sort)
        tm.assert_index_equal(result, expected)

    def test_intersection_duplicates(self):
        """Duplicates in the caller are dropped from the intersection."""
        # GH#38743
        index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)])
        other = IntervalIndex.from_tuples([(1, 2), (2, 3)])
        expected = IntervalIndex.from_tuples([(1, 2), (2, 3)])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

    def test_difference(self, closed, sort):
        """Difference removes the intervals present in the other index."""
        index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed)
        result = index.difference(index[:1], sort=sort)
        expected = index[1:]
        if sort is None:
            expected = expected.sort_values()
        tm.assert_index_equal(result, expected)

        # GH 19101: empty result, same dtype
        result = index.difference(index, sort=sort)
        expected = empty_index(dtype="int64", closed=closed)
        tm.assert_index_equal(result, expected)

        # GH 19101: empty result, different dtypes
        other = IntervalIndex.from_arrays(
            index.left.astype("float64"), index.right, closed=closed
        )
        result = index.difference(other, sort=sort)
        tm.assert_index_equal(result, expected)

    def test_symmetric_difference(self, closed, sort):
        """Symmetric difference keeps intervals found in exactly one index."""
        index = monotonic_index(0, 11, closed=closed)
        result = index[1:].symmetric_difference(index[:-1], sort=sort)
        expected = IntervalIndex([index[0], index[-1]])
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        # GH 19101: empty result, same dtype
        result = index.symmetric_difference(index, sort=sort)
        expected = empty_index(dtype="int64", closed=closed)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        # GH 19101: empty result, different dtypes
        other = IntervalIndex.from_arrays(
            index.left.astype("float64"), index.right, closed=closed
        )
        result = index.symmetric_difference(other, sort=sort)
        expected = empty_index(dtype="float64", closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.filterwarnings("ignore:'<' not supported between:RuntimeWarning")
    @pytest.mark.parametrize(
        "op_name", ["union", "intersection", "difference", "symmetric_difference"]
    )
    def test_set_incompatible_types(self, closed, op_name, sort):
        """Set ops against incompatible operands match the object-dtype result."""
        index = monotonic_index(0, 11, closed=closed)
        set_op = getattr(index, op_name)

        # TODO: standardize return type of non-union setops type(self vs other)
        # non-IntervalIndex
        if op_name == "difference":
            expected = index
        else:
            expected = getattr(index.astype("O"), op_name)(Index([1, 2, 3]))
        result = set_op(Index([1, 2, 3]), sort=sort)
        tm.assert_index_equal(result, expected)

        # mixed closed -> cast to object
        for other_closed in {"right", "left", "both", "neither"} - {closed}:
            other = monotonic_index(0, 11, closed=other_closed)
            expected = getattr(index.astype(object), op_name)(other, sort=sort)
            if op_name == "difference":
                expected = index
            result = set_op(other, sort=sort)
            tm.assert_index_equal(result, expected)

        # GH 19016: incompatible dtypes -> cast to object
        other = interval_range(Timestamp("20180101"), periods=9, closed=closed)
        expected = getattr(index.astype(object), op_name)(other, sort=sort)
        if op_name == "difference":
            expected = index
        result = set_op(other, sort=sort)
        tm.assert_index_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/__init__.py ADDED
File without changes
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/conftest.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas import (
5
+ Index,
6
+ MultiIndex,
7
+ )
8
+
9
+
10
# Note: identical to the "multi" entry in the top-level "index" fixture
@pytest.fixture
def idx():
    """MultiIndex used to test the general functionality of this object."""
    levels = [
        Index(["foo", "bar", "baz", "qux"]),  # major axis
        Index(["one", "two"]),  # minor axis
    ]
    codes = [
        np.array([0, 0, 1, 2, 3, 3]),  # major codes
        np.array([0, 1, 0, 1, 0, 1]),  # minor codes
    ]
    return MultiIndex(
        levels=levels,
        codes=codes,
        names=["first", "second"],
        verify_integrity=False,
    )
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_analytics.py ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ import pandas as pd
5
+ from pandas import (
6
+ Index,
7
+ MultiIndex,
8
+ date_range,
9
+ period_range,
10
+ )
11
+ import pandas._testing as tm
12
+
13
+
14
+ def test_infer_objects(idx):
15
+ with pytest.raises(NotImplementedError, match="to_frame"):
16
+ idx.infer_objects()
17
+
18
+
19
+ def test_shift(idx):
20
+ # GH8083 test the base class for shift
21
+ msg = (
22
+ "This method is only implemented for DatetimeIndex, PeriodIndex and "
23
+ "TimedeltaIndex; Got type MultiIndex"
24
+ )
25
+ with pytest.raises(NotImplementedError, match=msg):
26
+ idx.shift(1)
27
+ with pytest.raises(NotImplementedError, match=msg):
28
+ idx.shift(1, 2)
29
+
30
+
31
+ def test_groupby(idx):
32
+ groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
33
+ labels = idx.tolist()
34
+ exp = {1: labels[:3], 2: labels[3:]}
35
+ tm.assert_dict_equal(groups, exp)
36
+
37
+ # GH5620
38
+ groups = idx.groupby(idx)
39
+ exp = {key: [key] for key in idx}
40
+ tm.assert_dict_equal(groups, exp)
41
+
42
+
43
def test_truncate_multiindex():
    """
    ``MultiIndex.truncate`` slices on the first level and keeps level names.

    Raises ValueError when ``after`` is smaller than ``before``.
    """
    # GH 34564 for MultiIndex level names check
    major_axis = Index(list(range(4)))
    minor_axis = Index(list(range(2)))

    major_codes = np.array([0, 0, 1, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])

    index = MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[major_codes, minor_codes],
        names=["L1", "L2"],
    )

    result = index.truncate(before=1)
    # The first level holds the ints 0..3, so check that label 0 was dropped.
    # The original asserted that "foo" was absent, which can never fail.
    assert 0 not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(after=1)
    assert 2 not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(before=1, after=2)
    assert len(result.levels[0]) == 2
    assert index.names == result.names

    msg = "after < before"
    with pytest.raises(ValueError, match=msg):
        index.truncate(3, 1)
74
+
75
+
76
+ # TODO: reshape
77
+
78
+
79
+ def test_reorder_levels(idx):
80
+ # this blows up
81
+ with pytest.raises(IndexError, match="^Too many levels"):
82
+ idx.reorder_levels([2, 1, 0])
83
+
84
+
85
def test_numpy_repeat():
    """``np.repeat`` repeats each entry; the ``axis`` argument is rejected."""
    repeats = 2
    numbers = [1, 2, 3]
    # the same array serves as second-level values and as the level names
    names = np.array(["foo", "bar"])

    mi = MultiIndex.from_product([numbers, names], names=names)
    repeated = np.repeat(mi, repeats)
    expected = MultiIndex.from_product(
        [numbers, names.repeat(repeats)], names=names
    )
    tm.assert_index_equal(repeated, expected)

    msg = "the 'axis' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.repeat(mi, repeats, axis=1)
97
+
98
+
99
+ def test_append_mixed_dtypes():
100
+ # GH 13660
101
+ dti = date_range("2011-01-01", freq="ME", periods=3)
102
+ dti_tz = date_range("2011-01-01", freq="ME", periods=3, tz="US/Eastern")
103
+ pi = period_range("2011-01", freq="M", periods=3)
104
+
105
+ mi = MultiIndex.from_arrays(
106
+ [[1, 2, 3], [1.1, np.nan, 3.3], ["a", "b", "c"], dti, dti_tz, pi]
107
+ )
108
+ assert mi.nlevels == 6
109
+
110
+ res = mi.append(mi)
111
+ exp = MultiIndex.from_arrays(
112
+ [
113
+ [1, 2, 3, 1, 2, 3],
114
+ [1.1, np.nan, 3.3, 1.1, np.nan, 3.3],
115
+ ["a", "b", "c", "a", "b", "c"],
116
+ dti.append(dti),
117
+ dti_tz.append(dti_tz),
118
+ pi.append(pi),
119
+ ]
120
+ )
121
+ tm.assert_index_equal(res, exp)
122
+
123
+ other = MultiIndex.from_arrays(
124
+ [
125
+ ["x", "y", "z"],
126
+ ["x", "y", "z"],
127
+ ["x", "y", "z"],
128
+ ["x", "y", "z"],
129
+ ["x", "y", "z"],
130
+ ["x", "y", "z"],
131
+ ]
132
+ )
133
+
134
+ res = mi.append(other)
135
+ exp = MultiIndex.from_arrays(
136
+ [
137
+ [1, 2, 3, "x", "y", "z"],
138
+ [1.1, np.nan, 3.3, "x", "y", "z"],
139
+ ["a", "b", "c", "x", "y", "z"],
140
+ dti.append(Index(["x", "y", "z"])),
141
+ dti_tz.append(Index(["x", "y", "z"])),
142
+ pi.append(Index(["x", "y", "z"])),
143
+ ]
144
+ )
145
+ tm.assert_index_equal(res, exp)
146
+
147
+
148
+ def test_iter(idx):
149
+ result = list(idx)
150
+ expected = [
151
+ ("foo", "one"),
152
+ ("foo", "two"),
153
+ ("bar", "one"),
154
+ ("baz", "two"),
155
+ ("qux", "one"),
156
+ ("qux", "two"),
157
+ ]
158
+ assert result == expected
159
+
160
+
161
+ def test_sub(idx):
162
+ first = idx
163
+
164
+ # - now raises (previously was set op difference)
165
+ msg = "cannot perform __sub__ with this index type: MultiIndex"
166
+ with pytest.raises(TypeError, match=msg):
167
+ first - idx[-3:]
168
+ with pytest.raises(TypeError, match=msg):
169
+ idx[-3:] - first
170
+ with pytest.raises(TypeError, match=msg):
171
+ idx[-3:] - first.tolist()
172
+ msg = "cannot perform __rsub__ with this index type: MultiIndex"
173
+ with pytest.raises(TypeError, match=msg):
174
+ first.tolist() - idx[-3:]
175
+
176
+
177
+ def test_map(idx):
178
+ # callable
179
+ index = idx
180
+
181
+ result = index.map(lambda x: x)
182
+ tm.assert_index_equal(result, index)
183
+
184
+
185
+ @pytest.mark.parametrize(
186
+ "mapper",
187
+ [
188
+ lambda values, idx: {i: e for e, i in zip(values, idx)},
189
+ lambda values, idx: pd.Series(values, idx),
190
+ ],
191
+ )
192
+ def test_map_dictlike(idx, mapper):
193
+ identity = mapper(idx.values, idx)
194
+
195
+ # we don't infer to uint64 dtype for a dict
196
+ if idx.dtype == np.uint64 and isinstance(identity, dict):
197
+ expected = idx.astype("int64")
198
+ else:
199
+ expected = idx
200
+
201
+ result = idx.map(identity)
202
+ tm.assert_index_equal(result, expected)
203
+
204
+ # empty mappable
205
+ expected = Index([np.nan] * len(idx))
206
+ result = idx.map(mapper(expected, idx))
207
+ tm.assert_index_equal(result, expected)
208
+
209
+
210
+ @pytest.mark.parametrize(
211
+ "func",
212
+ [
213
+ np.exp,
214
+ np.exp2,
215
+ np.expm1,
216
+ np.log,
217
+ np.log2,
218
+ np.log10,
219
+ np.log1p,
220
+ np.sqrt,
221
+ np.sin,
222
+ np.cos,
223
+ np.tan,
224
+ np.arcsin,
225
+ np.arccos,
226
+ np.arctan,
227
+ np.sinh,
228
+ np.cosh,
229
+ np.tanh,
230
+ np.arcsinh,
231
+ np.arccosh,
232
+ np.arctanh,
233
+ np.deg2rad,
234
+ np.rad2deg,
235
+ ],
236
+ ids=lambda func: func.__name__,
237
+ )
238
+ def test_numpy_ufuncs(idx, func):
239
+ # test ufuncs of numpy. see:
240
+ # https://numpy.org/doc/stable/reference/ufuncs.html
241
+
242
+ expected_exception = TypeError
243
+ msg = (
244
+ "loop of ufunc does not support argument 0 of type tuple which "
245
+ f"has no callable {func.__name__} method"
246
+ )
247
+ with pytest.raises(expected_exception, match=msg):
248
+ func(idx)
249
+
250
+
251
+ @pytest.mark.parametrize(
252
+ "func",
253
+ [np.isfinite, np.isinf, np.isnan, np.signbit],
254
+ ids=lambda func: func.__name__,
255
+ )
256
+ def test_numpy_type_funcs(idx, func):
257
+ msg = (
258
+ f"ufunc '{func.__name__}' not supported for the input types, and the inputs "
259
+ "could not be safely coerced to any supported types according to "
260
+ "the casting rule ''safe''"
261
+ )
262
+ with pytest.raises(TypeError, match=msg):
263
+ func(idx)
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_astype.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas.core.dtypes.dtypes import CategoricalDtype
5
+
6
+ import pandas._testing as tm
7
+
8
+
9
+ def test_astype(idx):
10
+ expected = idx.copy()
11
+ actual = idx.astype("O")
12
+ tm.assert_copy(actual.levels, expected.levels)
13
+ tm.assert_copy(actual.codes, expected.codes)
14
+ assert actual.names == list(expected.names)
15
+
16
+ with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
17
+ idx.astype(np.dtype(int))
18
+
19
+
20
+ @pytest.mark.parametrize("ordered", [True, False])
21
+ def test_astype_category(idx, ordered):
22
+ # GH 18630
23
+ msg = "> 1 ndim Categorical are not supported at this time"
24
+ with pytest.raises(NotImplementedError, match=msg):
25
+ idx.astype(CategoricalDtype(ordered=ordered))
26
+
27
+ if ordered is False:
28
+ # dtype='category' defaults to ordered=False, so only test once
29
+ with pytest.raises(NotImplementedError, match=msg):
30
+ idx.astype("category")
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_constructors.py ADDED
@@ -0,0 +1,860 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import (
2
+ date,
3
+ datetime,
4
+ )
5
+ import itertools
6
+
7
+ import numpy as np
8
+ import pytest
9
+
10
+ from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
11
+
12
+ import pandas as pd
13
+ from pandas import (
14
+ Index,
15
+ MultiIndex,
16
+ Series,
17
+ Timestamp,
18
+ date_range,
19
+ )
20
+ import pandas._testing as tm
21
+
22
+
23
+ def test_constructor_single_level():
24
+ result = MultiIndex(
25
+ levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"]
26
+ )
27
+ assert isinstance(result, MultiIndex)
28
+ expected = Index(["foo", "bar", "baz", "qux"], name="first")
29
+ tm.assert_index_equal(result.levels[0], expected)
30
+ assert result.names == ["first"]
31
+
32
+
33
+ def test_constructor_no_levels():
34
+ msg = "non-zero number of levels/codes"
35
+ with pytest.raises(ValueError, match=msg):
36
+ MultiIndex(levels=[], codes=[])
37
+
38
+ msg = "Must pass both levels and codes"
39
+ with pytest.raises(TypeError, match=msg):
40
+ MultiIndex(levels=[])
41
+ with pytest.raises(TypeError, match=msg):
42
+ MultiIndex(codes=[])
43
+
44
+
45
+ def test_constructor_nonhashable_names():
46
+ # GH 20527
47
+ levels = [[1, 2], ["one", "two"]]
48
+ codes = [[0, 0, 1, 1], [0, 1, 0, 1]]
49
+ names = (["foo"], ["bar"])
50
+ msg = r"MultiIndex\.name must be a hashable type"
51
+ with pytest.raises(TypeError, match=msg):
52
+ MultiIndex(levels=levels, codes=codes, names=names)
53
+
54
+ # With .rename()
55
+ mi = MultiIndex(
56
+ levels=[[1, 2], ["one", "two"]],
57
+ codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
58
+ names=("foo", "bar"),
59
+ )
60
+ renamed = [["fooo"], ["barr"]]
61
+ with pytest.raises(TypeError, match=msg):
62
+ mi.rename(names=renamed)
63
+
64
+ # With .set_names()
65
+ with pytest.raises(TypeError, match=msg):
66
+ mi.set_names(names=renamed)
67
+
68
+
69
+ def test_constructor_mismatched_codes_levels(idx):
70
+ codes = [np.array([1]), np.array([2]), np.array([3])]
71
+ levels = ["a"]
72
+
73
+ msg = "Length of levels and codes must be the same"
74
+ with pytest.raises(ValueError, match=msg):
75
+ MultiIndex(levels=levels, codes=codes)
76
+
77
+ length_error = (
78
+ r"On level 0, code max \(3\) >= length of level \(1\)\. "
79
+ "NOTE: this index is in an inconsistent state"
80
+ )
81
+ label_error = r"Unequal code lengths: \[4, 2\]"
82
+ code_value_error = r"On level 0, code value \(-2\) < -1"
83
+
84
+ # important to check that it's looking at the right thing.
85
+ with pytest.raises(ValueError, match=length_error):
86
+ MultiIndex(levels=[["a"], ["b"]], codes=[[0, 1, 2, 3], [0, 3, 4, 1]])
87
+
88
+ with pytest.raises(ValueError, match=label_error):
89
+ MultiIndex(levels=[["a"], ["b"]], codes=[[0, 0, 0, 0], [0, 0]])
90
+
91
+ # external API
92
+ with pytest.raises(ValueError, match=length_error):
93
+ idx.copy().set_levels([["a"], ["b"]])
94
+
95
+ with pytest.raises(ValueError, match=label_error):
96
+ idx.copy().set_codes([[0, 0, 0, 0], [0, 0]])
97
+
98
+ # test set_codes with verify_integrity=False
99
+ # the setting should not raise any value error
100
+ idx.copy().set_codes(codes=[[0, 0, 0, 0], [0, 0]], verify_integrity=False)
101
+
102
+ # code value smaller than -1
103
+ with pytest.raises(ValueError, match=code_value_error):
104
+ MultiIndex(levels=[["a"], ["b"]], codes=[[0, -2], [0, 0]])
105
+
106
+
107
+ def test_na_levels():
108
+ # GH26408
109
+ # test if codes are re-assigned value -1 for levels
110
+ # with missing values (NaN, NaT, None)
111
+ result = MultiIndex(
112
+ levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[0, -1, 1, 2, 3, 4]]
113
+ )
114
+ expected = MultiIndex(
115
+ levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[-1, -1, -1, -1, 3, 4]]
116
+ )
117
+ tm.assert_index_equal(result, expected)
118
+
119
+ result = MultiIndex(
120
+ levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[0, -1, 1, 2, 3, 4]]
121
+ )
122
+ expected = MultiIndex(
123
+ levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[-1, -1, 1, -1, 3, -1]]
124
+ )
125
+ tm.assert_index_equal(result, expected)
126
+
127
+ # verify set_levels and set_codes
128
+ result = MultiIndex(
129
+ levels=[[1, 2, 3, 4, 5]], codes=[[0, -1, 1, 2, 3, 4]]
130
+ ).set_levels([[np.nan, "s", pd.NaT, 128, None]])
131
+ tm.assert_index_equal(result, expected)
132
+
133
+ result = MultiIndex(
134
+ levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[1, 2, 2, 2, 2, 2]]
135
+ ).set_codes([[0, -1, 1, 2, 3, 4]])
136
+ tm.assert_index_equal(result, expected)
137
+
138
+
139
+ def test_copy_in_constructor():
140
+ levels = np.array(["a", "b", "c"])
141
+ codes = np.array([1, 1, 2, 0, 0, 1, 1])
142
+ val = codes[0]
143
+ mi = MultiIndex(levels=[levels, levels], codes=[codes, codes], copy=True)
144
+ assert mi.codes[0][0] == val
145
+ codes[0] = 15
146
+ assert mi.codes[0][0] == val
147
+ val = levels[0]
148
+ levels[0] = "PANDA"
149
+ assert mi.levels[0][0] == val
150
+
151
+
152
+ # ----------------------------------------------------------------------------
153
+ # from_arrays
154
+ # ----------------------------------------------------------------------------
155
+ def test_from_arrays(idx):
156
+ arrays = [
157
+ np.asarray(lev).take(level_codes)
158
+ for lev, level_codes in zip(idx.levels, idx.codes)
159
+ ]
160
+
161
+ # list of arrays as input
162
+ result = MultiIndex.from_arrays(arrays, names=idx.names)
163
+ tm.assert_index_equal(result, idx)
164
+
165
+ # infer correctly
166
+ result = MultiIndex.from_arrays([[pd.NaT, Timestamp("20130101")], ["a", "b"]])
167
+ assert result.levels[0].equals(Index([Timestamp("20130101")]))
168
+ assert result.levels[1].equals(Index(["a", "b"]))
169
+
170
+
171
+ def test_from_arrays_iterator(idx):
172
+ # GH 18434
173
+ arrays = [
174
+ np.asarray(lev).take(level_codes)
175
+ for lev, level_codes in zip(idx.levels, idx.codes)
176
+ ]
177
+
178
+ # iterator as input
179
+ result = MultiIndex.from_arrays(iter(arrays), names=idx.names)
180
+ tm.assert_index_equal(result, idx)
181
+
182
+ # invalid iterator input
183
+ msg = "Input must be a list / sequence of array-likes."
184
+ with pytest.raises(TypeError, match=msg):
185
+ MultiIndex.from_arrays(0)
186
+
187
+
188
+ def test_from_arrays_tuples(idx):
189
+ arrays = tuple(
190
+ tuple(np.asarray(lev).take(level_codes))
191
+ for lev, level_codes in zip(idx.levels, idx.codes)
192
+ )
193
+
194
+ # tuple of tuples as input
195
+ result = MultiIndex.from_arrays(arrays, names=idx.names)
196
+ tm.assert_index_equal(result, idx)
197
+
198
+
199
+ @pytest.mark.parametrize(
200
+ ("idx1", "idx2"),
201
+ [
202
+ (
203
+ pd.period_range("2011-01-01", freq="D", periods=3),
204
+ pd.period_range("2015-01-01", freq="h", periods=3),
205
+ ),
206
+ (
207
+ date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
208
+ date_range("2015-01-01 10:00", freq="h", periods=3, tz="Asia/Tokyo"),
209
+ ),
210
+ (
211
+ pd.timedelta_range("1 days", freq="D", periods=3),
212
+ pd.timedelta_range("2 hours", freq="h", periods=3),
213
+ ),
214
+ ],
215
+ )
216
+ def test_from_arrays_index_series_period_datetimetz_and_timedelta(idx1, idx2):
217
+ result = MultiIndex.from_arrays([idx1, idx2])
218
+ tm.assert_index_equal(result.get_level_values(0), idx1)
219
+ tm.assert_index_equal(result.get_level_values(1), idx2)
220
+
221
+ result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)])
222
+ tm.assert_index_equal(result2.get_level_values(0), idx1)
223
+ tm.assert_index_equal(result2.get_level_values(1), idx2)
224
+
225
+ tm.assert_index_equal(result, result2)
226
+
227
+
228
+ def test_from_arrays_index_datetimelike_mixed():
229
+ idx1 = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern")
230
+ idx2 = date_range("2015-01-01 10:00", freq="h", periods=3)
231
+ idx3 = pd.timedelta_range("1 days", freq="D", periods=3)
232
+ idx4 = pd.period_range("2011-01-01", freq="D", periods=3)
233
+
234
+ result = MultiIndex.from_arrays([idx1, idx2, idx3, idx4])
235
+ tm.assert_index_equal(result.get_level_values(0), idx1)
236
+ tm.assert_index_equal(result.get_level_values(1), idx2)
237
+ tm.assert_index_equal(result.get_level_values(2), idx3)
238
+ tm.assert_index_equal(result.get_level_values(3), idx4)
239
+
240
+ result2 = MultiIndex.from_arrays(
241
+ [Series(idx1), Series(idx2), Series(idx3), Series(idx4)]
242
+ )
243
+ tm.assert_index_equal(result2.get_level_values(0), idx1)
244
+ tm.assert_index_equal(result2.get_level_values(1), idx2)
245
+ tm.assert_index_equal(result2.get_level_values(2), idx3)
246
+ tm.assert_index_equal(result2.get_level_values(3), idx4)
247
+
248
+ tm.assert_index_equal(result, result2)
249
+
250
+
251
+ def test_from_arrays_index_series_categorical():
252
+ # GH13743
253
+ idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=False)
254
+ idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=True)
255
+
256
+ result = MultiIndex.from_arrays([idx1, idx2])
257
+ tm.assert_index_equal(result.get_level_values(0), idx1)
258
+ tm.assert_index_equal(result.get_level_values(1), idx2)
259
+
260
+ result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)])
261
+ tm.assert_index_equal(result2.get_level_values(0), idx1)
262
+ tm.assert_index_equal(result2.get_level_values(1), idx2)
263
+
264
+ result3 = MultiIndex.from_arrays([idx1.values, idx2.values])
265
+ tm.assert_index_equal(result3.get_level_values(0), idx1)
266
+ tm.assert_index_equal(result3.get_level_values(1), idx2)
267
+
268
+
269
+ def test_from_arrays_empty():
270
+ # 0 levels
271
+ msg = "Must pass non-zero number of levels/codes"
272
+ with pytest.raises(ValueError, match=msg):
273
+ MultiIndex.from_arrays(arrays=[])
274
+
275
+ # 1 level
276
+ result = MultiIndex.from_arrays(arrays=[[]], names=["A"])
277
+ assert isinstance(result, MultiIndex)
278
+ expected = Index([], name="A")
279
+ tm.assert_index_equal(result.levels[0], expected)
280
+ assert result.names == ["A"]
281
+
282
+ # N levels
283
+ for N in [2, 3]:
284
+ arrays = [[]] * N
285
+ names = list("ABC")[:N]
286
+ result = MultiIndex.from_arrays(arrays=arrays, names=names)
287
+ expected = MultiIndex(levels=[[]] * N, codes=[[]] * N, names=names)
288
+ tm.assert_index_equal(result, expected)
289
+
290
+
291
+ @pytest.mark.parametrize(
292
+ "invalid_sequence_of_arrays",
293
+ [
294
+ 1,
295
+ [1],
296
+ [1, 2],
297
+ [[1], 2],
298
+ [1, [2]],
299
+ "a",
300
+ ["a"],
301
+ ["a", "b"],
302
+ [["a"], "b"],
303
+ (1,),
304
+ (1, 2),
305
+ ([1], 2),
306
+ (1, [2]),
307
+ "a",
308
+ ("a",),
309
+ ("a", "b"),
310
+ (["a"], "b"),
311
+ [(1,), 2],
312
+ [1, (2,)],
313
+ [("a",), "b"],
314
+ ((1,), 2),
315
+ (1, (2,)),
316
+ (("a",), "b"),
317
+ ],
318
+ )
319
+ def test_from_arrays_invalid_input(invalid_sequence_of_arrays):
320
+ msg = "Input must be a list / sequence of array-likes"
321
+ with pytest.raises(TypeError, match=msg):
322
+ MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays)
323
+
324
+
325
+ @pytest.mark.parametrize(
326
+ "idx1, idx2", [([1, 2, 3], ["a", "b"]), ([], ["a", "b"]), ([1, 2, 3], [])]
327
+ )
328
+ def test_from_arrays_different_lengths(idx1, idx2):
329
+ # see gh-13599
330
+ msg = "^all arrays must be same length$"
331
+ with pytest.raises(ValueError, match=msg):
332
+ MultiIndex.from_arrays([idx1, idx2])
333
+
334
+
335
+ def test_from_arrays_respects_none_names():
336
+ # GH27292
337
+ a = Series([1, 2, 3], name="foo")
338
+ b = Series(["a", "b", "c"], name="bar")
339
+
340
+ result = MultiIndex.from_arrays([a, b], names=None)
341
+ expected = MultiIndex(
342
+ levels=[[1, 2, 3], ["a", "b", "c"]], codes=[[0, 1, 2], [0, 1, 2]], names=None
343
+ )
344
+
345
+ tm.assert_index_equal(result, expected)
346
+
347
+
348
+ # ----------------------------------------------------------------------------
349
+ # from_tuples
350
+ # ----------------------------------------------------------------------------
351
+ def test_from_tuples():
352
+ msg = "Cannot infer number of levels from empty list"
353
+ with pytest.raises(TypeError, match=msg):
354
+ MultiIndex.from_tuples([])
355
+
356
+ expected = MultiIndex(
357
+ levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
358
+ )
359
+
360
+ # input tuples
361
+ result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=["a", "b"])
362
+ tm.assert_index_equal(result, expected)
363
+
364
+
365
+ def test_from_tuples_iterator():
366
+ # GH 18434
367
+ # input iterator for tuples
368
+ expected = MultiIndex(
369
+ levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
370
+ )
371
+
372
+ result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=["a", "b"])
373
+ tm.assert_index_equal(result, expected)
374
+
375
+ # input non-iterables
376
+ msg = "Input must be a list / sequence of tuple-likes."
377
+ with pytest.raises(TypeError, match=msg):
378
+ MultiIndex.from_tuples(0)
379
+
380
+
381
+ def test_from_tuples_empty():
382
+ # GH 16777
383
+ result = MultiIndex.from_tuples([], names=["a", "b"])
384
+ expected = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
385
+ tm.assert_index_equal(result, expected)
386
+
387
+
388
+ def test_from_tuples_index_values(idx):
389
+ result = MultiIndex.from_tuples(idx)
390
+ assert (result.values == idx.values).all()
391
+
392
+
393
+ def test_tuples_with_name_string():
394
+ # GH 15110 and GH 14848
395
+
396
+ li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
397
+ msg = "Names should be list-like for a MultiIndex"
398
+ with pytest.raises(ValueError, match=msg):
399
+ Index(li, name="abc")
400
+ with pytest.raises(ValueError, match=msg):
401
+ Index(li, name="a")
402
+
403
+
404
+ def test_from_tuples_with_tuple_label():
405
+ # GH 15457
406
+ expected = pd.DataFrame(
407
+ [[2, 1, 2], [4, (1, 2), 3]], columns=["a", "b", "c"]
408
+ ).set_index(["a", "b"])
409
+ idx = MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=("a", "b"))
410
+ result = pd.DataFrame([2, 3], columns=["c"], index=idx)
411
+ tm.assert_frame_equal(expected, result)
412
+
413
+
414
+ # ----------------------------------------------------------------------------
415
+ # from_product
416
+ # ----------------------------------------------------------------------------
417
+ def test_from_product_empty_zero_levels():
418
+ # 0 levels
419
+ msg = "Must pass non-zero number of levels/codes"
420
+ with pytest.raises(ValueError, match=msg):
421
+ MultiIndex.from_product([])
422
+
423
+
424
+ def test_from_product_empty_one_level():
425
+ result = MultiIndex.from_product([[]], names=["A"])
426
+ expected = Index([], name="A")
427
+ tm.assert_index_equal(result.levels[0], expected)
428
+ assert result.names == ["A"]
429
+
430
+
431
+ @pytest.mark.parametrize(
432
+ "first, second", [([], []), (["foo", "bar", "baz"], []), ([], ["a", "b", "c"])]
433
+ )
434
+ def test_from_product_empty_two_levels(first, second):
435
+ names = ["A", "B"]
436
+ result = MultiIndex.from_product([first, second], names=names)
437
+ expected = MultiIndex(levels=[first, second], codes=[[], []], names=names)
438
+ tm.assert_index_equal(result, expected)
439
+
440
+
441
+ @pytest.mark.parametrize("N", list(range(4)))
442
+ def test_from_product_empty_three_levels(N):
443
+ # GH12258
444
+ names = ["A", "B", "C"]
445
+ lvl2 = list(range(N))
446
+ result = MultiIndex.from_product([[], lvl2, []], names=names)
447
+ expected = MultiIndex(levels=[[], lvl2, []], codes=[[], [], []], names=names)
448
+ tm.assert_index_equal(result, expected)
449
+
450
+
451
+ @pytest.mark.parametrize(
452
+ "invalid_input", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]]
453
+ )
454
+ def test_from_product_invalid_input(invalid_input):
455
+ msg = r"Input must be a list / sequence of iterables|Input must be list-like"
456
+ with pytest.raises(TypeError, match=msg):
457
+ MultiIndex.from_product(iterables=invalid_input)
458
+
459
+
460
+ def test_from_product_datetimeindex():
461
+ dt_index = date_range("2000-01-01", periods=2)
462
+ mi = MultiIndex.from_product([[1, 2], dt_index])
463
+ etalon = construct_1d_object_array_from_listlike(
464
+ [
465
+ (1, Timestamp("2000-01-01")),
466
+ (1, Timestamp("2000-01-02")),
467
+ (2, Timestamp("2000-01-01")),
468
+ (2, Timestamp("2000-01-02")),
469
+ ]
470
+ )
471
+ tm.assert_numpy_array_equal(mi.values, etalon)
472
+
473
+
474
+ def test_from_product_rangeindex():
475
+ # RangeIndex is preserved by factorize, so preserved in levels
476
+ rng = Index(range(5))
477
+ other = ["a", "b"]
478
+ mi = MultiIndex.from_product([rng, other])
479
+ tm.assert_index_equal(mi._levels[0], rng, exact=True)
480
+
481
+
482
+ @pytest.mark.parametrize("ordered", [False, True])
483
+ @pytest.mark.parametrize("f", [lambda x: x, lambda x: Series(x), lambda x: x.values])
484
+ def test_from_product_index_series_categorical(ordered, f):
485
+ # GH13743
486
+ first = ["foo", "bar"]
487
+
488
+ idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=ordered)
489
+ expected = pd.CategoricalIndex(
490
+ list("abcaab") + list("abcaab"), categories=list("bac"), ordered=ordered
491
+ )
492
+
493
+ result = MultiIndex.from_product([first, f(idx)])
494
+ tm.assert_index_equal(result.get_level_values(1), expected)
495
+
496
+
497
+ def test_from_product():
498
+ first = ["foo", "bar", "buz"]
499
+ second = ["a", "b", "c"]
500
+ names = ["first", "second"]
501
+ result = MultiIndex.from_product([first, second], names=names)
502
+
503
+ tuples = [
504
+ ("foo", "a"),
505
+ ("foo", "b"),
506
+ ("foo", "c"),
507
+ ("bar", "a"),
508
+ ("bar", "b"),
509
+ ("bar", "c"),
510
+ ("buz", "a"),
511
+ ("buz", "b"),
512
+ ("buz", "c"),
513
+ ]
514
+ expected = MultiIndex.from_tuples(tuples, names=names)
515
+
516
+ tm.assert_index_equal(result, expected)
517
+
518
+
519
+ def test_from_product_iterator():
520
+ # GH 18434
521
+ first = ["foo", "bar", "buz"]
522
+ second = ["a", "b", "c"]
523
+ names = ["first", "second"]
524
+ tuples = [
525
+ ("foo", "a"),
526
+ ("foo", "b"),
527
+ ("foo", "c"),
528
+ ("bar", "a"),
529
+ ("bar", "b"),
530
+ ("bar", "c"),
531
+ ("buz", "a"),
532
+ ("buz", "b"),
533
+ ("buz", "c"),
534
+ ]
535
+ expected = MultiIndex.from_tuples(tuples, names=names)
536
+
537
+ # iterator as input
538
+ result = MultiIndex.from_product(iter([first, second]), names=names)
539
+ tm.assert_index_equal(result, expected)
540
+
541
+ # Invalid non-iterable input
542
+ msg = "Input must be a list / sequence of iterables."
543
+ with pytest.raises(TypeError, match=msg):
544
+ MultiIndex.from_product(0)
545
+
546
+
547
+ @pytest.mark.parametrize(
548
+ "a, b, expected_names",
549
+ [
550
+ (
551
+ Series([1, 2, 3], name="foo"),
552
+ Series(["a", "b"], name="bar"),
553
+ ["foo", "bar"],
554
+ ),
555
+ (Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]),
556
+ ([1, 2, 3], ["a", "b"], None),
557
+ ],
558
+ )
559
+ def test_from_product_infer_names(a, b, expected_names):
560
+ # GH27292
561
+ result = MultiIndex.from_product([a, b])
562
+ expected = MultiIndex(
563
+ levels=[[1, 2, 3], ["a", "b"]],
564
+ codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
565
+ names=expected_names,
566
+ )
567
+ tm.assert_index_equal(result, expected)
568
+
569
+
570
+ def test_from_product_respects_none_names():
571
+ # GH27292
572
+ a = Series([1, 2, 3], name="foo")
573
+ b = Series(["a", "b"], name="bar")
574
+
575
+ result = MultiIndex.from_product([a, b], names=None)
576
+ expected = MultiIndex(
577
+ levels=[[1, 2, 3], ["a", "b"]],
578
+ codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
579
+ names=None,
580
+ )
581
+ tm.assert_index_equal(result, expected)
582
+
583
+
584
+ def test_from_product_readonly():
585
+ # GH#15286 passing read-only array to from_product
586
+ a = np.array(range(3))
587
+ b = ["a", "b"]
588
+ expected = MultiIndex.from_product([a, b])
589
+
590
+ a.setflags(write=False)
591
+ result = MultiIndex.from_product([a, b])
592
+ tm.assert_index_equal(result, expected)
593
+
594
+
595
+ def test_create_index_existing_name(idx):
596
+ # GH11193, when an existing index is passed, and a new name is not
597
+ # specified, the new index should inherit the previous object name
598
+ index = idx
599
+ index.names = ["foo", "bar"]
600
+ result = Index(index)
601
+ expected = Index(
602
+ Index(
603
+ [
604
+ ("foo", "one"),
605
+ ("foo", "two"),
606
+ ("bar", "one"),
607
+ ("baz", "two"),
608
+ ("qux", "one"),
609
+ ("qux", "two"),
610
+ ],
611
+ dtype="object",
612
+ )
613
+ )
614
+ tm.assert_index_equal(result, expected)
615
+
616
+ result = Index(index, name="A")
617
+ expected = Index(
618
+ Index(
619
+ [
620
+ ("foo", "one"),
621
+ ("foo", "two"),
622
+ ("bar", "one"),
623
+ ("baz", "two"),
624
+ ("qux", "one"),
625
+ ("qux", "two"),
626
+ ],
627
+ dtype="object",
628
+ ),
629
+ name="A",
630
+ )
631
+ tm.assert_index_equal(result, expected)
632
+
633
+
634
+ # ----------------------------------------------------------------------------
635
+ # from_frame
636
+ # ----------------------------------------------------------------------------
637
+ def test_from_frame():
638
+ # GH 22420
639
+ df = pd.DataFrame(
640
+ [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], columns=["L1", "L2"]
641
+ )
642
+ expected = MultiIndex.from_tuples(
643
+ [("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")], names=["L1", "L2"]
644
+ )
645
+ result = MultiIndex.from_frame(df)
646
+ tm.assert_index_equal(expected, result)
647
+
648
+
649
+ def test_from_frame_missing_values_multiIndex():
650
+ # GH 39984
651
+ pa = pytest.importorskip("pyarrow")
652
+
653
+ df = pd.DataFrame(
654
+ {
655
+ "a": Series([1, 2, None], dtype="Int64"),
656
+ "b": pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])),
657
+ }
658
+ )
659
+ multi_indexed = MultiIndex.from_frame(df)
660
+ expected = MultiIndex.from_arrays(
661
+ [
662
+ Series([1, 2, None]).astype("Int64"),
663
+ pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])),
664
+ ],
665
+ names=["a", "b"],
666
+ )
667
+ tm.assert_index_equal(multi_indexed, expected)
668
+
669
+
670
+ @pytest.mark.parametrize(
671
+ "non_frame",
672
+ [
673
+ Series([1, 2, 3, 4]),
674
+ [1, 2, 3, 4],
675
+ [[1, 2], [3, 4], [5, 6]],
676
+ Index([1, 2, 3, 4]),
677
+ np.array([[1, 2], [3, 4], [5, 6]]),
678
+ 27,
679
+ ],
680
+ )
681
+ def test_from_frame_error(non_frame):
682
+ # GH 22420
683
+ with pytest.raises(TypeError, match="Input must be a DataFrame"):
684
+ MultiIndex.from_frame(non_frame)
685
+
686
+
687
+ def test_from_frame_dtype_fidelity():
688
+ # GH 22420
689
+ df = pd.DataFrame(
690
+ {
691
+ "dates": date_range("19910905", periods=6, tz="US/Eastern"),
692
+ "a": [1, 1, 1, 2, 2, 2],
693
+ "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
694
+ "c": ["x", "x", "y", "z", "x", "y"],
695
+ }
696
+ )
697
+ original_dtypes = df.dtypes.to_dict()
698
+
699
+ expected_mi = MultiIndex.from_arrays(
700
+ [
701
+ date_range("19910905", periods=6, tz="US/Eastern"),
702
+ [1, 1, 1, 2, 2, 2],
703
+ pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
704
+ ["x", "x", "y", "z", "x", "y"],
705
+ ],
706
+ names=["dates", "a", "b", "c"],
707
+ )
708
+ mi = MultiIndex.from_frame(df)
709
+ mi_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)}
710
+
711
+ tm.assert_index_equal(expected_mi, mi)
712
+ assert original_dtypes == mi_dtypes
713
+
714
+
715
+ @pytest.mark.parametrize(
716
+ "names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], ["x", "y"])]
717
+ )
718
+ def test_from_frame_valid_names(names_in, names_out):
719
+ # GH 22420
720
+ df = pd.DataFrame(
721
+ [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
722
+ columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]),
723
+ )
724
+ mi = MultiIndex.from_frame(df, names=names_in)
725
+ assert mi.names == names_out
726
+
727
+
728
+ @pytest.mark.parametrize(
729
+ "names,expected_error_msg",
730
+ [
731
+ ("bad_input", "Names should be list-like for a MultiIndex"),
732
+ (["a", "b", "c"], "Length of names must match number of levels in MultiIndex"),
733
+ ],
734
+ )
735
+ def test_from_frame_invalid_names(names, expected_error_msg):
736
+ # GH 22420
737
+ df = pd.DataFrame(
738
+ [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
739
+ columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]),
740
+ )
741
+ with pytest.raises(ValueError, match=expected_error_msg):
742
+ MultiIndex.from_frame(df, names=names)
743
+
744
+
745
+ def test_index_equal_empty_iterable():
746
+ # #16844
747
+ a = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"])
748
+ b = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
749
+ tm.assert_index_equal(a, b)
750
+
751
+
752
+ def test_raise_invalid_sortorder():
753
+ # Test that the MultiIndex constructor raise when a incorrect sortorder is given
754
+ # GH#28518
755
+
756
+ levels = [[0, 1], [0, 1, 2]]
757
+
758
+ # Correct sortorder
759
+ MultiIndex(
760
+ levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
761
+ )
762
+
763
+ with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"):
764
+ MultiIndex(
765
+ levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=2
766
+ )
767
+
768
+ with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"):
769
+ MultiIndex(
770
+ levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1
771
+ )
772
+
773
+
774
+ def test_datetimeindex():
775
+ idx1 = pd.DatetimeIndex(
776
+ ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo"
777
+ )
778
+ idx2 = date_range("2010/01/01", periods=6, freq="ME", tz="US/Eastern")
779
+ idx = MultiIndex.from_arrays([idx1, idx2])
780
+
781
+ expected1 = pd.DatetimeIndex(
782
+ ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo"
783
+ )
784
+
785
+ tm.assert_index_equal(idx.levels[0], expected1)
786
+ tm.assert_index_equal(idx.levels[1], idx2)
787
+
788
+ # from datetime combos
789
+ # GH 7888
790
+ date1 = np.datetime64("today")
791
+ date2 = datetime.today()
792
+ date3 = Timestamp.today()
793
+
794
+ for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]):
795
+ index = MultiIndex.from_product([[d1], [d2]])
796
+ assert isinstance(index.levels[0], pd.DatetimeIndex)
797
+ assert isinstance(index.levels[1], pd.DatetimeIndex)
798
+
799
+ # but NOT date objects, matching Index behavior
800
+ date4 = date.today()
801
+ index = MultiIndex.from_product([[date4], [date2]])
802
+ assert not isinstance(index.levels[0], pd.DatetimeIndex)
803
+ assert isinstance(index.levels[1], pd.DatetimeIndex)
804
+
805
+
806
+ def test_constructor_with_tz():
807
+ index = pd.DatetimeIndex(
808
+ ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"
809
+ )
810
+ columns = pd.DatetimeIndex(
811
+ ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo"
812
+ )
813
+
814
+ result = MultiIndex.from_arrays([index, columns])
815
+
816
+ assert result.names == ["dt1", "dt2"]
817
+ tm.assert_index_equal(result.levels[0], index)
818
+ tm.assert_index_equal(result.levels[1], columns)
819
+
820
+ result = MultiIndex.from_arrays([Series(index), Series(columns)])
821
+
822
+ assert result.names == ["dt1", "dt2"]
823
+ tm.assert_index_equal(result.levels[0], index)
824
+ tm.assert_index_equal(result.levels[1], columns)
825
+
826
+
827
+ def test_multiindex_inference_consistency():
828
+ # check that inference behavior matches the base class
829
+
830
+ v = date.today()
831
+
832
+ arr = [v, v]
833
+
834
+ idx = Index(arr)
835
+ assert idx.dtype == object
836
+
837
+ mi = MultiIndex.from_arrays([arr])
838
+ lev = mi.levels[0]
839
+ assert lev.dtype == object
840
+
841
+ mi = MultiIndex.from_product([arr])
842
+ lev = mi.levels[0]
843
+ assert lev.dtype == object
844
+
845
+ mi = MultiIndex.from_tuples([(x,) for x in arr])
846
+ lev = mi.levels[0]
847
+ assert lev.dtype == object
848
+
849
+
850
+ def test_dtype_representation(using_infer_string):
851
+ # GH#46900
852
+ pmidx = MultiIndex.from_arrays([[1], ["a"]], names=[("a", "b"), ("c", "d")])
853
+ result = pmidx.dtypes
854
+ exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
855
+ expected = Series(
856
+ ["int64", exp],
857
+ index=MultiIndex.from_tuples([("a", "b"), ("c", "d")]),
858
+ dtype=object,
859
+ )
860
+ tm.assert_series_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_conversion.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas.compat.numpy import np_version_gt2
5
+
6
+ import pandas as pd
7
+ from pandas import (
8
+ DataFrame,
9
+ MultiIndex,
10
+ )
11
+ import pandas._testing as tm
12
+
13
+
14
+ def test_to_numpy(idx):
15
+ result = idx.to_numpy()
16
+ exp = idx.values
17
+ tm.assert_numpy_array_equal(result, exp)
18
+
19
+
20
+ def test_array_interface(idx):
21
+ # https://github.com/pandas-dev/pandas/pull/60046
22
+ result = np.asarray(idx)
23
+ expected = np.empty((6,), dtype=object)
24
+ expected[:] = [
25
+ ("foo", "one"),
26
+ ("foo", "two"),
27
+ ("bar", "one"),
28
+ ("baz", "two"),
29
+ ("qux", "one"),
30
+ ("qux", "two"),
31
+ ]
32
+ tm.assert_numpy_array_equal(result, expected)
33
+
34
+ # it always gives a copy by default, but the values are cached, so results
35
+ # are still sharing memory
36
+ result_copy1 = np.asarray(idx)
37
+ result_copy2 = np.asarray(idx)
38
+ assert np.may_share_memory(result_copy1, result_copy2)
39
+
40
+ # with explicit copy=True, then it is an actual copy
41
+ result_copy1 = np.array(idx, copy=True)
42
+ result_copy2 = np.array(idx, copy=True)
43
+ assert not np.may_share_memory(result_copy1, result_copy2)
44
+
45
+ if not np_version_gt2:
46
+ # copy=False semantics are only supported in NumPy>=2.
47
+ return
48
+
49
+ # for MultiIndex, copy=False is never allowed
50
+ msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed"
51
+ with tm.assert_produces_warning(FutureWarning, match=msg):
52
+ np.array(idx, copy=False)
53
+
54
+
55
def test_to_frame():
    """Check ``MultiIndex.to_frame`` for plain and datetime-valued levels.

    Exercises ``index=False`` / default index, names taken from the index,
    names overridden through ``name=`` (GH-22580), and the errors raised for
    a non-list ``name`` or a ``name`` of the wrong length.
    """
    tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]

    # unnamed levels -> default integer column labels
    index = MultiIndex.from_tuples(tuples)
    result = index.to_frame(index=False)
    expected = DataFrame(tuples)
    tm.assert_frame_equal(result, expected)

    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # named levels -> column labels come from the level names
    index = MultiIndex.from_tuples(tuples, names=["first", "second"])
    result = index.to_frame(index=False)
    expected = DataFrame(tuples)
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(result, expected)

    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # See GH-22580
    index = MultiIndex.from_tuples(tuples)
    result = index.to_frame(index=False, name=["first", "second"])
    expected = DataFrame(tuples)
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(result, expected)

    result = index.to_frame(name=["first", "second"])
    expected.index = index
    tm.assert_frame_equal(result, expected)

    msg = "'name' must be a list / sequence of column names."
    with pytest.raises(TypeError, match=msg):
        index.to_frame(name="first")

    msg = "'name' should have same length as number of levels on index."
    with pytest.raises(ValueError, match=msg):
        index.to_frame(name=["first"])

    # Tests for datetime index
    index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)])
    result = index.to_frame(index=False)
    expected = DataFrame(
        {
            0: np.repeat(np.arange(5, dtype="int64"), 3),
            1: np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(result, expected)

    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # See GH-22580
    result = index.to_frame(index=False, name=["first", "second"])
    expected = DataFrame(
        {
            "first": np.repeat(np.arange(5, dtype="int64"), 3),
            "second": np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(result, expected)

    result = index.to_frame(name=["first", "second"])
    expected.index = index
    tm.assert_frame_equal(result, expected)
126
+
127
+
128
def test_to_frame_dtype_fidelity():
    """Check that ``to_frame`` keeps each level's dtype (GH 22420).

    The index mixes tz-aware datetimes, integers, an ordered categorical and
    strings; the resulting column dtypes must equal the level dtypes.
    """
    # GH 22420
    mi = MultiIndex.from_arrays(
        [
            pd.date_range("19910905", periods=6, tz="US/Eastern"),
            [1, 1, 1, 2, 2, 2],
            pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            ["x", "x", "y", "z", "x", "y"],
        ],
        names=["dates", "a", "b", "c"],
    )
    original_dtypes = {name: level.dtype for name, level in zip(mi.names, mi.levels)}

    expected_df = DataFrame(
        {
            "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"),
            "a": [1, 1, 1, 2, 2, 2],
            "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            "c": ["x", "x", "y", "z", "x", "y"],
        }
    )
    df = mi.to_frame(index=False)
    df_dtypes = df.dtypes.to_dict()

    tm.assert_frame_equal(df, expected_df)
    assert original_dtypes == df_dtypes
154
+
155
+
156
def test_to_frame_resulting_column_order():
    """Check that ``to_frame`` columns follow the level-name order (GH 22420)."""
    # GH 22420
    expected = ["z", 0, "a"]
    mi = MultiIndex.from_arrays(
        [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]], names=expected
    )
    result = mi.to_frame().columns.tolist()
    assert result == expected
164
+
165
+
166
def test_to_frame_duplicate_labels():
    """Check ``to_frame`` with duplicate level names (GH 45245).

    Duplicates must raise by default and be accepted with
    ``allow_duplicates=True``; this is checked for identical names and for
    the names ``[None, 0]``, where the expected columns are ``[0, 0]``.
    """
    # GH 45245
    data = [(1, 2), (3, 4)]
    names = ["a", "a"]
    index = MultiIndex.from_tuples(data, names=names)
    with pytest.raises(ValueError, match="Cannot create duplicate column labels"):
        index.to_frame()

    result = index.to_frame(allow_duplicates=True)
    expected = DataFrame(data, index=index, columns=names)
    tm.assert_frame_equal(result, expected)

    names = [None, 0]
    index = MultiIndex.from_tuples(data, names=names)
    with pytest.raises(ValueError, match="Cannot create duplicate column labels"):
        index.to_frame()

    result = index.to_frame(allow_duplicates=True)
    expected = DataFrame(data, index=index, columns=[0, 0])
    tm.assert_frame_equal(result, expected)
186
+
187
+
188
+ def test_to_flat_index(idx):
189
+ expected = pd.Index(
190
+ (
191
+ ("foo", "one"),
192
+ ("foo", "two"),
193
+ ("bar", "one"),
194
+ ("baz", "two"),
195
+ ("qux", "one"),
196
+ ("qux", "two"),
197
+ ),
198
+ tupleize_cols=False,
199
+ )
200
+ result = idx.to_flat_index()
201
+ tm.assert_index_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_copy.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from copy import (
2
+ copy,
3
+ deepcopy,
4
+ )
5
+
6
+ import pytest
7
+
8
+ from pandas import MultiIndex
9
+ import pandas._testing as tm
10
+
11
+
12
def assert_multiindex_copied(copy, original):
    """Assert that ``copy`` is a proper copy of the MultiIndex ``original``.

    Levels, codes, names and sortorder must compare equal, while the
    ``codes`` and ``names`` containers must be distinct objects.

    Note: the ``copy`` parameter shadows the ``copy`` function imported at
    module level inside this helper; the name is kept for compatibility.
    """
    # Levels should be (at least, shallow copied)
    tm.assert_copy(copy.levels, original.levels)

    # Labels doesn't matter which way copied
    tm.assert_almost_equal(copy.codes, original.codes)
    assert copy.codes is not original.codes

    # Names doesn't matter which way copied
    assert copy.names == original.names
    assert copy.names is not original.names

    # Sort order should be copied
    assert copy.sortorder == original.sortorder
27
+
28
+
29
def test_copy(idx):
    """Check that ``MultiIndex.copy()`` satisfies the copy invariants."""
    i_copy = idx.copy()

    assert_multiindex_copied(i_copy, idx)
33
+
34
+
35
def test_shallow_copy(idx):
    """Check that the private ``_view()`` satisfies the copy invariants."""
    i_copy = idx._view()

    assert_multiindex_copied(i_copy, idx)
39
+
40
+
41
def test_view(idx):
    """Check that ``MultiIndex.view()`` satisfies the copy invariants."""
    i_view = idx.view()
    assert_multiindex_copied(i_view, idx)
44
+
45
+
46
@pytest.mark.parametrize("func", [copy, deepcopy])
def test_copy_and_deepcopy(func):
    """Check that ``copy.copy`` / ``copy.deepcopy`` give a new, equal index."""
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    idx_copy = func(idx)
    assert idx_copy is not idx
    assert idx_copy.equals(idx)
56
+
57
+
58
@pytest.mark.parametrize("deep", [True, False])
def test_copy_method(deep):
    """Check that ``MultiIndex.copy(deep=...)`` returns an equal index."""
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    idx_copy = idx.copy(deep=deep)
    assert idx_copy.equals(idx)
67
+
68
+
69
@pytest.mark.parametrize("deep", [True, False])
@pytest.mark.parametrize(
    "kwarg, value",
    [
        ("names", ["third", "fourth"]),
    ],
)
def test_copy_method_kwargs(deep, kwarg, value):
    """Check that keyword overrides passed to ``copy`` show up on the result."""
    # gh-12309: Check that the "name" argument as well as other kwargs are honored
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    idx_copy = idx.copy(**{kwarg: value, "deep": deep})
    assert getattr(idx_copy, kwarg) == value
85
+
86
+
87
def test_copy_deep_false_retains_id():
    """Check that a shallow copy shares the ``_id`` object (GH#47878)."""
    # GH#47878
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )

    res = idx.copy(deep=False)
    assert res._id is idx._id
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_drop.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas.errors import PerformanceWarning
5
+
6
+ import pandas as pd
7
+ from pandas import (
8
+ Index,
9
+ MultiIndex,
10
+ )
11
+ import pandas._testing as tm
12
+
13
+
14
+ def test_drop(idx):
15
+ dropped = idx.drop([("foo", "two"), ("qux", "one")])
16
+
17
+ index = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")])
18
+ dropped2 = idx.drop(index)
19
+
20
+ expected = idx[[0, 2, 3, 5]]
21
+ tm.assert_index_equal(dropped, expected)
22
+ tm.assert_index_equal(dropped2, expected)
23
+
24
+ dropped = idx.drop(["bar"])
25
+ expected = idx[[0, 1, 3, 4, 5]]
26
+ tm.assert_index_equal(dropped, expected)
27
+
28
+ dropped = idx.drop("foo")
29
+ expected = idx[[2, 3, 4, 5]]
30
+ tm.assert_index_equal(dropped, expected)
31
+
32
+ index = MultiIndex.from_tuples([("bar", "two")])
33
+ with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
34
+ idx.drop([("bar", "two")])
35
+ with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
36
+ idx.drop(index)
37
+ with pytest.raises(KeyError, match=r"^'two'$"):
38
+ idx.drop(["foo", "two"])
39
+
40
+ # partially correct argument
41
+ mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")])
42
+ with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
43
+ idx.drop(mixed_index)
44
+
45
+ # error='ignore'
46
+ dropped = idx.drop(index, errors="ignore")
47
+ expected = idx[[0, 1, 2, 3, 4, 5]]
48
+ tm.assert_index_equal(dropped, expected)
49
+
50
+ dropped = idx.drop(mixed_index, errors="ignore")
51
+ expected = idx[[0, 1, 2, 3, 5]]
52
+ tm.assert_index_equal(dropped, expected)
53
+
54
+ dropped = idx.drop(["foo", "two"], errors="ignore")
55
+ expected = idx[[2, 3, 4, 5]]
56
+ tm.assert_index_equal(dropped, expected)
57
+
58
+ # mixed partial / full drop
59
+ dropped = idx.drop(["foo", ("qux", "one")])
60
+ expected = idx[[2, 3, 5]]
61
+ tm.assert_index_equal(dropped, expected)
62
+
63
+ # mixed partial / full drop / error='ignore'
64
+ mixed_index = ["foo", ("qux", "one"), "two"]
65
+ with pytest.raises(KeyError, match=r"^'two'$"):
66
+ idx.drop(mixed_index)
67
+ dropped = idx.drop(mixed_index, errors="ignore")
68
+ expected = idx[[2, 3, 5]]
69
+ tm.assert_index_equal(dropped, expected)
70
+
71
+
72
+ def test_droplevel_with_names(idx):
73
+ index = idx[idx.get_loc("foo")]
74
+ dropped = index.droplevel(0)
75
+ assert dropped.name == "second"
76
+
77
+ index = MultiIndex(
78
+ levels=[Index(range(4)), Index(range(4)), Index(range(4))],
79
+ codes=[
80
+ np.array([0, 0, 1, 2, 2, 2, 3, 3]),
81
+ np.array([0, 1, 0, 0, 0, 1, 0, 1]),
82
+ np.array([1, 0, 1, 1, 0, 0, 1, 0]),
83
+ ],
84
+ names=["one", "two", "three"],
85
+ )
86
+ dropped = index.droplevel(0)
87
+ assert dropped.names == ("two", "three")
88
+
89
+ dropped = index.droplevel("two")
90
+ expected = index.droplevel(1)
91
+ assert dropped.equals(expected)
92
+
93
+
94
def test_droplevel_list():
    """Check ``droplevel`` when given a list of level names.

    Covers dropping several levels, an empty list, the ``ValueError`` for
    dropping every level and the ``KeyError`` for an unknown level name.
    """
    index = MultiIndex(
        levels=[Index(range(4)), Index(range(4)), Index(range(4))],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
        names=["one", "two", "three"],
    )

    dropped = index[:2].droplevel(["three", "one"])
    expected = index[:2].droplevel(2).droplevel(0)
    assert dropped.equals(expected)

    dropped = index[:2].droplevel([])
    expected = index[:2]
    assert dropped.equals(expected)

    msg = (
        "Cannot remove 3 levels from an index with 3 levels: "
        "at least one level must be left"
    )
    with pytest.raises(ValueError, match=msg):
        index[:2].droplevel(["one", "two", "three"])

    with pytest.raises(KeyError, match="'Level four not found'"):
        index[:2].droplevel(["one", "four"])
122
+
123
+
124
def test_drop_not_lexsorted():
    """Check ``drop`` on a non-lexsorted MultiIndex (GH 12078).

    A lexsorted index and an equal but non-lexsorted one must give the same
    result from ``drop("a")``, with a ``PerformanceWarning`` being emitted.
    """
    # GH 12078

    # define the lexsorted version of the multi-index
    tuples = [("a", ""), ("b1", "c1"), ("b2", "c2")]
    lexsorted_mi = MultiIndex.from_tuples(tuples, names=["b", "c"])
    assert lexsorted_mi._is_lexsorted()

    # and the not-lexsorted version
    df = pd.DataFrame(
        columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]]
    )
    df = df.pivot_table(index="a", columns=["b", "c"], values="d")
    df = df.reset_index()
    not_lexsorted_mi = df.columns
    assert not not_lexsorted_mi._is_lexsorted()

    # compare the results
    tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
    with tm.assert_produces_warning(PerformanceWarning):
        tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a"))
145
+
146
+
147
+ def test_drop_with_nan_in_index(nulls_fixture):
148
+ # GH#18853
149
+ mi = MultiIndex.from_tuples([("blah", nulls_fixture)], names=["name", "date"])
150
+ msg = r"labels \[Timestamp\('2001-01-01 00:00:00'\)\] not found in level"
151
+ with pytest.raises(KeyError, match=msg):
152
+ mi.drop(pd.Timestamp("2001"), level="date")
153
+
154
+
155
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_drop_with_non_monotonic_duplicates():
    """Check that ``drop`` removes every occurrence of a duplicated key (GH#33494)."""
    # GH#33494
    mi = MultiIndex.from_tuples([(1, 2), (2, 3), (1, 2)])
    result = mi.drop((1, 2))
    expected = MultiIndex.from_tuples([(2, 3)])
    tm.assert_index_equal(result, expected)
162
+
163
+
164
def test_single_level_drop_partially_missing_elements():
    """Check level-wise ``drop`` when some labels are missing (GH 37820).

    A ``KeyError`` naming the missing label must be raised even if other
    labels in the request exist in the level.
    """
    # GH 37820

    mi = MultiIndex.from_tuples([(1, 2), (2, 2), (3, 2)])
    msg = r"labels \[4\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop(4, level=0)
    with pytest.raises(KeyError, match=msg):
        mi.drop([1, 4], level=0)
    msg = r"labels \[nan\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop([np.nan], level=0)
    with pytest.raises(KeyError, match=msg):
        mi.drop([np.nan, 1, 2, 3], level=0)

    mi = MultiIndex.from_tuples([(np.nan, 1), (1, 2)])
    msg = r"labels \['a'\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop([np.nan, 1, "a"], level=0)
183
+
184
+
185
def test_droplevel_multiindex_one_level():
    """Check ``droplevel([])`` on a single-level MultiIndex (GH#37208).

    The expected result is a plain ``Index`` carrying the level's name.
    """
    # GH#37208
    index = MultiIndex.from_tuples([(2,)], names=("b",))
    result = index.droplevel([])
    expected = Index([2], name="b")
    tm.assert_index_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_equivalence.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas.core.dtypes.common import is_any_real_numeric_dtype
5
+
6
+ import pandas as pd
7
+ from pandas import (
8
+ Index,
9
+ MultiIndex,
10
+ Series,
11
+ )
12
+ import pandas._testing as tm
13
+
14
+
15
+ def test_equals(idx):
16
+ assert idx.equals(idx)
17
+ assert idx.equals(idx.copy())
18
+ assert idx.equals(idx.astype(object))
19
+ assert idx.equals(idx.to_flat_index())
20
+ assert idx.equals(idx.to_flat_index().astype("category"))
21
+
22
+ assert not idx.equals(list(idx))
23
+ assert not idx.equals(np.array(idx))
24
+
25
+ same_values = Index(idx, dtype=object)
26
+ assert idx.equals(same_values)
27
+ assert same_values.equals(idx)
28
+
29
+ if idx.nlevels == 1:
30
+ # do not test MultiIndex
31
+ assert not idx.equals(Series(idx))
32
+
33
+
34
+ def test_equals_op(idx):
35
+ # GH9947, GH10637
36
+ index_a = idx
37
+
38
+ n = len(index_a)
39
+ index_b = index_a[0:-1]
40
+ index_c = index_a[0:-1].append(index_a[-2:-1])
41
+ index_d = index_a[0:1]
42
+ with pytest.raises(ValueError, match="Lengths must match"):
43
+ index_a == index_b
44
+ expected1 = np.array([True] * n)
45
+ expected2 = np.array([True] * (n - 1) + [False])
46
+ tm.assert_numpy_array_equal(index_a == index_a, expected1)
47
+ tm.assert_numpy_array_equal(index_a == index_c, expected2)
48
+
49
+ # test comparisons with numpy arrays
50
+ array_a = np.array(index_a)
51
+ array_b = np.array(index_a[0:-1])
52
+ array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
53
+ array_d = np.array(index_a[0:1])
54
+ with pytest.raises(ValueError, match="Lengths must match"):
55
+ index_a == array_b
56
+ tm.assert_numpy_array_equal(index_a == array_a, expected1)
57
+ tm.assert_numpy_array_equal(index_a == array_c, expected2)
58
+
59
+ # test comparisons with Series
60
+ series_a = Series(array_a)
61
+ series_b = Series(array_b)
62
+ series_c = Series(array_c)
63
+ series_d = Series(array_d)
64
+ with pytest.raises(ValueError, match="Lengths must match"):
65
+ index_a == series_b
66
+
67
+ tm.assert_numpy_array_equal(index_a == series_a, expected1)
68
+ tm.assert_numpy_array_equal(index_a == series_c, expected2)
69
+
70
+ # cases where length is 1 for one of them
71
+ with pytest.raises(ValueError, match="Lengths must match"):
72
+ index_a == index_d
73
+ with pytest.raises(ValueError, match="Lengths must match"):
74
+ index_a == series_d
75
+ with pytest.raises(ValueError, match="Lengths must match"):
76
+ index_a == array_d
77
+ msg = "Can only compare identically-labeled Series objects"
78
+ with pytest.raises(ValueError, match=msg):
79
+ series_a == series_d
80
+ with pytest.raises(ValueError, match="Lengths must match"):
81
+ series_a == array_d
82
+
83
+ # comparing with a scalar should broadcast; note that we are excluding
84
+ # MultiIndex because in this case each item in the index is a tuple of
85
+ # length 2, and therefore is considered an array of length 2 in the
86
+ # comparison instead of a scalar
87
+ if not isinstance(index_a, MultiIndex):
88
+ expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
89
+ # assuming the 2nd to last item is unique in the data
90
+ item = index_a[-2]
91
+ tm.assert_numpy_array_equal(index_a == item, expected3)
92
+ tm.assert_series_equal(series_a == item, Series(expected3))
93
+
94
+
95
def test_compare_tuple():
    """Check all six comparison operators against one tuple (GH#21517).

    The operand is the first element of the index.
    """
    # GH#21517
    mi = MultiIndex.from_product([[1, 2]] * 2)

    all_false = np.array([False, False, False, False])

    result = mi == mi[0]
    expected = np.array([True, False, False, False])
    tm.assert_numpy_array_equal(result, expected)

    result = mi != mi[0]
    tm.assert_numpy_array_equal(result, ~expected)

    result = mi < mi[0]
    tm.assert_numpy_array_equal(result, all_false)

    result = mi <= mi[0]
    tm.assert_numpy_array_equal(result, expected)

    result = mi > mi[0]
    tm.assert_numpy_array_equal(result, ~expected)

    result = mi >= mi[0]
    tm.assert_numpy_array_equal(result, ~all_false)
119
+
120
+
121
def test_compare_tuple_strs():
    """Check ``==`` against tuples of strings (GH#34180).

    A full-length tuple matches one entry; a shorter tuple matches nothing.
    """
    # GH#34180

    mi = MultiIndex.from_tuples([("a", "b"), ("b", "c"), ("c", "a")])

    result = mi == ("c", "a")
    expected = np.array([False, False, True])
    tm.assert_numpy_array_equal(result, expected)

    result = mi == ("c",)
    expected = np.array([False, False, False])
    tm.assert_numpy_array_equal(result, expected)
133
+
134
+
135
+ def test_equals_multi(idx):
136
+ assert idx.equals(idx)
137
+ assert not idx.equals(idx.values)
138
+ assert idx.equals(Index(idx.values))
139
+
140
+ assert idx.equal_levels(idx)
141
+ assert not idx.equals(idx[:-1])
142
+ assert not idx.equals(idx[-1])
143
+
144
+ # different number of levels
145
+ index = MultiIndex(
146
+ levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))],
147
+ codes=[
148
+ np.array([0, 0, 1, 2, 2, 2, 3, 3]),
149
+ np.array([0, 1, 0, 0, 0, 1, 0, 1]),
150
+ np.array([1, 0, 1, 1, 0, 0, 1, 0]),
151
+ ],
152
+ )
153
+
154
+ index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1])
155
+ assert not index.equals(index2)
156
+ assert not index.equal_levels(index2)
157
+
158
+ # levels are different
159
+ major_axis = Index(list(range(4)))
160
+ minor_axis = Index(list(range(2)))
161
+
162
+ major_codes = np.array([0, 0, 1, 2, 2, 3])
163
+ minor_codes = np.array([0, 1, 0, 0, 1, 0])
164
+
165
+ index = MultiIndex(
166
+ levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
167
+ )
168
+ assert not idx.equals(index)
169
+ assert not idx.equal_levels(index)
170
+
171
+ # some of the labels are different
172
+ major_axis = Index(["foo", "bar", "baz", "qux"])
173
+ minor_axis = Index(["one", "two"])
174
+
175
+ major_codes = np.array([0, 0, 2, 2, 3, 3])
176
+ minor_codes = np.array([0, 1, 0, 1, 0, 1])
177
+
178
+ index = MultiIndex(
179
+ levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
180
+ )
181
+ assert not idx.equals(index)
182
+
183
+
184
+ def test_identical(idx):
185
+ mi = idx.copy()
186
+ mi2 = idx.copy()
187
+ assert mi.identical(mi2)
188
+
189
+ mi = mi.set_names(["new1", "new2"])
190
+ assert mi.equals(mi2)
191
+ assert not mi.identical(mi2)
192
+
193
+ mi2 = mi2.set_names(["new1", "new2"])
194
+ assert mi.identical(mi2)
195
+
196
+ mi4 = Index(mi.tolist(), tupleize_cols=False)
197
+ assert not mi.identical(mi4)
198
+ assert mi.equals(mi4)
199
+
200
+
201
+ def test_equals_operator(idx):
202
+ # GH9785
203
+ assert (idx == idx).all()
204
+
205
+
206
def test_equals_missing_values():
    """Check ``equals`` between a length-1 slice and its single tuple.

    One of the two entries contains ``NaT``; neither slice may compare equal
    to the bare tuple obtained by scalar indexing.
    """
    # make sure take is not using -1
    i = MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))])
    result = i[0:1].equals(i[0])
    assert not result
    result = i[1:2].equals(i[1])
    assert not result
213
+
214
+
215
def test_equals_missing_values_differently_sorted():
    """Check that ``equals`` is order-sensitive when NaNs are present (GH#38439)."""
    # GH#38439
    mi1 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
    mi2 = MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)])
    assert not mi1.equals(mi2)

    mi2 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
    assert mi1.equals(mi2)
223
+
224
+
225
def test_is_():
    """Check ``MultiIndex.is_`` identity across views and modifications.

    Views keep identity, also after renaming in place; ``set_names`` and
    ``set_levels`` return objects that are no longer ``is_`` the original.
    """
    mi = MultiIndex.from_tuples(zip(range(10), range(10)))
    assert mi.is_(mi)
    assert mi.is_(mi.view())
    assert mi.is_(mi.view().view().view().view())
    mi2 = mi.view()
    # names are metadata, they don't change id
    mi2.names = ["A", "B"]
    assert mi2.is_(mi)
    assert mi.is_(mi2)

    assert not mi.is_(mi.set_names(["C", "D"]))
    # levels are inherent properties, they change identity
    mi3 = mi2.set_levels([list(range(10)), list(range(10))])
    assert not mi3.is_(mi2)
    # shouldn't change
    assert mi2.is_(mi)
    mi4 = mi3.view()

    # GH 17464 - Remove duplicate MultiIndex levels
    mi4 = mi4.set_levels([list(range(10)), list(range(10))])
    assert not mi4.is_(mi3)
    mi5 = mi.view()
    mi5 = mi5.set_levels(mi5.levels)
    assert not mi5.is_(mi)
250
+
251
+
252
+ def test_is_all_dates(idx):
253
+ assert not idx._is_all_dates
254
+
255
+
256
+ def test_is_numeric(idx):
257
+ # MultiIndex is never numeric
258
+ assert not is_any_real_numeric_dtype(idx)
259
+
260
+
261
def test_multiindex_compare():
    """Check ``==`` and ``>`` on a single-level MultiIndex (GH 21149)."""
    # GH 21149
    # Ensure comparison operations for MultiIndex with nlevels == 1
    # behave consistently with those for MultiIndex with nlevels > 1

    midx = MultiIndex.from_product([[0, 1]])

    # Equality self-test: MultiIndex object vs self
    expected = Series([True, True])
    result = Series(midx == midx)
    tm.assert_series_equal(result, expected)

    # Greater than comparison: MultiIndex object vs self
    expected = Series([False, False])
    result = Series(midx > midx)
    tm.assert_series_equal(result, expected)
277
+
278
+
279
def test_equals_ea_int_regular_int():
    """Check that an ``Int64`` level is not ``equals`` to a plain int level (GH#46026).

    The comparison is checked in both directions.
    """
    # GH#46026
    mi1 = MultiIndex.from_arrays([Index([1, 2], dtype="Int64"), [3, 4]])
    mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]])
    assert not mi1.equals(mi2)
    assert not mi2.equals(mi1)
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_formats.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ import pandas as pd
5
+ from pandas import (
6
+ Index,
7
+ MultiIndex,
8
+ )
9
+ import pandas._testing as tm
10
+
11
+
12
+ def test_format(idx):
13
+ msg = "MultiIndex.format is deprecated"
14
+ with tm.assert_produces_warning(FutureWarning, match=msg):
15
+ idx.format()
16
+ idx[:0].format()
17
+
18
+
19
+ def test_format_integer_names():
20
+ index = MultiIndex(
21
+ levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]
22
+ )
23
+ msg = "MultiIndex.format is deprecated"
24
+ with tm.assert_produces_warning(FutureWarning, match=msg):
25
+ index.format(names=True)
26
+
27
+
28
+ def test_format_sparse_config(idx):
29
+ # GH1538
30
+ msg = "MultiIndex.format is deprecated"
31
+ with pd.option_context("display.multi_sparse", False):
32
+ with tm.assert_produces_warning(FutureWarning, match=msg):
33
+ result = idx.format()
34
+ assert result[1] == "foo two"
35
+
36
+
37
+ def test_format_sparse_display():
38
+ index = MultiIndex(
39
+ levels=[[0, 1], [0, 1], [0, 1], [0]],
40
+ codes=[
41
+ [0, 0, 0, 1, 1, 1],
42
+ [0, 0, 1, 0, 0, 1],
43
+ [0, 1, 0, 0, 1, 0],
44
+ [0, 0, 0, 0, 0, 0],
45
+ ],
46
+ )
47
+ msg = "MultiIndex.format is deprecated"
48
+ with tm.assert_produces_warning(FutureWarning, match=msg):
49
+ result = index.format()
50
+ assert result[3] == "1 0 0 0"
51
+
52
+
53
def test_repr_with_unicode_data():
    """Check that ``repr`` of an index holding non-ASCII text has no backslash."""
    with pd.option_context("display.encoding", "UTF-8"):
        d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
        index = pd.DataFrame(d).set_index(["a", "b"]).index
        assert "\\" not in repr(index)  # we don't want unicode-escaped
58
+
59
+
60
def test_repr_roundtrip_raises():
    """Check that evaluating a MultiIndex ``repr`` raises ``TypeError``.

    The repr is not a valid constructor call: ``MultiIndex(...)`` requires
    both ``levels`` and ``codes``.
    """
    mi = MultiIndex.from_product([list("ab"), range(3)], names=["first", "second"])
    msg = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=msg):
        # eval is applied only to the repr of the index built just above
        eval(repr(mi))
65
+
66
+
67
def test_unicode_string_with_unicode():
    """Smoke test: ``str()`` of an index holding non-ASCII text must not raise."""
    d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    idx = pd.DataFrame(d).set_index(["a", "b"]).index
    str(idx)
71
+
72
+
73
+ def test_repr_max_seq_item_setting(idx):
74
+ # GH10182
75
+ idx = idx.repeat(50)
76
+ with pd.option_context("display.max_seq_items", None):
77
+ repr(idx)
78
+ assert "..." not in str(idx)
79
+
80
+
81
+ class TestRepr:
82
+ def test_unicode_repr_issues(self):
83
+ levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])]
84
+ codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)]
85
+ index = MultiIndex(levels=levels, codes=codes)
86
+
87
+ repr(index.levels)
88
+ repr(index.get_level_values(1))
89
+
90
+ def test_repr_max_seq_items_equal_to_n(self, idx):
91
+ # display.max_seq_items == n
92
+ with pd.option_context("display.max_seq_items", 6):
93
+ result = idx.__repr__()
94
+ expected = """\
95
+ MultiIndex([('foo', 'one'),
96
+ ('foo', 'two'),
97
+ ('bar', 'one'),
98
+ ('baz', 'two'),
99
+ ('qux', 'one'),
100
+ ('qux', 'two')],
101
+ names=['first', 'second'])"""
102
+ assert result == expected
103
+
104
+ def test_repr(self, idx):
105
+ result = idx[:1].__repr__()
106
+ expected = """\
107
+ MultiIndex([('foo', 'one')],
108
+ names=['first', 'second'])"""
109
+ assert result == expected
110
+
111
+ result = idx.__repr__()
112
+ expected = """\
113
+ MultiIndex([('foo', 'one'),
114
+ ('foo', 'two'),
115
+ ('bar', 'one'),
116
+ ('baz', 'two'),
117
+ ('qux', 'one'),
118
+ ('qux', 'two')],
119
+ names=['first', 'second'])"""
120
+ assert result == expected
121
+
122
+ with pd.option_context("display.max_seq_items", 5):
123
+ result = idx.__repr__()
124
+ expected = """\
125
+ MultiIndex([('foo', 'one'),
126
+ ('foo', 'two'),
127
+ ...
128
+ ('qux', 'one'),
129
+ ('qux', 'two')],
130
+ names=['first', 'second'], length=6)"""
131
+ assert result == expected
132
+
133
+ # display.max_seq_items == 1
134
+ with pd.option_context("display.max_seq_items", 1):
135
+ result = idx.__repr__()
136
+ expected = """\
137
+ MultiIndex([...
138
+ ('qux', 'two')],
139
+ names=['first', ...], length=6)"""
140
+ assert result == expected
141
+
142
+ def test_rjust(self):
143
+ n = 1000
144
+ ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n))
145
+ dti = pd.date_range("2000-01-01", freq="s", periods=n * 2)
146
+ mi = MultiIndex.from_arrays([ci, ci.codes + 9, dti], names=["a", "b", "dti"])
147
+ result = mi[:1].__repr__()
148
+ expected = """\
149
+ MultiIndex([('a', 9, '2000-01-01 00:00:00')],
150
+ names=['a', 'b', 'dti'])"""
151
+ assert result == expected
152
+
153
+ result = mi[::500].__repr__()
154
+ expected = """\
155
+ MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
156
+ ( 'a', 9, '2000-01-01 00:08:20'),
157
+ ('abc', 10, '2000-01-01 00:16:40'),
158
+ ('abc', 10, '2000-01-01 00:25:00')],
159
+ names=['a', 'b', 'dti'])"""
160
+ assert result == expected
161
+
162
+ result = mi.__repr__()
163
+ expected = """\
164
+ MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
165
+ ( 'a', 9, '2000-01-01 00:00:01'),
166
+ ( 'a', 9, '2000-01-01 00:00:02'),
167
+ ( 'a', 9, '2000-01-01 00:00:03'),
168
+ ( 'a', 9, '2000-01-01 00:00:04'),
169
+ ( 'a', 9, '2000-01-01 00:00:05'),
170
+ ( 'a', 9, '2000-01-01 00:00:06'),
171
+ ( 'a', 9, '2000-01-01 00:00:07'),
172
+ ( 'a', 9, '2000-01-01 00:00:08'),
173
+ ( 'a', 9, '2000-01-01 00:00:09'),
174
+ ...
175
+ ('abc', 10, '2000-01-01 00:33:10'),
176
+ ('abc', 10, '2000-01-01 00:33:11'),
177
+ ('abc', 10, '2000-01-01 00:33:12'),
178
+ ('abc', 10, '2000-01-01 00:33:13'),
179
+ ('abc', 10, '2000-01-01 00:33:14'),
180
+ ('abc', 10, '2000-01-01 00:33:15'),
181
+ ('abc', 10, '2000-01-01 00:33:16'),
182
+ ('abc', 10, '2000-01-01 00:33:17'),
183
+ ('abc', 10, '2000-01-01 00:33:18'),
184
+ ('abc', 10, '2000-01-01 00:33:19')],
185
+ names=['a', 'b', 'dti'], length=2000)"""
186
+ assert result == expected
187
+
188
+ def test_tuple_width(self):
189
+ n = 1000
190
+ ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n))
191
+ dti = pd.date_range("2000-01-01", freq="s", periods=n * 2)
192
+ levels = [ci, ci.codes + 9, dti, dti, dti]
193
+ names = ["a", "b", "dti_1", "dti_2", "dti_3"]
194
+ mi = MultiIndex.from_arrays(levels, names=names)
195
+ result = mi[:1].__repr__()
196
+ expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)],
197
+ names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" # noqa: E501
198
+ assert result == expected
199
+
200
+ result = mi[:10].__repr__()
201
+ expected = """\
202
+ MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
203
+ ('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
204
+ ('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
205
+ ('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
206
+ ('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
207
+ ('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
208
+ ('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
209
+ ('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
210
+ ('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
211
+ ('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)],
212
+ names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
213
+ assert result == expected
214
+
215
+ result = mi.__repr__()
216
+ expected = """\
217
+ MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
218
+ ( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
219
+ ( 'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
220
+ ( 'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
221
+ ( 'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
222
+ ( 'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
223
+ ( 'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
224
+ ( 'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
225
+ ( 'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
226
+ ( 'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...),
227
+ ...
228
+ ('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...),
229
+ ('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...),
230
+ ('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...),
231
+ ('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...),
232
+ ('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...),
233
+ ('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...),
234
+ ('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...),
235
+ ('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...),
236
+ ('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...),
237
+ ('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)],
238
+ names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)"""
239
+ assert result == expected
240
+
241
+ def test_multiindex_long_element(self):
242
+ # Non-regression test towards GH#52960
243
+ data = MultiIndex.from_tuples([("c" * 62,)])
244
+
245
+ expected = (
246
+ "MultiIndex([('cccccccccccccccccccccccccccccccccccccccc"
247
+ "cccccccccccccccccccccc',)],\n )"
248
+ )
249
+ assert str(data) == expected
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_integrity.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+ from pandas._libs import index as libindex
7
+
8
+ from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
9
+
10
+ import pandas as pd
11
+ from pandas import (
12
+ Index,
13
+ IntervalIndex,
14
+ MultiIndex,
15
+ RangeIndex,
16
+ )
17
+ import pandas._testing as tm
18
+
19
+
20
def test_labels_dtypes():
    """Check that codes use an integer dtype sized to the level and are non-negative."""
    # GH 8456
    i = MultiIndex.from_tuples([("A", 1), ("A", 2)])
    assert i.codes[0].dtype == "int8"
    assert i.codes[1].dtype == "int8"

    # The codes dtype widens as the number of distinct level values grows.
    i = MultiIndex.from_product([["a"], range(40)])
    assert i.codes[1].dtype == "int8"
    i = MultiIndex.from_product([["a"], range(400)])
    assert i.codes[1].dtype == "int16"
    i = MultiIndex.from_product([["a"], range(40000)])
    assert i.codes[1].dtype == "int32"

    i = MultiIndex.from_product([["a"], range(1000)])
    assert (i.codes[0] >= 0).all()
    assert (i.codes[1] >= 0).all()
36
+
37
+
38
def test_values_boxed():
    """Check ``MultiIndex.values`` against an object array of the source tuples."""
    tuples = [
        (1, pd.Timestamp("2000-01-01")),
        (2, pd.NaT),
        (3, pd.Timestamp("2000-01-03")),
        (1, pd.Timestamp("2000-01-04")),
        (2, pd.Timestamp("2000-01-02")),
        (3, pd.Timestamp("2000-01-03")),
    ]
    result = MultiIndex.from_tuples(tuples)
    expected = construct_1d_object_array_from_listlike(tuples)
    tm.assert_numpy_array_equal(result.values, expected)
    # Check that code branches for boxed values produce identical results
    tm.assert_numpy_array_equal(result.values[:4], result[:4].values)
52
+
53
+
54
def test_values_multiindex_datetimeindex():
    """Check that ``.values`` round-trips naive and tz-aware datetime levels."""
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(10**18, 10**18 + 5)
    naive = pd.DatetimeIndex(ints)

    aware = pd.DatetimeIndex(ints, tz="US/Central")

    idx = MultiIndex.from_arrays([naive, aware])
    result = idx.values

    outer = pd.DatetimeIndex([x[0] for x in result])
    tm.assert_index_equal(outer, naive)

    inner = pd.DatetimeIndex([x[1] for x in result])
    tm.assert_index_equal(inner, aware)

    # n_lev > n_lab
    result = idx[:2].values

    outer = pd.DatetimeIndex([x[0] for x in result])
    tm.assert_index_equal(outer, naive[:2])

    inner = pd.DatetimeIndex([x[1] for x in result])
    tm.assert_index_equal(inner, aware[:2])
78
+
79
+
80
def test_values_multiindex_periodindex():
    """Check that ``.values`` round-trips an integer level and a Period level."""
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(2007, 2012)
    pidx = pd.PeriodIndex(ints, freq="D")

    idx = MultiIndex.from_arrays([ints, pidx])
    result = idx.values

    outer = Index([x[0] for x in result])
    tm.assert_index_equal(outer, Index(ints, dtype=np.int64))

    inner = pd.PeriodIndex([x[1] for x in result])
    tm.assert_index_equal(inner, pidx)

    # n_lev > n_lab
    result = idx[:2].values

    outer = Index([x[0] for x in result])
    tm.assert_index_equal(outer, Index(ints[:2], dtype=np.int64))

    inner = pd.PeriodIndex([x[1] for x in result])
    tm.assert_index_equal(inner, pidx[:2])
102
+
103
+
104
def test_consistency():
    """Build a 70000-entry MultiIndex, then check a duplicated one is not unique."""
    # need to construct an overflow
    major_axis = list(range(70000))
    minor_axis = list(range(10))

    major_codes = np.arange(70000)
    minor_codes = np.repeat(range(10), 7000)

    # the fact that it works means it's consistent; the object itself is
    # not inspected further before being rebound below
    index = MultiIndex(
        levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
    )

    # inconsistent
    major_codes = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1])
    index = MultiIndex(
        levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
    )

    assert index.is_unique is False
125
+
126
+
127
@pytest.mark.slow
def test_hash_collisions(monkeypatch):
    """Check indexer and ``get_loc`` lookups on an 8x8 product index.

    ``libindex._SIZE_CUTOFF`` is patched to 50 for the duration of the
    checks, so the 64-entry index is larger than the cutoff.
    """
    # non-smoke test that we don't get hash collisions
    size_cutoff = 50
    with monkeypatch.context() as m:
        m.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
        index = MultiIndex.from_product(
            [np.arange(8), np.arange(8)], names=["one", "two"]
        )
        result = index.get_indexer(index.values)
        tm.assert_numpy_array_equal(result, np.arange(len(index), dtype="intp"))

        # Spot-check both ends of the index.
        for i in [0, 1, len(index) - 2, len(index) - 1]:
            result = index.get_loc(index[i])
            assert result == i
142
+
143
+
144
def test_dims():
    """Placeholder: this test has no body and asserts nothing."""
    pass
146
+
147
+
148
def test_take_invalid_kwargs():
    """Check that ``MultiIndex.take`` rejects unknown and unsupported keywords."""
    vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]]
    idx = MultiIndex.from_product(vals, names=["str", "dt"])
    indices = [1, 2]

    msg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=msg):
        idx.take(indices, foo=2)

    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, out=indices)

    msg = "the 'mode' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, mode="clip")
164
+
165
+
166
+ def test_isna_behavior(idx):
167
+ # should not segfault GH5123
168
+ # NOTE: if MI representation changes, may make sense to allow
169
+ # isna(MI)
170
+ msg = "isna is not defined for MultiIndex"
171
+ with pytest.raises(NotImplementedError, match=msg):
172
+ pd.isna(idx)
173
+
174
+
175
def test_large_multiindex_error(monkeypatch):
    """Check missing-key ``.loc`` lookups raise ``KeyError`` around the size cutoff.

    ``libindex._SIZE_CUTOFF`` is patched to 50; frames whose second level is
    one shorter and one longer than that are both probed with absent keys.
    """
    # GH12527
    size_cutoff = 50
    with monkeypatch.context() as m:
        m.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
        df_below_cutoff = pd.DataFrame(
            1,
            index=MultiIndex.from_product([[1, 2], range(size_cutoff - 1)]),
            columns=["dest"],
        )
        with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
            df_below_cutoff.loc[(-1, 0), "dest"]
        with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
            df_below_cutoff.loc[(3, 0), "dest"]
        df_above_cutoff = pd.DataFrame(
            1,
            index=MultiIndex.from_product([[1, 2], range(size_cutoff + 1)]),
            columns=["dest"],
        )
        with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
            df_above_cutoff.loc[(-1, 0), "dest"]
        with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
            df_above_cutoff.loc[(3, 0), "dest"]
198
+
199
+
200
def test_mi_hashtable_populated_attribute_error(monkeypatch):
    """Check a missing Series attribute raises ``AttributeError`` on an MI-indexed frame."""
    # GH 18165
    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 50)
    r = range(50)
    df = pd.DataFrame({"a": r, "b": r}, index=MultiIndex.from_arrays([r, r]))

    msg = "'Series' object has no attribute 'foo'"
    with pytest.raises(AttributeError, match=msg):
        df["a"].foo()
209
+
210
+
211
+ def test_can_hold_identifiers(idx):
212
+ key = idx[0]
213
+ assert idx._can_hold_identifiers_and_holds_name(key) is True
214
+
215
+
216
+ def test_metadata_immutable(idx):
217
+ levels, codes = idx.levels, idx.codes
218
+ # shouldn't be able to set at either the top level or base level
219
+ mutable_regex = re.compile("does not support mutable operations")
220
+ with pytest.raises(TypeError, match=mutable_regex):
221
+ levels[0] = levels[0]
222
+ with pytest.raises(TypeError, match=mutable_regex):
223
+ levels[0][0] = levels[0][0]
224
+ # ditto for labels
225
+ with pytest.raises(TypeError, match=mutable_regex):
226
+ codes[0] = codes[0]
227
+ with pytest.raises(ValueError, match="assignment destination is read-only"):
228
+ codes[0][0] = codes[0][0]
229
+ # and for names
230
+ names = idx.names
231
+ with pytest.raises(TypeError, match=mutable_regex):
232
+ names[0] = names[0]
233
+
234
+
235
def test_level_setting_resets_attributes():
    """Check ``is_monotonic_increasing`` is recomputed after ``set_levels``."""
    ind = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
    assert ind.is_monotonic_increasing
    # The new second level is out of order ([1, 3, 2]).
    ind = ind.set_levels([["A", "B"], [1, 3, 2]])
    # if this fails, probably didn't reset the cache correctly.
    assert not ind.is_monotonic_increasing
241
+
242
+
243
+ def test_rangeindex_fallback_coercion_bug():
244
+ # GH 12893
245
+ df1 = pd.DataFrame(np.arange(100).reshape((10, 10)))
246
+ df2 = pd.DataFrame(np.arange(100).reshape((10, 10)))
247
+ df = pd.concat(
248
+ {"df1": df1.stack(future_stack=True), "df2": df2.stack(future_stack=True)},
249
+ axis=1,
250
+ )
251
+ df.index.names = ["fizz", "buzz"]
252
+
253
+ expected = pd.DataFrame(
254
+ {"df2": np.arange(100), "df1": np.arange(100)},
255
+ index=MultiIndex.from_product([range(10), range(10)], names=["fizz", "buzz"]),
256
+ )
257
+ tm.assert_frame_equal(df, expected, check_like=True)
258
+
259
+ result = df.index.get_level_values("fizz")
260
+ expected = Index(np.arange(10, dtype=np.int64), name="fizz").repeat(10)
261
+ tm.assert_index_equal(result, expected)
262
+
263
+ result = df.index.get_level_values("buzz")
264
+ expected = Index(np.tile(np.arange(10, dtype=np.int64), 10), name="buzz")
265
+ tm.assert_index_equal(result, expected)
266
+
267
+
268
+ def test_memory_usage(idx):
269
+ result = idx.memory_usage()
270
+ if len(idx):
271
+ idx.get_loc(idx[0])
272
+ result2 = idx.memory_usage()
273
+ result3 = idx.memory_usage(deep=True)
274
+
275
+ # RangeIndex, IntervalIndex
276
+ # don't have engines
277
+ if not isinstance(idx, (RangeIndex, IntervalIndex)):
278
+ assert result2 > result
279
+
280
+ if idx.inferred_type == "object":
281
+ assert result3 > result2
282
+
283
+ else:
284
+ # we report 0 for no-length
285
+ assert result == 0
286
+
287
+
288
+ def test_nlevels(idx):
289
+ assert idx.nlevels == 2
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_join.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas import (
5
+ DataFrame,
6
+ Index,
7
+ Interval,
8
+ MultiIndex,
9
+ Series,
10
+ StringDtype,
11
+ )
12
+ import pandas._testing as tm
13
+
14
+
15
+ @pytest.mark.parametrize(
16
+ "other", [Index(["three", "one", "two"]), Index(["one"]), Index(["one", "three"])]
17
+ )
18
+ def test_join_level(idx, other, join_type):
19
+ join_index, lidx, ridx = other.join(
20
+ idx, how=join_type, level="second", return_indexers=True
21
+ )
22
+
23
+ exp_level = other.join(idx.levels[1], how=join_type)
24
+ assert join_index.levels[0].equals(idx.levels[0])
25
+ assert join_index.levels[1].equals(exp_level)
26
+
27
+ # pare down levels
28
+ mask = np.array([x[1] in exp_level for x in idx], dtype=bool)
29
+ exp_values = idx.values[mask]
30
+ tm.assert_numpy_array_equal(join_index.values, exp_values)
31
+
32
+ if join_type in ("outer", "inner"):
33
+ join_index2, ridx2, lidx2 = idx.join(
34
+ other, how=join_type, level="second", return_indexers=True
35
+ )
36
+
37
+ assert join_index.equals(join_index2)
38
+ tm.assert_numpy_array_equal(lidx, lidx2)
39
+ tm.assert_numpy_array_equal(ridx, ridx2)
40
+ tm.assert_numpy_array_equal(join_index2.values, exp_values)
41
+
42
+
43
+ def test_join_level_corner_case(idx):
44
+ # some corner cases
45
+ index = Index(["three", "one", "two"])
46
+ result = index.join(idx, level="second")
47
+ assert isinstance(result, MultiIndex)
48
+
49
+ with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"):
50
+ idx.join(idx, level=1)
51
+
52
+
53
+ def test_join_self(idx, join_type):
54
+ result = idx.join(idx, how=join_type)
55
+ expected = idx
56
+ if join_type == "outer":
57
+ expected = expected.sort_values()
58
+ tm.assert_index_equal(result, expected)
59
+
60
+
61
def test_join_multi():
    """Check joins between a two-level MultiIndex and an Index named after one level."""
    # GH 10665
    midx = MultiIndex.from_product([np.arange(4), np.arange(4)], names=["a", "b"])
    idx = Index([1, 2, 5], name="b")

    # inner
    jidx, lidx, ridx = midx.join(idx, how="inner", return_indexers=True)
    exp_idx = MultiIndex.from_product([np.arange(4), [1, 2]], names=["a", "b"])
    exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp)
    exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp)
    tm.assert_index_equal(jidx, exp_idx)
    tm.assert_numpy_array_equal(lidx, exp_lidx)
    tm.assert_numpy_array_equal(ridx, exp_ridx)
    # flip
    jidx, ridx, lidx = idx.join(midx, how="inner", return_indexers=True)
    tm.assert_index_equal(jidx, exp_idx)
    tm.assert_numpy_array_equal(lidx, exp_lidx)
    tm.assert_numpy_array_equal(ridx, exp_ridx)

    # keep MultiIndex
    jidx, lidx, ridx = midx.join(idx, how="left", return_indexers=True)
    exp_ridx = np.array(
        [-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1], dtype=np.intp
    )
    tm.assert_index_equal(jidx, midx)
    # The MultiIndex side is returned unchanged, so it has no indexer.
    assert lidx is None
    tm.assert_numpy_array_equal(ridx, exp_ridx)
    # flip
    jidx, ridx, lidx = idx.join(midx, how="right", return_indexers=True)
    tm.assert_index_equal(jidx, midx)
    assert lidx is None
    tm.assert_numpy_array_equal(ridx, exp_ridx)
93
+
94
+
95
def test_join_multi_wrong_order():
    """Check a default join of MultiIndexes whose level names are swapped."""
    # GH 25760
    # GH 28956

    midx1 = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"])
    midx2 = MultiIndex.from_product([[1, 2], [3, 4]], names=["b", "a"])

    join_idx, lidx, ridx = midx1.join(midx2, return_indexers=True)

    # No entry of midx2 is expected to match.
    exp_ridx = np.array([-1, -1, -1, -1], dtype=np.intp)

    tm.assert_index_equal(midx1, join_idx)
    assert lidx is None
    tm.assert_numpy_array_equal(ridx, exp_ridx)
109
+
110
+
111
def test_join_multi_return_indexers():
    """Check ``return_indexers=False`` yields just the joined index."""
    # GH 34074

    midx1 = MultiIndex.from_product([[1, 2], [3, 4], [5, 6]], names=["a", "b", "c"])
    midx2 = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"])

    result = midx1.join(midx2, return_indexers=False)
    tm.assert_index_equal(result, midx1)
119
+
120
+
121
def test_join_overlapping_interval_level():
    """Check an outer join of two MultiIndexes whose interval level overlaps.

    Both inputs hold the same six entries in different orders; the expected
    result lists them in the order used by ``idx_1``.
    """
    # GH 44096
    idx_1 = MultiIndex.from_tuples(
        [
            (1, Interval(0.0, 1.0)),
            (1, Interval(1.0, 2.0)),
            (1, Interval(2.0, 5.0)),
            (2, Interval(0.0, 1.0)),
            (2, Interval(1.0, 3.0)),  # interval limit is here at 3.0, not at 2.0
            (2, Interval(3.0, 5.0)),
        ],
        names=["num", "interval"],
    )

    idx_2 = MultiIndex.from_tuples(
        [
            (1, Interval(2.0, 5.0)),
            (1, Interval(0.0, 1.0)),
            (1, Interval(1.0, 2.0)),
            (2, Interval(3.0, 5.0)),
            (2, Interval(0.0, 1.0)),
            (2, Interval(1.0, 3.0)),
        ],
        names=["num", "interval"],
    )

    expected = MultiIndex.from_tuples(
        [
            (1, Interval(0.0, 1.0)),
            (1, Interval(1.0, 2.0)),
            (1, Interval(2.0, 5.0)),
            (2, Interval(0.0, 1.0)),
            (2, Interval(1.0, 3.0)),
            (2, Interval(3.0, 5.0)),
        ],
        names=["num", "interval"],
    )
    result = idx_1.join(idx_2, how="outer")

    tm.assert_index_equal(result, expected)
161
+
162
+
163
def test_join_midx_ea():
    """Check an inner join of Int64-backed MultiIndexes sharing only level "a"."""
    # GH#49277
    midx = MultiIndex.from_arrays(
        [Series([1, 1, 3], dtype="Int64"), Series([1, 2, 3], dtype="Int64")],
        names=["a", "b"],
    )
    midx2 = MultiIndex.from_arrays(
        [Series([1], dtype="Int64"), Series([3], dtype="Int64")], names=["a", "c"]
    )
    result = midx.join(midx2, how="inner")
    # The result carries the shared level plus both non-shared levels.
    expected = MultiIndex.from_arrays(
        [
            Series([1, 1], dtype="Int64"),
            Series([1, 2], dtype="Int64"),
            Series([3, 3], dtype="Int64"),
        ],
        names=["a", "b", "c"],
    )
    tm.assert_index_equal(result, expected)
182
+
183
+
184
def test_join_midx_string():
    """Check an inner join of StringDtype-backed MultiIndexes sharing only level "a"."""
    # GH#49277
    midx = MultiIndex.from_arrays(
        [
            Series(["a", "a", "c"], dtype=StringDtype()),
            Series(["a", "b", "c"], dtype=StringDtype()),
        ],
        names=["a", "b"],
    )
    midx2 = MultiIndex.from_arrays(
        [Series(["a"], dtype=StringDtype()), Series(["c"], dtype=StringDtype())],
        names=["a", "c"],
    )
    result = midx.join(midx2, how="inner")
    expected = MultiIndex.from_arrays(
        [
            Series(["a", "a"], dtype=StringDtype()),
            Series(["a", "b"], dtype=StringDtype()),
            Series(["c", "c"], dtype=StringDtype()),
        ],
        names=["a", "b", "c"],
    )
    tm.assert_index_equal(result, expected)
207
+
208
+
209
def test_join_multi_with_nan():
    """Check ``DataFrame.join`` when the right frame's MultiIndex contains NaN."""
    # GH29252
    df1 = DataFrame(
        data={"col1": [1.1, 1.2]},
        index=MultiIndex.from_product([["A"], [1.0, 2.0]], names=["id1", "id2"]),
    )
    df2 = DataFrame(
        data={"col2": [2.1, 2.2]},
        index=MultiIndex.from_product([["A"], [np.nan, 2.0]], names=["id1", "id2"]),
    )
    result = df1.join(df2)
    # Only the ("A", 2.0) row has a partner in df2.
    expected = DataFrame(
        data={"col1": [1.1, 1.2], "col2": [np.nan, 2.2]},
        index=MultiIndex.from_product([["A"], [1.0, 2.0]], names=["id1", "id2"]),
    )
    tm.assert_frame_equal(result, expected)
225
+
226
+
227
+ @pytest.mark.parametrize("val", [0, 5])
228
+ def test_join_dtypes(any_numeric_ea_dtype, val):
229
+ # GH#49830
230
+ midx = MultiIndex.from_arrays([Series([1, 2], dtype=any_numeric_ea_dtype), [3, 4]])
231
+ midx2 = MultiIndex.from_arrays(
232
+ [Series([1, val, val], dtype=any_numeric_ea_dtype), [3, 4, 4]]
233
+ )
234
+ result = midx.join(midx2, how="outer")
235
+ expected = MultiIndex.from_arrays(
236
+ [Series([val, val, 1, 2], dtype=any_numeric_ea_dtype), [4, 4, 3, 4]]
237
+ ).sort_values()
238
+ tm.assert_index_equal(result, expected)
239
+
240
+
241
+ def test_join_dtypes_all_nan(any_numeric_ea_dtype):
242
+ # GH#49830
243
+ midx = MultiIndex.from_arrays(
244
+ [Series([1, 2], dtype=any_numeric_ea_dtype), [np.nan, np.nan]]
245
+ )
246
+ midx2 = MultiIndex.from_arrays(
247
+ [Series([1, 0, 0], dtype=any_numeric_ea_dtype), [np.nan, np.nan, np.nan]]
248
+ )
249
+ result = midx.join(midx2, how="outer")
250
+ expected = MultiIndex.from_arrays(
251
+ [
252
+ Series([0, 0, 1, 2], dtype=any_numeric_ea_dtype),
253
+ [np.nan, np.nan, np.nan, np.nan],
254
+ ]
255
+ )
256
+ tm.assert_index_equal(result, expected)
257
+
258
+
259
+ def test_join_index_levels():
260
+ # GH#53093
261
+ midx = midx = MultiIndex.from_tuples([("a", "2019-02-01"), ("a", "2019-02-01")])
262
+ midx2 = MultiIndex.from_tuples([("a", "2019-01-31")])
263
+ result = midx.join(midx2, how="outer")
264
+ expected = MultiIndex.from_tuples(
265
+ [("a", "2019-01-31"), ("a", "2019-02-01"), ("a", "2019-02-01")]
266
+ )
267
+ tm.assert_index_equal(result.levels[1], expected.levels[1])
268
+ tm.assert_index_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_lexsort.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pandas import MultiIndex
2
+
3
+
4
class TestIsLexsorted:
    """Tests for ``MultiIndex._is_lexsorted``."""

    def test_is_lexsorted(self):
        """Check one lexsorted and two non-lexsorted code layouts."""
        levels = [[0, 1], [0, 1, 2]]

        index = MultiIndex(
            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
        )
        assert index._is_lexsorted()

        # Second level out of order within the last group.
        index = MultiIndex(
            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]
        )
        assert not index._is_lexsorted()

        # First level out of order too, so the lexsort depth is 0.
        index = MultiIndex(
            levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]
        )
        assert not index._is_lexsorted()
        assert index._lexsort_depth == 0
23
+
24
+
25
class TestLexsortDepth:
    """Tests for ``MultiIndex._lexsort_depth``."""

    def test_lexsort_depth(self):
        """Check ``_lexsort_depth`` equals the ``sortorder`` given at construction."""
        # Test that lexsort_depth returns the correct sortorder
        # when it was given to the MultiIndex constructor.
        # GH#28518

        levels = [[0, 1], [0, 1, 2]]

        index = MultiIndex(
            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
        )
        assert index._lexsort_depth == 2

        index = MultiIndex(
            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1
        )
        assert index._lexsort_depth == 1

        index = MultiIndex(
            levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0
        )
        assert index._lexsort_depth == 0
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_monotonic.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas import (
5
+ Index,
6
+ MultiIndex,
7
+ )
8
+
9
+
10
+ def test_is_monotonic_increasing_lexsorted(lexsorted_two_level_string_multiindex):
11
+ # string ordering
12
+ mi = lexsorted_two_level_string_multiindex
13
+ assert mi.is_monotonic_increasing is False
14
+ assert Index(mi.values).is_monotonic_increasing is False
15
+ assert mi._is_strictly_monotonic_increasing is False
16
+ assert Index(mi.values)._is_strictly_monotonic_increasing is False
17
+
18
+
19
def test_is_monotonic_increasing():
    """Check ``is_monotonic_increasing`` and its strict variant on several indexes.

    Each case is checked on the MultiIndex and, except for the mixed-levels
    case, on ``Index(i.values)`` built from the same tuples.
    """
    i = MultiIndex.from_product([np.arange(10), np.arange(10)], names=["one", "two"])
    assert i.is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    # Was a second, identical check on ``i``; every other group checks
    # ``Index(i.values)`` here.
    assert Index(i.values)._is_strictly_monotonic_increasing is True

    i = MultiIndex.from_product(
        [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
    )
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    i = MultiIndex.from_product(
        [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
    )
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    i = MultiIndex.from_product([[1.0, np.nan, 2.0], ["a", "b", "c"]])
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    i = MultiIndex(
        levels=[["bar", "baz", "foo", "qux"], ["mom", "next", "zenith"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values)._is_strictly_monotonic_increasing is True

    # mixed levels, hits the TypeError
    i = MultiIndex(
        levels=[
            [1, 2, 3, 4],
            [
                "gb00b03mlx29",
                "lu0197800237",
                "nl0000289783",
                "nl0000289965",
                "nl0000301109",
            ],
        ],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=["household_id", "asset_id"],
    )

    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False

    # empty
    i = MultiIndex.from_arrays([[], []])
    assert i.is_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values)._is_strictly_monotonic_increasing is True
83
+
84
+
85
def test_is_monotonic_decreasing():
    """Check ``is_monotonic_decreasing`` and its strict variant on several indexes.

    Each case is checked on the MultiIndex and, except for the mixed-levels
    case, on ``Index(i.values)`` built from the same tuples.
    """
    i = MultiIndex.from_product(
        [np.arange(9, -1, -1), np.arange(9, -1, -1)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    # Was a second, identical check on ``i``; every other group checks
    # ``Index(i.values)`` here.
    assert Index(i.values)._is_strictly_monotonic_decreasing is True

    i = MultiIndex.from_product(
        [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    i = MultiIndex.from_product(
        [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    i = MultiIndex.from_product([[2.0, np.nan, 1.0], ["c", "b", "a"]])
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # string ordering
    i = MultiIndex(
        levels=[["qux", "foo", "baz", "bar"], ["three", "two", "one"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    i = MultiIndex(
        levels=[["qux", "foo", "baz", "bar"], ["zenith", "next", "mom"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values)._is_strictly_monotonic_decreasing is True

    # mixed levels, hits the TypeError
    i = MultiIndex(
        levels=[
            [4, 3, 2, 1],
            [
                "nl0000301109",
                "nl0000289965",
                "nl0000289783",
                "lu0197800237",
                "gb00b03mlx29",
            ],
        ],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=["household_id", "asset_id"],
    )

    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False

    # empty
    i = MultiIndex.from_arrays([[], []])
    assert i.is_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values)._is_strictly_monotonic_decreasing is True
162
+
163
+
164
def test_is_strictly_monotonic_increasing():
    """Check a duplicated entry keeps the index monotonic but not strictly so."""
    # The first two entries are both ("bar", "mom").
    idx = MultiIndex(
        levels=[["bar", "baz"], ["mom", "next"]], codes=[[0, 0, 1, 1], [0, 0, 0, 1]]
    )
    assert idx.is_monotonic_increasing is True
    assert idx._is_strictly_monotonic_increasing is False
170
+
171
+
172
def test_is_strictly_monotonic_decreasing():
    """Check a duplicated entry keeps the index monotonic but not strictly so."""
    # The first two entries are both ("baz", "next").
    idx = MultiIndex(
        levels=[["baz", "bar"], ["next", "mom"]], codes=[[0, 0, 1, 1], [0, 0, 0, 1]]
    )
    assert idx.is_monotonic_decreasing is True
    assert idx._is_strictly_monotonic_decreasing is False
178
+
179
+
180
@pytest.mark.parametrize("attr", ["is_monotonic_increasing", "is_monotonic_decreasing"])
@pytest.mark.parametrize(
    "values",
    [[(np.nan,), (1,), (2,)], [(1,), (np.nan,), (2,)], [(1,), (2,), (np.nan,)]],
)
def test_is_monotonic_with_nans(values, attr):
    """Check a one-level MultiIndex containing NaN is monotonic in neither direction."""
    # GH: 37220
    idx = MultiIndex.from_tuples(values, names=["test"])
    assert getattr(idx, attr) is False
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_reindex.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ import pandas as pd
5
+ from pandas import (
6
+ Index,
7
+ MultiIndex,
8
+ )
9
+ import pandas._testing as tm
10
+
11
+
12
+ def test_reindex(idx):
13
+ result, indexer = idx.reindex(list(idx[:4]))
14
+ assert isinstance(result, MultiIndex)
15
+ assert result.names == ["first", "second"]
16
+ assert [level.name for level in result.levels] == ["first", "second"]
17
+
18
+ result, indexer = idx.reindex(list(idx))
19
+ assert isinstance(result, MultiIndex)
20
+ assert indexer is None
21
+ assert result.names == ["first", "second"]
22
+ assert [level.name for level in result.levels] == ["first", "second"]
23
+
24
+
25
def test_reindex_level(idx):
    """Level-wise reindex matches the corresponding level-wise join."""
    flat = Index(["one"])

    # MultiIndex reindexed against the flat index on level "second".
    mi_target, mi_indexer = idx.reindex(flat, level="second")
    # Flat index reindexed against the MultiIndex on the same level.
    flat_target, flat_indexer = flat.reindex(idx, level="second")

    joined_right = idx.join(flat, level="second", how="right")
    joined_left = idx.join(flat, level="second", how="left")

    assert mi_target.equals(joined_right)
    tm.assert_numpy_array_equal(mi_indexer, np.array([0, 2, 4]), check_dtype=False)

    assert flat_target.equals(joined_left)
    tm.assert_numpy_array_equal(
        flat_indexer, np.array([0, -1, 0, -1, 0, -1]), check_dtype=False
    )

    # A fill method cannot be combined with ``level``.
    with pytest.raises(TypeError, match="Fill method not supported"):
        idx.reindex(idx, method="pad", level="second")
44
+
45
+
46
+ def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx):
47
+ # GH6552
48
+ idx = idx.copy()
49
+ target = idx.copy()
50
+ idx.names = target.names = [None, None]
51
+
52
+ other_dtype = MultiIndex.from_product([[1, 2], [3, 4]])
53
+
54
+ # list & ndarray cases
55
+ assert idx.reindex([])[0].names == [None, None]
56
+ assert idx.reindex(np.array([]))[0].names == [None, None]
57
+ assert idx.reindex(target.tolist())[0].names == [None, None]
58
+ assert idx.reindex(target.values)[0].names == [None, None]
59
+ assert idx.reindex(other_dtype.tolist())[0].names == [None, None]
60
+ assert idx.reindex(other_dtype.values)[0].names == [None, None]
61
+
62
+ idx.names = ["foo", "bar"]
63
+ assert idx.reindex([])[0].names == ["foo", "bar"]
64
+ assert idx.reindex(np.array([]))[0].names == ["foo", "bar"]
65
+ assert idx.reindex(target.tolist())[0].names == ["foo", "bar"]
66
+ assert idx.reindex(target.values)[0].names == ["foo", "bar"]
67
+ assert idx.reindex(other_dtype.tolist())[0].names == ["foo", "bar"]
68
+ assert idx.reindex(other_dtype.values)[0].names == ["foo", "bar"]
69
+
70
+
71
def test_reindex_lvl_preserves_names_when_target_is_list_or_array():
    """Level-wise reindex with an empty list keeps the names (GH7774)."""
    mi = MultiIndex.from_product([[0, 1], ["a", "b"]], names=["foo", "bar"])
    for level in (0, 1):
        reindexed = mi.reindex([], level=level)[0]
        assert reindexed.names == ["foo", "bar"]
76
+
77
+
78
+ def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(
79
+ using_infer_string,
80
+ ):
81
+ # GH7774
82
+ idx = MultiIndex.from_product([[0, 1], ["a", "b"]])
83
+ assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64
84
+ exp = np.object_ if not using_infer_string else str
85
+ assert idx.reindex([], level=1)[0].levels[1].dtype.type == exp
86
+
87
+ # case with EA levels
88
+ cat = pd.Categorical(["foo", "bar"])
89
+ dti = pd.date_range("2016-01-01", periods=2, tz="US/Pacific")
90
+ mi = MultiIndex.from_product([cat, dti])
91
+ assert mi.reindex([], level=0)[0].levels[0].dtype == cat.dtype
92
+ assert mi.reindex([], level=1)[0].levels[1].dtype == dti.dtype
93
+
94
+
95
def test_reindex_base(idx):
    """``get_indexer`` against itself gives 0..n-1; a bad method raises."""
    positions = np.arange(idx.size, dtype=np.intp)
    tm.assert_numpy_array_equal(positions, idx.get_indexer(idx))

    with pytest.raises(ValueError, match="Invalid fill method"):
        idx.get_indexer(idx, method="invalid")
103
+
104
+
105
def test_reindex_non_unique():
    """Reindexing a Series whose MultiIndex has duplicates raises ValueError."""
    duplicated = MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)])
    ser = pd.Series(np.arange(4), index=duplicated)
    unique_target = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])

    with pytest.raises(ValueError, match="cannot handle a non-unique multi-index!"):
        ser.reindex(unique_target)
113
+
114
+
115
@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
def test_reindex_empty_with_level(values):
    """Level-wise reindex to an absent label gives an empty result (GH41170)."""
    mi = MultiIndex.from_arrays(values)
    reindexed, indexer = mi.reindex(np.array(["b"]), level=0)

    empty_expected = MultiIndex(levels=[["b"], values[1]], codes=[[], []])
    tm.assert_index_equal(reindexed, empty_expected)

    # Only emptiness is checked; the dtype is copied from the actual indexer.
    empty_indexer = np.array([], dtype=indexer.dtype)
    tm.assert_numpy_array_equal(indexer, empty_indexer)
124
+
125
+
126
def test_reindex_not_all_tuples():
    """Reindexing against a target mixing tuples and a scalar marks the scalar -1."""
    keys = [("i", "i"), ("i", "j"), ("j", "i"), "j"]
    tuple_keys = keys[:-1]
    mi = MultiIndex.from_tuples(tuple_keys)
    mixed_target = Index(keys)

    reindexed, indexer = mi.reindex(mixed_target)

    tm.assert_index_equal(reindexed, mixed_target)
    tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2, -1], dtype=np.intp))
135
+
136
+
137
def test_reindex_limit_arg_with_multiindex():
    """Passing ``limit`` without a fill method raises ValueError (GH21247)."""
    source_index = MultiIndex.from_tuples([(3, "A"), (4, "A"), (4, "B")])
    ser = pd.Series([0.02, 0.01, 0.012], index=source_index)

    target_tuples = [
        (3, "A"),
        (3, "B"),
        (4, "A"),
        (4, "B"),
        (4, "C"),
        (5, "B"),
        (5, "C"),
        (6, "B"),
        (6, "C"),
    ]
    target_index = MultiIndex.from_tuples(target_tuples)

    msg = "limit argument only valid if doing pad, backfill or nearest reindexing"
    with pytest.raises(ValueError, match=msg):
        ser.reindex(target_index, fill_value=0, limit=1)
163
+
164
+
165
def test_reindex_with_none_in_nested_multiindex():
    """``reindex_like`` works when level values are tuples containing None (GH42883)."""
    forward = MultiIndex.from_tuples([(("a", None), 1), (("b", None), 2)])
    backward = MultiIndex.from_tuples([(("b", None), 2), (("a", None), 1)])
    frame = pd.DataFrame([1, 2], index=forward)
    template = pd.DataFrame([2, 1], index=backward)

    reordered = frame.reindex_like(template)
    tm.assert_frame_equal(reordered, template)
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_reshape.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+
3
+ import numpy as np
4
+ import pytest
5
+ import pytz
6
+
7
+ import pandas as pd
8
+ from pandas import (
9
+ Index,
10
+ MultiIndex,
11
+ )
12
+ import pandas._testing as tm
13
+
14
+
15
def test_insert(idx):
    """Check ``MultiIndex.insert`` and enlargement through ``.loc`` assignment."""
    # key contained in all levels
    inserted = idx.insert(0, ("bar", "two"))
    assert inserted.equal_levels(idx)
    assert inserted[0] == ("bar", "two")

    # key not contained in all levels
    inserted = idx.insert(0, ("abc", "three"))

    expected_level0 = Index(list(idx.levels[0]) + ["abc"], name="first")
    tm.assert_index_equal(inserted.levels[0], expected_level0)
    assert inserted.names == ["first", "second"]

    expected_level1 = Index(list(idx.levels[1]) + ["three"], name="second")
    tm.assert_index_equal(inserted.levels[1], expected_level1)
    assert inserted[0] == ("abc", "three")

    # key wrong length
    with pytest.raises(
        ValueError, match="Item must have length equal to number of levels"
    ):
        idx.insert(0, ("foo2",))

    columns = ["1st", "2nd", "3rd"]
    base_rows = [["a", "b", 0], ["b", "d", 1]]
    # (index key, value) pairs added one at a time, in this order.
    additions = [
        (("b", "x"), 2),
        (("b", "a"), -1),
        (("b", "b"), 3),
        (("a", "x"), 4),
        (("a", "w"), 5),
        (("a", "a"), 6),
    ]

    left = pd.DataFrame(base_rows, columns=columns)
    left.set_index(["1st", "2nd"], inplace=True)
    ts = left["3rd"].copy(deep=True)

    for key, value in additions:
        left.loc[key, "3rd"] = value

    for key, value in additions:
        ts.loc[key] = value

    all_rows = base_rows + [[*key, value] for key, value in additions]
    right = pd.DataFrame(all_rows, columns=columns)
    right.set_index(["1st", "2nd"], inplace=True)
    # FIXME data types changes to float because
    # of intermediate nan insertion;
    tm.assert_frame_equal(left, right, check_dtype=False)
    tm.assert_series_equal(ts, right["3rd"])
73
+
74
+
75
def test_insert2():
    """Setting new keys via ``.loc`` appends them to a MultiIndexed Series (GH9250)."""
    tuples = [("test1", i) for i in range(5)]
    tuples += [("test2", i) for i in range(6)]
    tuples += [("test", 17), ("test", 18)]

    # Start without the last two keys, then add them by assignment.
    grown = pd.Series(np.linspace(0, 10, 11), MultiIndex.from_tuples(tuples[:-2]))
    grown.loc[("test", 17)] = 11
    grown.loc[("test", 18)] = 12

    full = pd.Series(np.linspace(0, 12, 13), MultiIndex.from_tuples(tuples))

    tm.assert_series_equal(grown, full)
91
+
92
+
93
+ def test_append(idx):
94
+ result = idx[:3].append(idx[3:])
95
+ assert result.equals(idx)
96
+
97
+ foos = [idx[:1], idx[1:3], idx[3:]]
98
+ result = foos[0].append(foos[1:])
99
+ assert result.equals(idx)
100
+
101
+ # empty
102
+ result = idx.append([])
103
+ assert result.equals(idx)
104
+
105
+
106
def test_append_index():
    """Appending between flat indexes and MultiIndexes of differing depth."""
    floats = [1.1, 1.2, 1.3]
    idx1 = Index(floats)
    idx2 = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo")
    idx3 = Index(["A", "B", "C"])

    midx_lv2 = MultiIndex.from_arrays([idx1, idx2])
    midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3])

    # see gh-7112
    tz = pytz.timezone("Asia/Tokyo")
    expected_tuples = [
        (value, tz.localize(datetime(2011, 1, day)))
        for value, day in zip(floats, (1, 2, 3))
    ]

    # flat + two-level -> flat index of scalars followed by tuples
    result = idx1.append(midx_lv2)
    expected = Index(floats + expected_tuples)
    tm.assert_index_equal(result, expected)

    # two-level + flat -> tuples followed by scalars
    result = midx_lv2.append(idx1)
    expected = Index(expected_tuples + floats)
    tm.assert_index_equal(result, expected)

    result = midx_lv2.append(midx_lv2)
    expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)])
    tm.assert_index_equal(result, expected)

    # two-level + three-level is compared against the same expected as above
    result = midx_lv2.append(midx_lv3)
    tm.assert_index_equal(result, expected)

    # three-level + two-level -> object Index of mixed-length tuples
    result = midx_lv3.append(midx_lv2)
    three_level_tuples = [
        (value, tz.localize(datetime(2011, 1, day)), letter)
        for value, day, letter in zip(floats, (1, 2, 3), ("A", "B", "C"))
    ]
    mixed_values = np.array(three_level_tuples + expected_tuples, dtype=object)
    expected = Index._simple_new(mixed_values, None)
    tm.assert_index_equal(result, expected)
151
+
152
+
153
@pytest.mark.parametrize("name, exp", [("b", "b"), ("c", None)])
def test_append_names_match(name, exp):
    """A level name is kept only where both operands agree on it (GH#48288)."""
    left = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
    right = MultiIndex.from_arrays([[3], [5]], names=["a", name])

    combined = left.append(right)

    expected = MultiIndex.from_arrays([[1, 2, 3], [3, 4, 5]], names=["a", exp])
    tm.assert_index_equal(combined, expected)
161
+
162
+
163
def test_append_names_dont_match():
    """Appending indexes with entirely different names drops them (GH#48288)."""
    left = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
    right = MultiIndex.from_arrays([[3], [5]], names=["x", "y"])

    combined = left.append(right)

    nameless = MultiIndex.from_arrays([[1, 2, 3], [3, 4, 5]], names=None)
    tm.assert_index_equal(combined, nameless)
170
+
171
+
172
def test_append_overlapping_interval_levels():
    """Appending MultiIndexes whose interval levels overlap (GH 54934)."""
    ivl1 = pd.IntervalIndex.from_breaks([0.0, 1.0, 2.0])
    ivl2 = pd.IntervalIndex.from_breaks([0.5, 1.5, 2.5])
    mi1 = MultiIndex.from_product([ivl1, ivl1])
    mi2 = MultiIndex.from_product([ivl2, ivl2])

    combined = mi1.append(mi2)

    # Spell out the intervals explicitly instead of reusing ivl1/ivl2.
    first_set = [pd.Interval(0.0, 1.0), pd.Interval(1.0, 2.0)]
    second_set = [pd.Interval(0.5, 1.5), pd.Interval(1.5, 2.5)]
    expected_tuples = [(outer, inner) for outer in first_set for inner in first_set]
    expected_tuples += [
        (outer, inner) for outer in second_set for inner in second_set
    ]
    expected = MultiIndex.from_tuples(expected_tuples)
    tm.assert_index_equal(combined, expected)
192
+
193
+
194
def test_repeat():
    """``MultiIndex.repeat`` matches a product built from a repeated level."""
    count = 2
    numbers = [1, 2, 3]
    # The same array serves as second-level values and as the level names.
    labels = np.array(["foo", "bar"])

    mi = MultiIndex.from_product([numbers, labels], names=labels)
    repeated_labels = labels.repeat(count)
    expected = MultiIndex.from_product([numbers, repeated_labels], names=labels)

    tm.assert_index_equal(mi.repeat(count), expected)
202
+
203
+
204
+ def test_insert_base(idx):
205
+ result = idx[1:4]
206
+
207
+ # test 0th element
208
+ assert idx[0:4].equals(result.insert(0, idx[0]))
209
+
210
+
211
def test_delete_base(idx):
    """Deleting the first or last position works; an out-of-range one raises."""
    # (position to delete, slice that should remain)
    scenarios = [(0, idx[1:]), (-1, idx[:-1])]
    for position, remaining in scenarios:
        trimmed = idx.delete(position)
        assert trimmed.equals(remaining)
        assert trimmed.name == remaining.name

    msg = "index 6 is out of bounds for axis 0 with size 6"
    with pytest.raises(IndexError, match=msg):
        idx.delete(len(idx))
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_setops.py ADDED
@@ -0,0 +1,772 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ import pandas as pd
5
+ from pandas import (
6
+ CategoricalIndex,
7
+ DataFrame,
8
+ Index,
9
+ IntervalIndex,
10
+ MultiIndex,
11
+ Series,
12
+ )
13
+ import pandas._testing as tm
14
+ from pandas.api.types import (
15
+ is_float_dtype,
16
+ is_unsigned_integer_dtype,
17
+ )
18
+
19
+
20
@pytest.mark.parametrize("case", [0.5, "xxx"])
@pytest.mark.parametrize(
    "method", ["intersection", "union", "difference", "symmetric_difference"]
)
def test_set_ops_error_cases(idx, case, sort, method):
    """Every set operation rejects a non-iterable ``other`` with TypeError."""
    setop = getattr(idx, method)
    with pytest.raises(TypeError, match="Input must be Index or array-like"):
        setop(case, sort=sort)
29
+
30
+
31
@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list])
def test_intersection_base(idx, sort, klass):
    """Intersection accepts several container types holding the same tuples."""
    reversed_head = idx[2::-1]  # first 3 elements reversed
    other = idx[:5]

    # Re-wrap the tuples unless the MultiIndex itself is under test.
    if klass is not MultiIndex:
        other = klass(other.values)

    common = reversed_head.intersection(other, sort=sort)
    expected = reversed_head.sort_values() if sort is None else reversed_head
    tm.assert_index_equal(common, expected)

    with pytest.raises(
        TypeError, match="other must be a MultiIndex or a list of tuples"
    ):
        reversed_head.intersection([1, 2, 3], sort=sort)
49
+
50
+
51
@pytest.mark.arm_slow
@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list])
def test_union_base(idx, sort, klass):
    """Union accepts several container types holding the same tuples."""
    reversed_idx = idx[::-1]
    other = idx[:5]

    # Re-wrap the tuples unless the MultiIndex itself is under test.
    if klass is not MultiIndex:
        other = klass(other.values)

    combined = reversed_idx.union(other, sort=sort)
    expected = reversed_idx.sort_values() if sort is None else reversed_idx
    tm.assert_index_equal(combined, expected)

    with pytest.raises(
        TypeError, match="other must be a MultiIndex or a list of tuples"
    ):
        reversed_idx.union([1, 2, 3], sort=sort)
70
+
71
+
72
def test_difference_base(idx, sort):
    """Difference with the tail of the index leaves its head."""
    tail = idx[4:]
    head = idx[:4]
    if sort is None:
        head = head.sort_values()

    remaining = idx.difference(tail, sort=sort)
    assert remaining.equals(head)
    tm.assert_index_equal(remaining, head)

    # GH 10149
    for wrapper in (np.array, Series, list):
        remaining = idx.difference(wrapper(tail.values), sort=sort)
        tm.assert_index_equal(remaining, head)

    with pytest.raises(
        TypeError, match="other must be a MultiIndex or a list of tuples"
    ):
        idx.difference([1, 2, 3], sort=sort)
92
+
93
+
94
def test_symmetric_difference(idx, sort):
    """Symmetric difference of two offset slices is the two end entries."""
    without_first = idx[1:]
    without_last = idx[:-1]
    ends = idx[[-1, 0]]
    if sort is None:
        ends = ends.sort_values()

    outcome = without_first.symmetric_difference(without_last, sort=sort)
    tm.assert_index_equal(outcome, ends)

    # GH 10149
    for wrapper in (np.array, Series, list):
        wrapped = wrapper(without_last.values)
        outcome = without_first.symmetric_difference(wrapped, sort=sort)
        tm.assert_index_equal(outcome, ends)

    with pytest.raises(
        TypeError, match="other must be a MultiIndex or a list of tuples"
    ):
        without_first.symmetric_difference([1, 2, 3], sort=sort)
114
+
115
+
116
def test_multiindex_symmetric_difference():
    """Names are kept when they match and dropped when they differ (GH 13490)."""
    mi = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"])

    same_names = mi.symmetric_difference(mi)
    assert same_names.names == mi.names

    renamed = mi.copy().rename(["A", "B"])
    different_names = mi.symmetric_difference(renamed)
    assert different_names.names == [None, None]
125
+
126
+
127
+ def test_empty(idx):
128
+ # GH 15270
129
+ assert not idx.empty
130
+ assert idx[:0].empty
131
+
132
+
133
def test_difference(idx, sort):
    """Exercise ``MultiIndex.difference`` across the main corner cases.

    Covers a plain difference, three kinds of empty result, mismatched
    names, array/list inputs, and rejection of a list of non-tuples.
    """
    first = idx
    result = first.difference(idx[-3:], sort=sort)
    vals = idx[:-3].values

    if sort is None:
        vals = sorted(vals)

    expected = MultiIndex.from_tuples(vals, sortorder=0, names=idx.names)

    assert isinstance(result, MultiIndex)
    assert result.equals(expected)
    assert result.names == idx.names
    tm.assert_index_equal(result, expected)

    # empty difference: reflexive
    result = idx.difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # empty difference: superset
    result = idx[-3:].difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # empty difference: degenerate
    result = idx[:0].difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # names not the same
    chunklet = idx[-3:]
    chunklet.names = ["foo", "baz"]
    result = first.difference(chunklet, sort=sort)
    assert result.names == (None, None)

    # empty, but non-equal
    result = idx.difference(idx.sortlevel(1)[0], sort=sort)
    assert len(result) == 0

    # an ndarray of tuples is accepted and yields an empty difference
    result = first.difference(first.values, sort=sort)
    assert result.equals(first[:0])

    # name from empty array
    result = first.difference([], sort=sort)
    assert first.equals(result)
    assert first.names == result.names

    # name from non-empty array
    result = first.difference([("foo", "one")], sort=sort)
    expected = MultiIndex.from_tuples(
        [("bar", "one"), ("baz", "two"), ("foo", "two"), ("qux", "one"), ("qux", "two")]
    )
    expected.names = first.names
    assert first.names == result.names
    # ``expected`` was previously built but never compared. Result order
    # depends on ``sort``, so compare the remaining tuples without order.
    assert len(result) == len(expected)
    assert set(result) == set(expected)

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        first.difference([1, 2, 3, 4, 5], sort=sort)
196
+
197
+
198
def test_difference_sort_special():
    """Difference with an empty list under the default sort returns the index (GH-24959)."""
    unsorted = MultiIndex.from_product([[1, 0], ["a", "b"]])
    # sort=None, the default
    outcome = unsorted.difference([])
    tm.assert_index_equal(outcome, unsorted)
204
+
205
+
206
def test_difference_sort_special_true():
    """Difference with an empty list and ``sort=True`` returns a sorted index."""
    unsorted = MultiIndex.from_product([[1, 0], ["a", "b"]])
    ordered = MultiIndex.from_product([[0, 1], ["a", "b"]])

    outcome = unsorted.difference([], sort=True)
    tm.assert_index_equal(outcome, ordered)
211
+
212
+
213
def test_difference_sort_incomparable():
    """Unsortable level values warn under the default sort and pass with sort=False (GH-24959)."""
    stamp = pd.Timestamp("2000")
    mixed = MultiIndex.from_product([[1, stamp, 2], ["a", "b"]])
    disjoint = MultiIndex.from_product([[3, stamp, 4], ["c", "d"]])

    # sort=None, the default
    with tm.assert_produces_warning(
        RuntimeWarning, match="sort order is undefined for incomparable objects"
    ):
        outcome = mixed.difference(disjoint)
    tm.assert_index_equal(outcome, mixed)

    # sort=False
    outcome = mixed.difference(disjoint, sort=False)
    tm.assert_index_equal(outcome, mixed)
227
+
228
+
229
def test_difference_sort_incomparable_true():
    """``sort=True`` with unsortable level values raises TypeError."""
    stamp = pd.Timestamp("2000")
    mixed = MultiIndex.from_product([[1, stamp, 2], ["a", "b"]])
    disjoint = MultiIndex.from_product([[3, stamp, 4], ["c", "d"]])

    # TODO: this is raising in constructing a Categorical when calling
    # algos.safe_sort. Should we catch and re-raise with a better message?
    with pytest.raises(
        TypeError,
        match="'values' is not ordered, please explicitly specify the categories order ",
    ):
        mixed.difference(disjoint, sort=True)
238
+
239
+
240
+ def test_union(idx, sort):
241
+ piece1 = idx[:5][::-1]
242
+ piece2 = idx[3:]
243
+
244
+ the_union = piece1.union(piece2, sort=sort)
245
+
246
+ if sort in (None, False):
247
+ tm.assert_index_equal(the_union.sort_values(), idx.sort_values())
248
+ else:
249
+ tm.assert_index_equal(the_union, idx)
250
+
251
+ # corner case, pass self or empty thing:
252
+ the_union = idx.union(idx, sort=sort)
253
+ tm.assert_index_equal(the_union, idx)
254
+
255
+ the_union = idx.union(idx[:0], sort=sort)
256
+ tm.assert_index_equal(the_union, idx)
257
+
258
+ tuples = idx.values
259
+ result = idx[:4].union(tuples[4:], sort=sort)
260
+ if sort is None:
261
+ tm.assert_index_equal(result.sort_values(), idx.sort_values())
262
+ else:
263
+ assert result.equals(idx)
264
+
265
+
266
def test_union_with_regular_index(idx, using_infer_string):
    """Union between a flat string Index and a MultiIndex, in both directions."""
    flat = Index(["A", "B", "C"])

    flat_first = flat.union(idx)
    assert ("foo", "one") in flat_first
    assert "B" in flat_first

    if using_infer_string:
        with pytest.raises(NotImplementedError, match="Can only union"):
            idx.union(flat)
        return

    with tm.assert_produces_warning(
        RuntimeWarning, match="The values in the array are unorderable"
    ):
        multi_first = idx.union(flat)
    # This is more consistent now, if sorting fails then we don't sort at all
    # in the MultiIndex case.
    assert not flat_first.equals(multi_first)
283
+
284
+
285
+ def test_intersection(idx, sort):
286
+ piece1 = idx[:5][::-1]
287
+ piece2 = idx[3:]
288
+
289
+ the_int = piece1.intersection(piece2, sort=sort)
290
+
291
+ if sort in (None, True):
292
+ tm.assert_index_equal(the_int, idx[3:5])
293
+ else:
294
+ tm.assert_index_equal(the_int.sort_values(), idx[3:5])
295
+
296
+ # corner case, pass self
297
+ the_int = idx.intersection(idx, sort=sort)
298
+ tm.assert_index_equal(the_int, idx)
299
+
300
+ # empty intersection: disjoint
301
+ empty = idx[:2].intersection(idx[2:], sort=sort)
302
+ expected = idx[:0]
303
+ assert empty.equals(expected)
304
+
305
+ tuples = idx.values
306
+ result = idx.intersection(tuples)
307
+ assert result.equals(idx)
308
+
309
+
310
@pytest.mark.parametrize(
    "method", ["intersection", "union", "difference", "symmetric_difference"]
)
def test_setop_with_categorical(idx, sort, method):
    """A categorical flat index acts like the MultiIndex it came from, minus names."""
    categorical = idx.to_flat_index().astype("category")
    nameless = [None] * idx.nlevels
    setop = getattr(idx, method)

    # Full operand first, then the first five entries only.
    for cat_other, mi_other in ((categorical, idx), (categorical[:5], idx[:5])):
        outcome = setop(cat_other, sort=sort)
        reference = setop(mi_other, sort=sort).rename(nameless)
        tm.assert_index_equal(outcome, reference)
324
+
325
+
326
def test_intersection_non_object(idx, sort):
    """Intersection with a non-tuple Index is empty; a non-empty ndarray raises."""
    numeric = Index(range(3), name="foo")
    empty_codes = [[]] * idx.nlevels

    outcome = idx.intersection(numeric, sort=sort)
    nameless_empty = MultiIndex(levels=idx.levels, codes=empty_codes, names=None)
    tm.assert_index_equal(outcome, nameless_empty, exact=True)

    # if we pass a length-0 ndarray (i.e. no name, we retain our idx.name)
    outcome = idx.intersection(np.asarray(numeric)[:0], sort=sort)
    named_empty = MultiIndex(levels=idx.levels, codes=empty_codes, names=idx.names)
    tm.assert_index_equal(outcome, named_empty, exact=True)

    with pytest.raises(
        TypeError, match="other must be a MultiIndex or a list of tuples"
    ):
        # With non-zero length non-index, we try and fail to convert to tuples
        idx.intersection(np.asarray(numeric), sort=sort)
342
+
343
+
344
def test_intersect_equal_sort():
    """Self-intersection returns the index unchanged for sort=False and sort=None (GH-24959)."""
    unsorted = MultiIndex.from_product([[1, 0], ["a", "b"]])
    for sort_option in (False, None):
        outcome = unsorted.intersection(unsorted, sort=sort_option)
        tm.assert_index_equal(outcome, unsorted)
349
+
350
+
351
def test_intersect_equal_sort_true():
    """Self-intersection with ``sort=True`` returns a sorted index."""
    unsorted = MultiIndex.from_product([[1, 0], ["a", "b"]])
    ordered = MultiIndex.from_product([[0, 1], ["a", "b"]])

    outcome = unsorted.intersection(unsorted, sort=True)
    tm.assert_index_equal(outcome, ordered)
356
+
357
+
358
@pytest.mark.parametrize("slice_", [slice(None), slice(0)])
def test_union_sort_other_empty(slice_):
    """Union with a full or empty slice of itself returns the index unsorted.

    See https://github.com/pandas-dev/pandas/issues/24959
    """
    unsorted = MultiIndex.from_product([[1, 0], ["a", "b"]])
    piece = unsorted[slice_]

    # default, sort=None
    tm.assert_index_equal(unsorted.union(piece), unsorted)
    tm.assert_index_equal(piece.union(unsorted), unsorted)

    # sort=False
    tm.assert_index_equal(unsorted.union(piece, sort=False), unsorted)
370
+
371
+
372
def test_union_sort_other_empty_sort():
    """Union with an empty index and ``sort=True`` returns a sorted index."""
    unsorted = MultiIndex.from_product([[1, 0], ["a", "b"]])
    ordered = MultiIndex.from_product([[0, 1], ["a", "b"]])

    outcome = unsorted.union(unsorted[:0], sort=True)
    tm.assert_index_equal(outcome, ordered)
378
+
379
+
380
def test_union_sort_other_incomparable():
    """Union with unsortable level values warns by default and passes with sort=False.

    See https://github.com/pandas-dev/pandas/issues/24959
    """
    mixed = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])
    first_entry = mixed[:1]

    # default, sort=None
    with tm.assert_produces_warning(RuntimeWarning):
        outcome = mixed.union(first_entry)
    tm.assert_index_equal(outcome, mixed)

    # sort=False
    outcome = mixed.union(first_entry, sort=False)
    tm.assert_index_equal(outcome, mixed)
392
+
393
+
394
def test_union_sort_other_incomparable_sort():
    """``sort=True`` with unsortable level values raises TypeError."""
    mixed = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])
    with pytest.raises(
        TypeError,
        match="'<' not supported between instances of 'Timestamp' and 'int'",
    ):
        mixed.union(mixed[:1], sort=True)
399
+
400
+
401
def test_union_non_object_dtype_raises():
    """Union with one of the index's own levels raises NotImplementedError (GH#32646)."""
    mi = MultiIndex.from_product([["a", "b"], [1, 2]])
    integer_level = mi.levels[1]

    with pytest.raises(
        NotImplementedError,
        match="Can only union MultiIndex with MultiIndex or Index of tuples",
    ):
        mi.union(integer_level)
410
+
411
+
412
def test_union_empty_self_different_names():
    """An empty nameless index unioned with a named one gives a nameless result (GH#38423)."""
    empty = MultiIndex.from_arrays([[]])
    named = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])

    outcome = empty.union(named)

    nameless = MultiIndex.from_arrays([[1, 2], [3, 4]])
    tm.assert_index_equal(outcome, nameless)
419
+
420
+
421
def test_union_multiindex_empty_rangeindex():
    """Union with an empty RangeIndex returns the MultiIndex values, either way round (GH#41234)."""
    mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
    empty_range = pd.RangeIndex(0)

    # Names are not compared in either direction.
    mi_first = mi.union(empty_range)
    tm.assert_index_equal(mi, mi_first, check_names=False)

    range_first = empty_range.union(mi)
    tm.assert_index_equal(mi, range_first, check_names=False)
431
+
432
+
433
@pytest.mark.parametrize(
    "method", ["union", "intersection", "difference", "symmetric_difference"]
)
def test_setops_sort_validation(method):
    """An invalid ``sort`` value raises ValueError; ``sort=True`` is accepted."""
    left = MultiIndex.from_product([["a", "b"], [1, 2]])
    right = MultiIndex.from_product([["b", "c"], [1, 2]])
    setop = getattr(left, method)

    with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
        setop(right, sort=2)

    # sort=True is supported as of GH#?
    setop(right, sort=True)
445
+
446
+
447
@pytest.mark.parametrize("val", [pd.NA, 100])
def test_difference_keep_ea_dtypes(any_numeric_ea_dtype, val):
    """``difference`` keeps the extension dtype of a level (GH#48606)."""

    def ea_series(data):
        # Series with the parametrized masked numeric dtype.
        return Series(data, dtype=any_numeric_ea_dtype)

    left = MultiIndex.from_arrays([ea_series([1, 2]), [2, 1]], names=["a", None])
    right = MultiIndex.from_arrays([ea_series([1, 2, val]), [1, 1, 3]])

    outcome = left.difference(right)
    expected = MultiIndex.from_arrays([ea_series([1]), [2]])
    tm.assert_index_equal(outcome, expected)

    # Same entries in reverse order: the difference is empty.
    outcome = left.difference(left.sort_values(ascending=False))
    expected = MultiIndex.from_arrays(
        [ea_series([]), Series([], dtype=np.int64)],
        names=["a", None],
    )
    tm.assert_index_equal(outcome, expected)
466
+
467
+
468
@pytest.mark.parametrize("val", [pd.NA, 5])
def test_symmetric_difference_keeping_ea_dtype(any_numeric_ea_dtype, val):
    """``symmetric_difference`` keeps the extension dtype of a level (GH#48607)."""

    def ea_series(data):
        # Series with the parametrized masked numeric dtype.
        return Series(data, dtype=any_numeric_ea_dtype)

    left = MultiIndex.from_arrays([ea_series([1, 2]), [2, 1]], names=["a", None])
    right = MultiIndex.from_arrays([ea_series([1, 2, val]), [1, 1, 3]])

    outcome = left.symmetric_difference(right)

    expected = MultiIndex.from_arrays([ea_series([1, 1, val]), [1, 2, 3]])
    tm.assert_index_equal(outcome, expected)
482
+
483
+
484
@pytest.mark.parametrize(
    ("tuples", "exp_tuples"),
    [
        ([("val1", "test1")], [("val1", "test1")]),
        ([("val1", "test1"), ("val1", "test1")], [("val1", "test1")]),
        (
            [("val2", "test2"), ("val1", "test1")],
            [("val2", "test2"), ("val1", "test1")],
        ),
    ],
)
def test_intersect_with_duplicates(tuples, exp_tuples):
    """Intersecting with an index that holds duplicates yields ``exp_tuples`` (GH#36915)."""
    level_names = ["first", "second"]
    with_dupes = MultiIndex.from_tuples(
        [("val1", "test1"), ("val1", "test1"), ("val2", "test2")],
        names=level_names,
    )
    candidate = MultiIndex.from_tuples(tuples, names=level_names)

    observed = candidate.intersection(with_dupes)

    wanted = MultiIndex.from_tuples(exp_tuples, names=level_names)
    tm.assert_index_equal(observed, wanted)
505
+
506
+
507
@pytest.mark.parametrize(
    "data, names, expected",
    [
        ((1,), None, [None, None]),
        ((1,), ["a"], [None, None]),
        ((1,), ["b"], [None, None]),
        ((1, 2), ["c", "d"], [None, None]),
        ((1, 2), ["b", "a"], [None, None]),
        ((1, 2, 3), ["a", "b", "c"], [None, None]),
        ((1, 2), ["a", "c"], ["a", None]),
        ((1, 2), ["c", "b"], [None, "b"]),
        ((1, 2), ["a", "b"], ["a", "b"]),
        ((1, 2), [None, "b"], [None, "b"]),
    ],
)
def test_maybe_match_names(data, names, expected):
    """``_maybe_match_names`` against an ("a", "b") index returns ``expected`` (GH#38323)."""
    reference = MultiIndex.from_tuples([], names=["a", "b"])
    other = MultiIndex.from_tuples([data], names=names)
    assert reference._maybe_match_names(other) == expected
528
+
529
+
530
def test_intersection_equal_different_names():
    """Equal indexes with one differing level name intersect to ``[None, "b"]`` names (GH#30302)."""
    arrays = [[1, 2], [3, 4]]
    left = MultiIndex.from_arrays(arrays, names=["c", "b"])
    right = MultiIndex.from_arrays(arrays, names=["a", "b"])

    # The first level's name differs, so only "b" is expected to survive.
    wanted = MultiIndex.from_arrays(arrays, names=[None, "b"])
    tm.assert_index_equal(left.intersection(right), wanted)
538
+
539
+
540
def test_intersection_different_names():
    """Intersecting a named index with an unnamed equal one matches the unnamed one (GH#38323)."""
    named = MultiIndex.from_arrays([[1], [3]], names=["c", "b"])
    unnamed = MultiIndex.from_arrays([[1], [3]])
    tm.assert_index_equal(named.intersection(unnamed), unnamed)
546
+
547
+
548
def test_intersection_with_missing_values_on_both_sides(nulls_fixture):
    """Intersection of two indexes that both contain the null value (GH#38623)."""
    null = nulls_fixture
    left = MultiIndex.from_arrays([[3, null, 4, null], [1, 2, 4, 2]])
    right = MultiIndex.from_arrays([[3, null, 3], [1, 2, 4]])

    # (3, 1) and (null, 2) are the entries present on both sides.
    wanted = MultiIndex.from_arrays([[3, null], [1, 2]])
    tm.assert_index_equal(left.intersection(right), wanted)
555
+
556
+
557
def test_union_with_missing_values_on_both_sides(nulls_fixture):
    """Union of two single-level MultiIndexes that both contain the null value (GH#38623)."""
    null = nulls_fixture
    left = MultiIndex.from_arrays([[1, null]])
    right = MultiIndex.from_arrays([[1, null, 3]])

    # Expected result lists the null once, after the non-null values.
    wanted = MultiIndex.from_arrays([[1, 3, null]])
    tm.assert_index_equal(left.union(right), wanted)
564
+
565
+
566
@pytest.mark.parametrize("dtype", ["float64", "Float64"])
@pytest.mark.parametrize("sort", [None, False])
def test_union_nan_got_duplicated(dtype, sort):
    """Union must not duplicate the NaN entry (GH#38977, GH#49010)."""
    left = MultiIndex.from_arrays([pd.array([1.0, np.nan], dtype=dtype), [2, 3]])
    right = MultiIndex.from_arrays(
        [pd.array([1.0, np.nan, 3.0], dtype=dtype), [2, 3, 4]]
    )

    observed = left.union(right, sort=sort)

    if sort is not None:
        # Unsorted: the result is expected to equal the larger operand.
        wanted = right
    else:
        wanted = MultiIndex.from_arrays(
            [pd.array([1.0, 3.0, np.nan], dtype=dtype), [2, 4, 3]]
        )
    tm.assert_index_equal(observed, wanted)
580
+
581
+
582
@pytest.mark.parametrize("val", [4, 1])
def test_union_keep_ea_dtype(any_numeric_ea_dtype, val):
    """``MultiIndex.union`` keeps the extension dtype of the first level (GH#48505)."""
    ea_dtype = any_numeric_ea_dtype
    left = MultiIndex.from_arrays(
        [Series([val, 2], dtype=ea_dtype), [1, 2]], names=["a", None]
    )
    right = MultiIndex.from_arrays([Series([2, 1], dtype=ea_dtype), [2, 1]])

    observed = left.union(right)

    # val == 4 adds a third distinct entry; val == 1 makes both sides equal.
    first, second = ([1, 2, 4], [1, 2, 1]) if val == 4 else ([1, 2], [1, 2])
    wanted = MultiIndex.from_arrays([Series(first, dtype=ea_dtype), second])
    tm.assert_index_equal(observed, wanted)
600
+
601
+
602
@pytest.mark.parametrize("dupe_val", [3, pd.NA])
def test_union_with_duplicates_keep_ea_dtype(dupe_val, any_numeric_ea_dtype):
    """Union with duplicated entries keeps the extension dtype (GH48900)."""

    def two_level(values):
        # Both levels are built from the same values, as separate Series.
        return MultiIndex.from_arrays(
            [
                Series(values, dtype=any_numeric_ea_dtype),
                Series(values, dtype=any_numeric_ea_dtype),
            ]
        )

    left = two_level([1, dupe_val, 2])
    right = two_level([2, dupe_val, dupe_val])

    wanted = two_level([1, 2, dupe_val, dupe_val])
    tm.assert_index_equal(left.union(right), wanted)
625
+
626
+
627
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_union_duplicates(index, request):
    """Union of a MultiIndex with a copy that has one duplicated entry (GH#38977).

    Skipped for empty indexes and for IntervalIndex / CategoricalIndex.
    """
    if index.empty or isinstance(index, (IntervalIndex, CategoricalIndex)):
        pytest.skip(f"No duplicates in an empty {type(index).__name__}")

    uniques = index.unique().values.tolist()
    count = len(uniques)
    plain = MultiIndex.from_arrays([uniques, [1] * count])
    # Same entries with the first one repeated at the front.
    with_dupe = MultiIndex.from_arrays([[uniques[0]] + uniques, [1] * (count + 1)])

    expected = with_dupe.sort_values()
    tm.assert_index_equal(with_dupe.union(plain), expected)

    first_level = with_dupe.levels[0]
    cast_to = None
    if (
        is_unsigned_integer_dtype(first_level)
        and (with_dupe.get_level_values(0) < 2**63).all()
    ):
        # GH#47294 - union uses lib.fast_zip, converting data to Python integers
        # and loses type information. Result is then unsigned only when values are
        # sufficiently large to require unsigned dtype. This happens only if other
        # has dups or one of both have missing values
        cast_to = np.int64
    elif is_float_dtype(first_level):
        # with_dupe has duplicates, which is a different path than above.
        # Fix that path to use the correct float dtype?
        cast_to = float

    if cast_to is not None:
        expected = expected.set_levels(
            [expected.levels[0].astype(cast_to), expected.levels[1]]
        )

    tm.assert_index_equal(plain.union(with_dupe), expected)
660
+
661
+
662
def test_union_keep_dtype_precision(any_real_numeric_dtype):
    """``MultiIndex.union`` keeps the numeric dtype of the first level (GH#48498)."""
    num_dtype = any_real_numeric_dtype
    level_names = ["a", None]
    left = MultiIndex.from_arrays(
        [Series([4, 1, 1], dtype=num_dtype), [2, 1, 1]], names=level_names
    )
    right = MultiIndex.from_arrays(
        [Series([1, 4], dtype=num_dtype), [1, 2]], names=level_names
    )

    wanted = MultiIndex.from_arrays(
        [Series([1, 1, 4], dtype=num_dtype), [1, 1, 2]],
        names=level_names,
    )
    tm.assert_index_equal(left.union(right), wanted)
675
+
676
+
677
def test_union_keep_ea_dtype_with_na(any_numeric_ea_dtype):
    """Union keeps the extension dtype when both sides hold ``pd.NA`` (GH#48498)."""
    ea_dtype = any_numeric_ea_dtype
    left = MultiIndex.from_arrays(
        [Series([4, pd.NA], dtype=ea_dtype), [2, 1]], names=["a", None]
    )
    right = MultiIndex.from_arrays([Series([1, pd.NA], dtype=ea_dtype), [1, 2]])

    wanted = MultiIndex.from_arrays(
        [Series([1, 4, pd.NA, pd.NA], dtype=ea_dtype), [1, 2, 1, 2]]
    )
    tm.assert_index_equal(left.union(right), wanted)
688
+
689
+
690
@pytest.mark.parametrize(
    "levels1, levels2, codes1, codes2, names",
    [
        (
            [["a", "b", "c"], [0, ""]],
            [["c", "d", "b"], [""]],
            [[0, 1, 2], [1, 1, 1]],
            [[0, 1, 2], [0, 0, 0]],
            ["name1", "name2"],
        ),
    ],
)
def test_intersection_lexsort_depth(levels1, levels2, codes1, codes2, names):
    """The intersection of the two indexes reports a lexsort depth of 2 (GH#25169)."""
    left = MultiIndex(levels=levels1, codes=codes1, names=names)
    right = MultiIndex(levels=levels2, codes=codes2, names=names)
    assert left.intersection(right)._lexsort_depth == 2
708
+
709
+
710
@pytest.mark.parametrize(
    "a",
    [pd.Categorical(["a", "b"], categories=["a", "b"]), ["a", "b"]],
)
@pytest.mark.parametrize(
    "b",
    [
        pd.Categorical(["a", "b"], categories=["b", "a"], ordered=True),
        pd.Categorical(["a", "b"], categories=["b", "a"]),
    ],
)
def test_intersection_with_non_lex_sorted_categories(a, b):
    """Intersection gives the same result whether or not the frames are sorted (GH#49974)."""
    other = ["1", "2"]
    frame_a = DataFrame({"x": a, "y": other})
    frame_b = DataFrame({"x": b, "y": other})
    sort_cols = ["x", "y"]

    expected = MultiIndex.from_arrays([a, other], names=sort_cols)

    # Every combination of raw / sorted left and right operands.
    raw_a = MultiIndex.from_frame(frame_a)
    raw_b = MultiIndex.from_frame(frame_b)
    sorted_a = MultiIndex.from_frame(frame_a.sort_values(sort_cols))
    sorted_b = MultiIndex.from_frame(frame_b.sort_values(sort_cols))

    results = [
        raw_a.intersection(sorted_b),
        raw_a.intersection(raw_b),
        sorted_a.intersection(raw_b),
        sorted_a.intersection(sorted_b),
    ]
    for observed in results:
        tm.assert_index_equal(observed, expected)
745
+
746
+
747
@pytest.mark.parametrize("val", [pd.NA, 100])
def test_intersection_keep_ea_dtypes(val, any_numeric_ea_dtype):
    """``MultiIndex.intersection`` keeps the extension dtype (GH#48604)."""
    ea_dtype = any_numeric_ea_dtype
    left = MultiIndex.from_arrays(
        [Series([1, 2], dtype=ea_dtype), [2, 1]], names=["a", None]
    )
    right = MultiIndex.from_arrays([Series([1, 2, val], dtype=ea_dtype), [1, 1, 3]])

    # (2, 1) is the only entry found on both sides.
    wanted = MultiIndex.from_arrays([Series([2], dtype=ea_dtype), [1]])
    tm.assert_index_equal(left.intersection(right), wanted)
759
+
760
+
761
def test_union_with_na_when_constructing_dataframe():
    """Build a DataFrame from two Series whose MultiIndexes contain missing keys (GH43222)."""
    null_levels = [Series([None], dtype="str"), Series([None], dtype="str")]
    first_row = Series((1,), index=MultiIndex.from_arrays(null_levels))
    second_row = Series(
        (10, 20), index=MultiIndex.from_tuples(((None, None), ("a", "b")))
    )

    observed = DataFrame([first_row, second_row])

    wanted = DataFrame(
        {(np.nan, np.nan): [1.0, 10.0], ("a", "b"): [np.nan, 20.0]}
    )
    tm.assert_frame_equal(observed, wanted)
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_sorting.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas.errors import (
5
+ PerformanceWarning,
6
+ UnsortedIndexError,
7
+ )
8
+
9
+ from pandas import (
10
+ CategoricalIndex,
11
+ DataFrame,
12
+ Index,
13
+ MultiIndex,
14
+ RangeIndex,
15
+ Series,
16
+ Timestamp,
17
+ )
18
+ import pandas._testing as tm
19
+ from pandas.core.indexes.frozen import FrozenList
20
+
21
+
22
def test_sortlevel(idx):
    """``sortlevel`` on a shuffled copy of ``idx`` matches Python's tuple sorting."""
    shuffled = list(idx)
    np.random.default_rng(2).shuffle(shuffled)
    index = MultiIndex.from_tuples(shuffled)

    # Level 0 uses natural tuple order; level 1 orders by (second, first).
    sort_keys = ((0, None), (1, lambda pair: (pair[1], pair[0])))
    for level, key in sort_keys:
        expected = MultiIndex.from_tuples(sorted(shuffled, key=key))

        ascending, _ = index.sortlevel(level)
        assert ascending.equals(expected)

        descending, _ = index.sortlevel(level, ascending=False)
        assert descending.equals(expected[::-1])
42
+
43
+
44
def test_sortlevel_not_sort_remaining():
    """With ``sort_remaining=False``, sorting on "A" alone leaves this index unchanged."""
    rows = [[1, 1, 3], [1, 1, 1]]
    original = MultiIndex.from_tuples(rows, names=list("ABC"))
    by_a, _ = original.sortlevel("A", sort_remaining=False)
    assert by_a.equals(original)
48
+
49
+
50
def test_sortlevel_deterministic():
    """``sortlevel`` on a fixed set of tuples matches Python's tuple sorting."""
    pairs = [
        ("bar", "one"),
        ("foo", "two"),
        ("qux", "two"),
        ("foo", "one"),
        ("baz", "two"),
        ("qux", "one"),
    ]
    index = MultiIndex.from_tuples(pairs)

    # Level 0 uses natural tuple order; level 1 orders by (second, first).
    sort_keys = ((0, None), (1, lambda pair: (pair[1], pair[0])))
    for level, key in sort_keys:
        expected = MultiIndex.from_tuples(sorted(pairs, key=key))

        ascending, _ = index.sortlevel(level)
        assert ascending.equals(expected)

        descending, _ = index.sortlevel(level, ascending=False)
        assert descending.equals(expected[::-1])
76
+
77
+
78
+ def test_sortlevel_na_position():
79
+ # GH#51612
80
+ midx = MultiIndex.from_tuples([(1, np.nan), (1, 1)])
81
+ result = midx.sortlevel(level=[0, 1], na_position="last")[0]
82
+ expected = MultiIndex.from_tuples([(1, 1), (1, np.nan)])
83
+ tm.assert_index_equal(result, expected)
84
+
85
+
86
def test_numpy_argsort(idx):
    """``np.argsort(idx)`` returns the same positions as ``idx.argsort()``."""
    tm.assert_numpy_array_equal(np.argsort(idx), idx.argsort())

    # these are the only two types that perform
    # pandas compatibility input validation - the
    # rest already perform separate (or no) such
    # validation via their 'values' attribute as
    # defined in pandas.core.indexes/base.py - they
    # cannot be changed at the moment due to
    # backwards compatibility concerns
    #
    # NOTE(review): ``type(idx)`` is a class object, so this ``isinstance``
    # check is False for any index and the branch below never runs. Left
    # unchanged to preserve behavior; confirm upstream intent before either
    # switching to ``isinstance(idx, ...)`` or removing the branch.
    if isinstance(type(idx), (CategoricalIndex, RangeIndex)):
        unsupported = (
            ("the 'axis' parameter is not supported", {"axis": 1}),
            ("the 'kind' parameter is not supported", {"kind": "mergesort"}),
            ("the 'order' parameter is not supported", {"order": ("a", "b")}),
        )
        for msg, kwargs in unsupported:
            with pytest.raises(ValueError, match=msg):
                np.argsort(idx, **kwargs)
110
+
111
+
112
def test_unsortedindex():
    """Slicing an unsorted MultiIndex raises until the frame is sorted (GH 11897)."""
    keys = [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")]
    mi = MultiIndex.from_tuples(keys, names=["one", "two"])
    rows = [[i, 10 * i] for i in range(6)]
    df = DataFrame(rows, index=mi, columns=["one", "two"])

    # GH 16734: not sorted, but no real slicing
    tm.assert_series_equal(df.loc(axis=0)["z", "a"], df.iloc[0])

    msg = (
        "MultiIndex slicing requires the index to be lexsorted: "
        r"slicing on levels \[1\], lexsort depth 0"
    )
    with pytest.raises(UnsortedIndexError, match=msg):
        df.loc(axis=0)["z", slice("a")]

    # After sorting in place, slicing on the first level works.
    df.sort_index(inplace=True)
    z_rows = df.loc(axis=0)["z", :]
    assert len(z_rows) == 2

    # A key missing from the first level still raises KeyError.
    with pytest.raises(KeyError, match="'q'"):
        df.loc(axis=0)["q", :]
136
+
137
+
138
def test_unsortedindex_doc_examples():
    """Replay the documentation example on sorting a MultiIndex.

    See https://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex
    """
    columns = {
        "jim": [0, 0, 1, 1],
        "joe": ["x", "x", "z", "y"],
        "jolie": np.random.default_rng(2).random(4),
    }
    unsorted = DataFrame(columns).set_index(["jim", "joe"])

    # A scalar-tuple lookup on the unsorted index works but warns.
    with tm.assert_produces_warning(PerformanceWarning):
        unsorted.loc[(1, "z")]

    # A tuple slice needs a deeper lexsort than the index has.
    msg = r"Key length \(2\) was greater than MultiIndex lexsort depth \(1\)"
    with pytest.raises(UnsortedIndexError, match=msg):
        unsorted.loc[(0, "y"):(1, "z")]

    assert not unsorted.index._is_lexsorted()
    assert unsorted.index._lexsort_depth == 1

    # sort it
    ordered = unsorted.sort_index()
    ordered.loc[(1, "z")]
    ordered.loc[(0, "y"):(1, "z")]

    assert ordered.index._is_lexsorted()
    assert ordered.index._lexsort_depth == 2
166
+
167
+
168
def test_reconstruct_sort():
    """``_sort_levels_monotonic`` returns an index equal to its input in every case here."""
    # starts off lexsorted & monotonic
    already_sorted = MultiIndex.from_arrays(
        [["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]]
    )
    assert already_sorted.is_monotonic_increasing
    rebuilt = already_sorted._sort_levels_monotonic()
    assert rebuilt.is_monotonic_increasing
    # In this case the very same object is expected back.
    assert already_sorted is rebuilt

    assert already_sorted.equals(rebuilt)
    assert Index(already_sorted.values).equals(Index(rebuilt.values))

    # cannot convert to lexsorted
    not_monotonic = (
        MultiIndex.from_tuples(
            [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")],
            names=["one", "two"],
        ),
        MultiIndex(
            levels=[["b", "d", "a"], [1, 2, 3]],
            codes=[[0, 1, 0, 2], [2, 0, 0, 1]],
            names=["col1", "col2"],
        ),
    )
    for mi in not_monotonic:
        assert not mi.is_monotonic_increasing
        recons = mi._sort_levels_monotonic()
        assert not recons.is_monotonic_increasing
        assert mi.equals(recons)
        assert Index(mi.values).equals(Index(recons.values))
201
+
202
+
203
def test_reconstruct_remove_unused():
    """``remove_unused_levels`` drops level values no longer referenced (xref GH 2770)."""
    frame = DataFrame(
        [["deleteMe", 1, 9], ["keepMe", 2, 9], ["keepMeToo", 3, 9]],
        columns=["first", "second", "third"],
    )
    indexed = frame.set_index(["first", "second"], drop=False)
    filtered = indexed[indexed["first"] != "deleteMe"]
    level_names = ["first", "second"]

    # removed levels are there
    with_unused = MultiIndex(
        levels=[["deleteMe", "keepMe", "keepMeToo"], [1, 2, 3]],
        codes=[[1, 2], [1, 2]],
        names=level_names,
    )
    tm.assert_index_equal(filtered.index, with_unused)

    pruned_expected = MultiIndex(
        levels=[["keepMe", "keepMeToo"], [2, 3]],
        codes=[[0, 1], [0, 1]],
        names=level_names,
    )
    pruned = filtered.index.remove_unused_levels()
    tm.assert_index_equal(pruned, pruned_expected)

    # idempotent
    pruned_again = pruned.remove_unused_levels()
    tm.assert_index_equal(pruned_again, pruned_expected)
    assert pruned_again.is_(pruned)
233
+
234
+
235
@pytest.mark.parametrize(
    "first_type,second_type", [("int64", "int64"), ("datetime64[D]", "str")]
)
def test_remove_unused_levels_large(first_type, second_type):
    """``remove_unused_levels`` shrinks both levels of a large filtered index (GH16556)."""
    # because tests should be deterministic (and this test in particular
    # checks that levels are removed, which is not the case for every
    # random input):
    rng = np.random.default_rng(10)  # seed is arbitrary value that works

    size = 1 << 16
    # Draw order matters for reproducibility: first, second, then third.
    first = rng.integers(0, 1 << 13, size).astype(first_type)
    second = rng.integers(0, 1 << 10, size).astype(second_type)
    third = rng.random(size)
    frame = DataFrame({"first": first, "second": second, "third": third})

    grouped = frame.groupby(["first", "second"]).sum()
    kept = grouped[grouped.third < 0.1]

    pruned = kept.index.remove_unused_levels()
    for level in (0, 1):
        assert len(pruned.levels[level]) < len(kept.index.levels[level])
    assert pruned.equals(kept.index)

    rebuilt = kept.reset_index().set_index(["first", "second"]).index
    tm.assert_index_equal(pruned, rebuilt)
264
+
265
+
266
@pytest.mark.parametrize("level0", [["a", "d", "b"], ["a", "d", "b", "unused"]])
@pytest.mark.parametrize(
    "level1", [["w", "x", "y", "z"], ["w", "x", "y", "z", "unused"]]
)
def test_remove_unused_nan(level0, level1):
    """``remove_unused_levels`` handles -1 codes and drops "unused" values (GH 18417)."""
    codes = [[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]
    mi = MultiIndex(levels=[level0, level1], codes=codes)

    pruned = mi.remove_unused_levels()

    tm.assert_index_equal(pruned, mi)
    assert "unused" not in pruned.levels[0]
    assert "unused" not in pruned.levels[1]
278
+
279
+
280
def test_argsort(idx):
    """``idx.argsort()`` matches ``argsort`` of the index's ``values`` array."""
    from_values = idx.values.argsort()
    tm.assert_numpy_array_equal(idx.argsort(), from_values)
284
+
285
+
286
def test_remove_unused_levels_with_nan():
    """``remove_unused_levels`` keeps a NaN level value set via ``set_levels`` (GH 37510)."""
    base = Index([(1, np.nan), (3, 4)]).rename(["id1", "id2"])
    relabelled = base.set_levels(["a", np.nan], level="id1")
    pruned = relabelled.remove_unused_levels()

    wanted = FrozenList([["a", np.nan], [4]])
    # Compared through ``str``, as in the original test.
    assert str(pruned.levels) == str(wanted)
294
+
295
+
296
def test_sort_values_nan():
    """``sort_values`` on an index with -1 (missing) codes (GH48495, GH48626)."""
    levels = [["A", "B", "C"], ["D"]]
    unsorted = MultiIndex(levels=levels, codes=[[1, 0, 2], [-1, -1, 0]])
    wanted = MultiIndex(levels=levels, codes=[[0, 1, 2], [-1, -1, 0]])
    tm.assert_index_equal(unsorted.sort_values(), wanted)
304
+
305
+
306
def test_sort_values_incomparable():
    """``sort_values`` raises ``TypeError`` when a level mixes int and Timestamp (GH48495)."""
    mixed_level = [1, Timestamp("2000-01-01")]
    mi = MultiIndex.from_arrays([mixed_level, [3, 4]])

    match = "'<' not supported between instances of 'Timestamp' and 'int'"
    with pytest.raises(TypeError, match=match):
        mi.sort_values()
317
+
318
+
319
@pytest.mark.parametrize("na_position", ["first", "last"])
@pytest.mark.parametrize("dtype", ["float64", "Int64", "Float64"])
def test_sort_values_with_na_na_position(dtype, na_position):
    """``sort_values`` honours ``na_position`` for a missing second-level value (GH#51612)."""
    index = MultiIndex.from_arrays(
        [Series([1, 1, 2], dtype=dtype), Series([1, None, 3], dtype=dtype)]
    )

    observed = index.sort_values(na_position=na_position)

    # Only the order of the two entries sharing first-level value 1 changes.
    second_level = [None, 1, 3] if na_position == "first" else [1, None, 3]
    wanted = MultiIndex.from_arrays(
        [Series([1, 1, 2], dtype=dtype), Series(second_level, dtype=dtype)]
    )
    tm.assert_index_equal(observed, wanted)
341
+
342
+
343
+ def test_sort_unnecessary_warning():
344
+ # GH#55386
345
+ midx = MultiIndex.from_tuples([(1.5, 2), (3.5, 3), (0, 1)])
346
+ midx = midx.set_levels([2.5, np.nan, 1], level=0)
347
+ result = midx.sort_values()
348
+ expected = MultiIndex.from_tuples([(1, 3), (2.5, 1), (np.nan, 2)])
349
+ tm.assert_index_equal(result, expected)
py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_take.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ import pandas as pd
5
+ import pandas._testing as tm
6
+
7
+
8
def test_take(idx):
    """``take`` matches list indexing, and the index has no ``freq`` attribute."""
    positions = [4, 3, 0, 2]
    assert idx.take(positions).equals(idx[positions])

    # GH 10791
    msg = "'MultiIndex' object has no attribute 'freq'"
    with pytest.raises(AttributeError, match=msg):
        idx.freq
18
+
19
+
20
def test_take_invalid_kwargs(idx):
    """``take`` rejects an unknown keyword and the ``out`` / ``mode`` arguments."""
    indices = [1, 2]

    # (exception type, message pattern, offending keyword arguments)
    rejected = (
        (TypeError, r"take\(\) got an unexpected keyword argument 'foo'", {"foo": 2}),
        (ValueError, "the 'out' parameter is not supported", {"out": indices}),
        (ValueError, "the 'mode' parameter is not supported", {"mode": "clip"}),
    )
    for exc_type, msg, kwargs in rejected:
        with pytest.raises(exc_type, match=msg):
            idx.take(indices, **kwargs)
34
+
35
+
36
def test_take_fill_value():
    """``MultiIndex.take`` with and without ``fill_value`` / ``allow_fill`` (GH 12631)."""
    day_one = pd.Timestamp("2011-01-01")
    day_two = pd.Timestamp("2011-01-02")
    level_names = ["str", "dt"]
    idx = pd.MultiIndex.from_product(
        [["A", "B"], [day_one, day_two]], names=level_names
    )

    def from_tuples(last_entry):
        # Expected result: two fixed leading entries plus ``last_entry``.
        return pd.MultiIndex.from_tuples(
            [("A", day_two), ("A", day_one), last_entry], names=level_names
        )

    # Without fill_value, -1 means "last element".
    result = idx.take(np.array([1, 0, -1]))
    tm.assert_index_equal(result, from_tuples(("B", day_two)))

    # fill_value
    result = idx.take(np.array([1, 0, -1]), fill_value=True)
    tm.assert_index_equal(result, from_tuples((np.nan, pd.NaT)))

    # allow_fill=False
    result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
    tm.assert_index_equal(result, from_tuples(("B", day_two)))

    msg = "When allow_fill=True and fill_value is not None, all indices must be >= -1"
    for bad_position in (-2, -5):
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, bad_position]), fill_value=True)

    msg = "index -5 is out of bounds for( axis 0 with)? size 4"
    with pytest.raises(IndexError, match=msg):
        idx.take(np.array([1, -5]))
py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/__init__.py ADDED
File without changes
py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_astype.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas import (
5
+ Index,
6
+ to_datetime,
7
+ to_timedelta,
8
+ )
9
+ import pandas._testing as tm
10
+
11
+
12
class TestAstype:
    """``Index.astype`` tests for float64 and object-dtype numeric indexes."""

    def test_astype_float64_to_uint64(self):
        """Whole-number floats cast to uint64; negative values are rejected."""
        # GH#45309 used to incorrectly return Index with int64 dtype
        floats = Index([0.0, 5.0, 10.0, 15.0, 20.0], dtype=np.float64)
        wanted = Index([0, 5, 10, 15, 20], dtype=np.uint64)
        tm.assert_index_equal(floats.astype("u8"), wanted, exact=True)

        shifted_negative = floats - 10
        with pytest.raises(ValueError, match="losslessly"):
            shifted_negative.astype(np.uint64)

    def test_astype_float64_to_object(self):
        """Casting float64 to object gives an equal Index with object dtype."""
        floats = Index([0.0, 2.5, 5.0, 7.5, 10.0], dtype=np.float64)
        as_object = floats.astype(object)
        assert as_object.equals(floats)
        assert floats.equals(as_object)
        assert isinstance(as_object, Index) and as_object.dtype == object

    def test_astype_float64_mixed_to_object(self):
        """Same as above for a named index built from mixed int/float values."""
        # mixed int-float
        floats = Index([1.5, 2, 3, 4, 5], dtype=np.float64)
        floats.name = "foo"
        as_object = floats.astype(object)
        assert as_object.equals(floats)
        assert floats.equals(as_object)
        assert isinstance(as_object, Index) and as_object.dtype == object

    @pytest.mark.parametrize("dtype", ["int16", "int32", "int64"])
    def test_astype_float64_to_int_dtype(self, dtype):
        """Float64 casts to integer dtypes; 1.1 is expected to become 1."""
        # GH#12881
        # a float astype int
        wanted = Index([0, 1, 2], dtype=dtype)
        for values in ([0, 1, 2], [0, 1.1, 2]):
            floats = Index(values, dtype=np.float64)
            tm.assert_index_equal(floats.astype(dtype), wanted, exact=True)

    @pytest.mark.parametrize("dtype", ["float32", "float64"])
    def test_astype_float64_to_float_dtype(self, dtype):
        """Float64 casts to another float dtype and reports that dtype."""
        # GH#12881
        # a float astype float
        floats = Index([0, 1, 2], dtype=np.float64)
        converted = floats.astype(dtype)
        assert isinstance(converted, Index) and converted.dtype == dtype

    @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
    def test_astype_float_to_datetimelike(self, dtype):
        """Float64 casts to datetime64/timedelta64, matching ``Series.astype``."""
        # GH#49660 pre-2.0 Index.astype from floating to M8/m8/Period raised,
        # inconsistent with Series.astype
        floats = Index([0, 1.1, 2], dtype=np.float64)

        # "M8" is datetime64, "m8" is timedelta64.
        converter = to_datetime if dtype[0] == "M" else to_timedelta
        wanted = converter(floats.values)
        tm.assert_index_equal(floats.astype(dtype), wanted)

        # check that we match Series behavior
        as_series = floats.to_series().set_axis(range(3)).astype(dtype)
        wanted_series = wanted.to_series().set_axis(range(3))
        tm.assert_series_equal(as_series, wanted_series)

    @pytest.mark.parametrize("dtype", [int, "int16", "int32", "int64"])
    @pytest.mark.parametrize("non_finite", [np.inf, np.nan])
    def test_cannot_cast_inf_to_int(self, non_finite, dtype):
        """Casting inf or NaN to an integer dtype raises ``ValueError``."""
        # GH#13149
        floats = Index([1, 2, non_finite], dtype=np.float64)

        msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
        with pytest.raises(ValueError, match=msg):
            floats.astype(dtype)

    def test_astype_from_object(self):
        """An object-dtype index of floats casts to float64."""
        values = [1.0, np.nan, 0.2]
        converted = Index(values, dtype="object").astype(float)
        wanted = Index(values, dtype=np.float64)
        assert converted.dtype == wanted.dtype
        tm.assert_index_equal(converted, wanted)
py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_indexing.py ADDED
@@ -0,0 +1,611 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas.errors import InvalidIndexError
5
+
6
+ from pandas import (
7
+ NA,
8
+ Index,
9
+ RangeIndex,
10
+ Series,
11
+ Timestamp,
12
+ )
13
+ import pandas._testing as tm
14
+ from pandas.core.arrays import (
15
+ ArrowExtensionArray,
16
+ FloatingArray,
17
+ )
18
+
19
+
20
@pytest.fixture
def index_large():
    """Return a uint64 ``Index`` of five values starting at ``2**63``."""
    # large values used in Index[uint64] tests where no compat needed with Int64/Float64
    base = 2**63
    offsets = (0, 10, 15, 20, 25)
    return Index([base + offset for offset in offsets], dtype=np.uint64)
25
+
26
+
27
class TestGetLoc:
    """Tests for ``Index.get_loc`` on numeric indexes."""

    def test_get_loc(self):
        """A present integer label resolves to its position."""
        numeric = Index([0, 1, 2])
        position = numeric.get_loc(1)
        assert position == 1

    def test_get_loc_raises_bad_label(self):
        """A list key is rejected with ``InvalidIndexError``."""
        numeric = Index([0, 1, 2])
        bad_key = [1, 2]
        with pytest.raises(InvalidIndexError, match=r"\[1, 2\]"):
            numeric.get_loc(bad_key)

    def test_get_loc_float64(self):
        """Keys absent from a float64 index raise ``KeyError`` naming the key."""
        floats = Index([0.0, 1.0, 2.0], dtype=np.float64)

        # Each pair is (missing key, regex the KeyError message must match).
        cases = [
            ("foo", "^'foo'$"),
            (1.5, r"^1\.5$"),
            (True, "^True$"),
            (False, "^False$"),
        ]
        for key, pattern in cases:
            with pytest.raises(KeyError, match=pattern):
                floats.get_loc(key)

    def test_get_loc_na(self):
        """``get_loc`` and ``slice_locs`` on float64 indexes containing NaN."""
        single_nan = Index([np.nan, 1, 2], dtype=np.float64)
        assert single_nan.get_loc(1) == 1
        assert single_nan.get_loc(np.nan) == 0

        two_nans = Index([np.nan, 1, np.nan], dtype=np.float64)
        assert two_nans.get_loc(1) == 1

        # representable by slice [0:2:2]
        msg = "'Cannot get left slice bound for non-unique label: nan'"
        with pytest.raises(KeyError, match=msg):
            two_nans.slice_locs(np.nan)

        # not representable by slice
        three_nans = Index([np.nan, 1, np.nan, np.nan], dtype=np.float64)
        assert three_nans.get_loc(1) == 1
        msg = "'Cannot get left slice bound for non-unique label: nan"
        with pytest.raises(KeyError, match=msg):
            three_nans.slice_locs(np.nan)

    def test_get_loc_missing_nan(self):
        """Missing scalar keys raise ``KeyError``; a list key is invalid."""
        # GH#8569
        floats = Index([1, 2], dtype=np.float64)
        assert floats.get_loc(1) == 0

        with pytest.raises(KeyError, match=r"^3$"):
            floats.get_loc(3)

        with pytest.raises(KeyError, match="^nan$"):
            floats.get_loc(np.nan)

        # listlike/non-hashable key: this test expects InvalidIndexError
        with pytest.raises(InvalidIndexError, match=r"\[nan\]"):
            floats.get_loc([np.nan])

    @pytest.mark.parametrize("vals", [[1], [1.0], [Timestamp("2019-12-31")], ["test"]])
    def test_get_loc_float_index_nan_with_method(self, vals):
        """Looking up NaN in an index that has none raises ``KeyError``."""
        # GH#39382
        target = Index(vals)
        with pytest.raises(KeyError, match="nan"):
            target.get_loc(np.nan)

    @pytest.mark.parametrize("dtype", ["f8", "i8", "u8"])
    def test_get_loc_numericindex_none_raises(self, dtype):
        """``None`` as a key on a large numeric index raises ``KeyError``."""
        # case that goes through searchsorted and key is non-comparable to values
        values = np.arange(10**7, dtype=dtype)
        big = Index(values)
        with pytest.raises(KeyError, match="None"):
            big.get_loc(None)

    def test_get_loc_overflows(self):
        """A key one past the int64 maximum raises ``KeyError``."""
        # unique but non-monotonic goes through IndexEngine.mapping.get_item
        shuffled = Index([0, 2, 1])

        too_big = np.iinfo(np.int64).max + 1
        pattern = str(too_big)

        with pytest.raises(KeyError, match=pattern):
            shuffled.get_loc(too_big)
        with pytest.raises(KeyError, match=pattern):
            shuffled._engine.get_loc(too_big)
105
+
106
+
107
+ class TestGetIndexer:
108
+ def test_get_indexer(self):
109
+ index1 = Index([1, 2, 3, 4, 5])
110
+ index2 = Index([2, 4, 6])
111
+
112
+ r1 = index1.get_indexer(index2)
113
+ e1 = np.array([1, 3, -1], dtype=np.intp)
114
+ tm.assert_almost_equal(r1, e1)
115
+
116
+ @pytest.mark.parametrize("reverse", [True, False])
117
+ @pytest.mark.parametrize(
118
+ "expected,method",
119
+ [
120
+ (np.array([-1, 0, 0, 1, 1], dtype=np.intp), "pad"),
121
+ (np.array([-1, 0, 0, 1, 1], dtype=np.intp), "ffill"),
122
+ (np.array([0, 0, 1, 1, 2], dtype=np.intp), "backfill"),
123
+ (np.array([0, 0, 1, 1, 2], dtype=np.intp), "bfill"),
124
+ ],
125
+ )
126
+ def test_get_indexer_methods(self, reverse, expected, method):
127
+ index1 = Index([1, 2, 3, 4, 5])
128
+ index2 = Index([2, 4, 6])
129
+
130
+ if reverse:
131
+ index1 = index1[::-1]
132
+ expected = expected[::-1]
133
+
134
+ result = index2.get_indexer(index1, method=method)
135
+ tm.assert_almost_equal(result, expected)
136
+
137
+ def test_get_indexer_invalid(self):
138
+ # GH10411
139
+ index = Index(np.arange(10))
140
+
141
+ with pytest.raises(ValueError, match="tolerance argument"):
142
+ index.get_indexer([1, 0], tolerance=1)
143
+
144
+ with pytest.raises(ValueError, match="limit argument"):
145
+ index.get_indexer([1, 0], limit=1)
146
+
147
+ @pytest.mark.parametrize(
148
+ "method, tolerance, indexer, expected",
149
+ [
150
+ ("pad", None, [0, 5, 9], [0, 5, 9]),
151
+ ("backfill", None, [0, 5, 9], [0, 5, 9]),
152
+ ("nearest", None, [0, 5, 9], [0, 5, 9]),
153
+ ("pad", 0, [0, 5, 9], [0, 5, 9]),
154
+ ("backfill", 0, [0, 5, 9], [0, 5, 9]),
155
+ ("nearest", 0, [0, 5, 9], [0, 5, 9]),
156
+ ("pad", None, [0.2, 1.8, 8.5], [0, 1, 8]),
157
+ ("backfill", None, [0.2, 1.8, 8.5], [1, 2, 9]),
158
+ ("nearest", None, [0.2, 1.8, 8.5], [0, 2, 9]),
159
+ ("pad", 1, [0.2, 1.8, 8.5], [0, 1, 8]),
160
+ ("backfill", 1, [0.2, 1.8, 8.5], [1, 2, 9]),
161
+ ("nearest", 1, [0.2, 1.8, 8.5], [0, 2, 9]),
162
+ ("pad", 0.2, [0.2, 1.8, 8.5], [0, -1, -1]),
163
+ ("backfill", 0.2, [0.2, 1.8, 8.5], [-1, 2, -1]),
164
+ ("nearest", 0.2, [0.2, 1.8, 8.5], [0, 2, -1]),
165
+ ],
166
+ )
167
+ def test_get_indexer_nearest(self, method, tolerance, indexer, expected):
168
+ index = Index(np.arange(10))
169
+
170
+ actual = index.get_indexer(indexer, method=method, tolerance=tolerance)
171
+ tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp))
172
+
173
+ @pytest.mark.parametrize("listtype", [list, tuple, Series, np.array])
174
+ @pytest.mark.parametrize(
175
+ "tolerance, expected",
176
+ list(
177
+ zip(
178
+ [[0.3, 0.3, 0.1], [0.2, 0.1, 0.1], [0.1, 0.5, 0.5]],
179
+ [[0, 2, -1], [0, -1, -1], [-1, 2, 9]],
180
+ )
181
+ ),
182
+ )
183
+ def test_get_indexer_nearest_listlike_tolerance(
184
+ self, tolerance, expected, listtype
185
+ ):
186
+ index = Index(np.arange(10))
187
+
188
+ actual = index.get_indexer(
189
+ [0.2, 1.8, 8.5], method="nearest", tolerance=listtype(tolerance)
190
+ )
191
+ tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp))
192
+
193
+ def test_get_indexer_nearest_error(self):
194
+ index = Index(np.arange(10))
195
+ with pytest.raises(ValueError, match="limit argument"):
196
+ index.get_indexer([1, 0], method="nearest", limit=1)
197
+
198
+ with pytest.raises(ValueError, match="tolerance size must match"):
199
+ index.get_indexer([1, 0], method="nearest", tolerance=[1, 2, 3])
200
+
201
+ @pytest.mark.parametrize(
202
+ "method,expected",
203
+ [("pad", [8, 7, 0]), ("backfill", [9, 8, 1]), ("nearest", [9, 7, 0])],
204
+ )
205
+ def test_get_indexer_nearest_decreasing(self, method, expected):
206
+ index = Index(np.arange(10))[::-1]
207
+
208
+ actual = index.get_indexer([0, 5, 9], method=method)
209
+ tm.assert_numpy_array_equal(actual, np.array([9, 4, 0], dtype=np.intp))
210
+
211
+ actual = index.get_indexer([0.2, 1.8, 8.5], method=method)
212
+ tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp))
213
+
214
+ @pytest.mark.parametrize("idx_dtype", ["int64", "float64", "uint64", "range"])
215
+ @pytest.mark.parametrize("method", ["get_indexer", "get_indexer_non_unique"])
216
+ def test_get_indexer_numeric_index_boolean_target(self, method, idx_dtype):
217
+ # GH 16877
218
+
219
+ if idx_dtype == "range":
220
+ numeric_index = RangeIndex(4)
221
+ else:
222
+ numeric_index = Index(np.arange(4, dtype=idx_dtype))
223
+
224
+ other = Index([True, False, True])
225
+
226
+ result = getattr(numeric_index, method)(other)
227
+ expected = np.array([-1, -1, -1], dtype=np.intp)
228
+ if method == "get_indexer":
229
+ tm.assert_numpy_array_equal(result, expected)
230
+ else:
231
+ missing = np.arange(3, dtype=np.intp)
232
+ tm.assert_numpy_array_equal(result[0], expected)
233
+ tm.assert_numpy_array_equal(result[1], missing)
234
+
235
+ @pytest.mark.parametrize("method", ["pad", "backfill", "nearest"])
236
+ def test_get_indexer_with_method_numeric_vs_bool(self, method):
237
+ left = Index([1, 2, 3])
238
+ right = Index([True, False])
239
+
240
+ with pytest.raises(TypeError, match="Cannot compare"):
241
+ left.get_indexer(right, method=method)
242
+
243
+ with pytest.raises(TypeError, match="Cannot compare"):
244
+ right.get_indexer(left, method=method)
245
+
246
+ def test_get_indexer_numeric_vs_bool(self):
247
+ left = Index([1, 2, 3])
248
+ right = Index([True, False])
249
+
250
+ res = left.get_indexer(right)
251
+ expected = -1 * np.ones(len(right), dtype=np.intp)
252
+ tm.assert_numpy_array_equal(res, expected)
253
+
254
+ res = right.get_indexer(left)
255
+ expected = -1 * np.ones(len(left), dtype=np.intp)
256
+ tm.assert_numpy_array_equal(res, expected)
257
+
258
+ res = left.get_indexer_non_unique(right)[0]
259
+ expected = -1 * np.ones(len(right), dtype=np.intp)
260
+ tm.assert_numpy_array_equal(res, expected)
261
+
262
+ res = right.get_indexer_non_unique(left)[0]
263
+ expected = -1 * np.ones(len(left), dtype=np.intp)
264
+ tm.assert_numpy_array_equal(res, expected)
265
+
266
+ def test_get_indexer_float64(self):
267
+ idx = Index([0.0, 1.0, 2.0], dtype=np.float64)
268
+ tm.assert_numpy_array_equal(
269
+ idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp)
270
+ )
271
+
272
+ target = [-0.1, 0.5, 1.1]
273
+ tm.assert_numpy_array_equal(
274
+ idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp)
275
+ )
276
+ tm.assert_numpy_array_equal(
277
+ idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp)
278
+ )
279
+ tm.assert_numpy_array_equal(
280
+ idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp)
281
+ )
282
+
283
+ def test_get_indexer_nan(self):
284
+ # GH#7820
285
+ result = Index([1, 2, np.nan], dtype=np.float64).get_indexer([np.nan])
286
+ expected = np.array([2], dtype=np.intp)
287
+ tm.assert_numpy_array_equal(result, expected)
288
+
289
+ def test_get_indexer_int64(self):
290
+ index = Index(range(0, 20, 2), dtype=np.int64)
291
+ target = Index(np.arange(10), dtype=np.int64)
292
+ indexer = index.get_indexer(target)
293
+ expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp)
294
+ tm.assert_numpy_array_equal(indexer, expected)
295
+
296
+ target = Index(np.arange(10), dtype=np.int64)
297
+ indexer = index.get_indexer(target, method="pad")
298
+ expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp)
299
+ tm.assert_numpy_array_equal(indexer, expected)
300
+
301
+ target = Index(np.arange(10), dtype=np.int64)
302
+ indexer = index.get_indexer(target, method="backfill")
303
+ expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp)
304
+ tm.assert_numpy_array_equal(indexer, expected)
305
+
306
+ def test_get_indexer_uint64(self, index_large):
307
+ target = Index(np.arange(10).astype("uint64") * 5 + 2**63)
308
+ indexer = index_large.get_indexer(target)
309
+ expected = np.array([0, -1, 1, 2, 3, 4, -1, -1, -1, -1], dtype=np.intp)
310
+ tm.assert_numpy_array_equal(indexer, expected)
311
+
312
+ target = Index(np.arange(10).astype("uint64") * 5 + 2**63)
313
+ indexer = index_large.get_indexer(target, method="pad")
314
+ expected = np.array([0, 0, 1, 2, 3, 4, 4, 4, 4, 4], dtype=np.intp)
315
+ tm.assert_numpy_array_equal(indexer, expected)
316
+
317
+ target = Index(np.arange(10).astype("uint64") * 5 + 2**63)
318
+ indexer = index_large.get_indexer(target, method="backfill")
319
+ expected = np.array([0, 1, 1, 2, 3, 4, -1, -1, -1, -1], dtype=np.intp)
320
+ tm.assert_numpy_array_equal(indexer, expected)
321
+
322
+ @pytest.mark.parametrize("val, val2", [(4, 5), (4, 4), (4, NA), (NA, NA)])
323
+ def test_get_loc_masked(self, val, val2, any_numeric_ea_and_arrow_dtype):
324
+ # GH#39133
325
+ idx = Index([1, 2, 3, val, val2], dtype=any_numeric_ea_and_arrow_dtype)
326
+ result = idx.get_loc(2)
327
+ assert result == 1
328
+
329
+ with pytest.raises(KeyError, match="9"):
330
+ idx.get_loc(9)
331
+
332
+ def test_get_loc_masked_na(self, any_numeric_ea_and_arrow_dtype):
333
+ # GH#39133
334
+ idx = Index([1, 2, NA], dtype=any_numeric_ea_and_arrow_dtype)
335
+ result = idx.get_loc(NA)
336
+ assert result == 2
337
+
338
+ idx = Index([1, 2, NA, NA], dtype=any_numeric_ea_and_arrow_dtype)
339
+ result = idx.get_loc(NA)
340
+ tm.assert_numpy_array_equal(result, np.array([False, False, True, True]))
341
+
342
+ idx = Index([1, 2, 3], dtype=any_numeric_ea_and_arrow_dtype)
343
+ with pytest.raises(KeyError, match="NA"):
344
+ idx.get_loc(NA)
345
+
346
+ def test_get_loc_masked_na_and_nan(self):
347
+ # GH#39133
348
+ idx = Index(
349
+ FloatingArray(
350
+ np.array([1, 2, 1, np.nan]), mask=np.array([False, False, True, False])
351
+ )
352
+ )
353
+ result = idx.get_loc(NA)
354
+ assert result == 2
355
+ result = idx.get_loc(np.nan)
356
+ assert result == 3
357
+
358
+ idx = Index(
359
+ FloatingArray(np.array([1, 2, 1.0]), mask=np.array([False, False, True]))
360
+ )
361
+ result = idx.get_loc(NA)
362
+ assert result == 2
363
+ with pytest.raises(KeyError, match="nan"):
364
+ idx.get_loc(np.nan)
365
+
366
+ idx = Index(
367
+ FloatingArray(
368
+ np.array([1, 2, np.nan]), mask=np.array([False, False, False])
369
+ )
370
+ )
371
+ result = idx.get_loc(np.nan)
372
+ assert result == 2
373
+ with pytest.raises(KeyError, match="NA"):
374
+ idx.get_loc(NA)
375
+
376
+ @pytest.mark.parametrize("val", [4, 2])
377
+ def test_get_indexer_masked_na(self, any_numeric_ea_and_arrow_dtype, val):
378
+ # GH#39133
379
+ idx = Index([1, 2, NA, 3, val], dtype=any_numeric_ea_and_arrow_dtype)
380
+ result = idx.get_indexer_for([1, NA, 5])
381
+ expected = np.array([0, 2, -1])
382
+ tm.assert_numpy_array_equal(result, expected, check_dtype=False)
383
+
384
+ @pytest.mark.parametrize("dtype", ["boolean", "bool[pyarrow]"])
385
+ def test_get_indexer_masked_na_boolean(self, dtype):
386
+ # GH#39133
387
+ if dtype == "bool[pyarrow]":
388
+ pytest.importorskip("pyarrow")
389
+ idx = Index([True, False, NA], dtype=dtype)
390
+ result = idx.get_loc(False)
391
+ assert result == 1
392
+ result = idx.get_loc(NA)
393
+ assert result == 2
394
+
395
+ def test_get_indexer_arrow_dictionary_target(self):
396
+ pa = pytest.importorskip("pyarrow")
397
+ target = Index(
398
+ ArrowExtensionArray(
399
+ pa.array([1, 2], type=pa.dictionary(pa.int8(), pa.int8()))
400
+ )
401
+ )
402
+ idx = Index([1])
403
+
404
+ result = idx.get_indexer(target)
405
+ expected = np.array([0, -1], dtype=np.int64)
406
+ tm.assert_numpy_array_equal(result, expected)
407
+
408
+ result_1, result_2 = idx.get_indexer_non_unique(target)
409
+ expected_1, expected_2 = np.array([0, -1], dtype=np.int64), np.array(
410
+ [1], dtype=np.int64
411
+ )
412
+ tm.assert_numpy_array_equal(result_1, expected_1)
413
+ tm.assert_numpy_array_equal(result_2, expected_2)
414
+
415
+
416
+ class TestWhere:
417
+ @pytest.mark.parametrize(
418
+ "index",
419
+ [
420
+ Index(np.arange(5, dtype="float64")),
421
+ Index(range(0, 20, 2), dtype=np.int64),
422
+ Index(np.arange(5, dtype="uint64")),
423
+ ],
424
+ )
425
+ def test_where(self, listlike_box, index):
426
+ cond = [True] * len(index)
427
+ expected = index
428
+ result = index.where(listlike_box(cond))
429
+
430
+ cond = [False] + [True] * (len(index) - 1)
431
+ expected = Index([index._na_value] + index[1:].tolist(), dtype=np.float64)
432
+ result = index.where(listlike_box(cond))
433
+ tm.assert_index_equal(result, expected)
434
+
435
+ def test_where_uint64(self):
436
+ idx = Index([0, 6, 2], dtype=np.uint64)
437
+ mask = np.array([False, True, False])
438
+ other = np.array([1], dtype=np.int64)
439
+
440
+ expected = Index([1, 6, 1], dtype=np.uint64)
441
+
442
+ result = idx.where(mask, other)
443
+ tm.assert_index_equal(result, expected)
444
+
445
+ result = idx.putmask(~mask, other)
446
+ tm.assert_index_equal(result, expected)
447
+
448
+ def test_where_infers_type_instead_of_trying_to_convert_string_to_float(self):
449
+ # GH 32413
450
+ index = Index([1, np.nan])
451
+ cond = index.notna()
452
+ other = Index(["a", "b"], dtype="string")
453
+
454
+ expected = Index([1.0, "b"])
455
+ result = index.where(cond, other)
456
+
457
+ tm.assert_index_equal(result, expected)
458
+
459
+
460
class TestTake:
    """Tests for ``Index.take`` on numeric indexes, with and without filling."""

    @pytest.mark.parametrize("idx_dtype", [np.float64, np.int64, np.uint64])
    def test_take_preserve_name(self, idx_dtype):
        """The result of ``take`` carries the source index's name."""
        named = Index([1, 2, 3, 4], dtype=idx_dtype, name="foo")
        picked = named.take([3, 0, 1])
        assert named.name == picked.name

    def test_take_fill_value_float64(self):
        """``fill_value`` turns -1 into NaN on float64 unless filling is off."""
        # GH 12631
        idx = Index([1.0, 2.0, 3.0], name="xxx", dtype=np.float64)
        positions = np.array([1, 0, -1])

        # Without fill_value, -1 selects the last element.
        taken = idx.take(positions)
        tm.assert_index_equal(
            taken, Index([2.0, 1.0, 3.0], dtype=np.float64, name="xxx")
        )

        # fill_value
        taken = idx.take(positions, fill_value=True)
        tm.assert_index_equal(
            taken, Index([2.0, 1.0, np.nan], dtype=np.float64, name="xxx")
        )

        # allow_fill=False
        taken = idx.take(positions, allow_fill=False, fill_value=True)
        tm.assert_index_equal(
            taken, Index([2.0, 1.0, 3.0], dtype=np.float64, name="xxx")
        )

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        # Anything below -1 is rejected when filling is requested.
        for bad in (-2, -5):
            with pytest.raises(ValueError, match=msg):
                idx.take(np.array([1, 0, bad]), fill_value=True)

        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))

    @pytest.mark.parametrize("dtype", [np.int64, np.uint64])
    def test_take_fill_value_ints(self, dtype):
        """Integer indexes reject ``fill_value`` when filling is allowed."""
        # see gh-12631
        idx = Index([1, 2, 3], dtype=dtype, name="xxx")
        positions = np.array([1, 0, -1])

        taken = idx.take(positions)
        tm.assert_index_equal(taken, Index([2, 1, 3], dtype=dtype, name="xxx"))

        name = type(idx).__name__
        msg = f"Unable to fill values because {name} cannot contain NA"

        # fill_value=True
        with pytest.raises(ValueError, match=msg):
            idx.take(positions, fill_value=True)

        # allow_fill=False
        taken = idx.take(positions, allow_fill=False, fill_value=True)
        tm.assert_index_equal(taken, Index([2, 1, 3], dtype=dtype, name="xxx"))

        # Indices below -1 raise the same "cannot contain NA" error.
        for bad in (-2, -5):
            with pytest.raises(ValueError, match=msg):
                idx.take(np.array([1, 0, bad]), fill_value=True)

        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))
525
+
526
+
527
class TestContains:
    """Membership (``in``) checks on numeric indexes."""

    @pytest.mark.parametrize("dtype", [np.float64, np.int64, np.uint64])
    def test_contains_none(self, dtype):
        """``None`` is reported as absent instead of raising."""
        # GH#35788 should return False, not raise TypeError
        values = [0, 1, 2, 3, 4]
        numeric = Index(values, dtype=dtype)
        assert not (None in numeric)

    def test_contains_float64_nans(self):
        """NaN is found in a float64 index that contains NaN."""
        floats = Index([1.0, 2.0, np.nan], dtype=np.float64)
        found = np.nan in floats
        assert found

    def test_contains_float64_not_nans(self):
        """A regular value is found in a float64 index that also holds NaN."""
        floats = Index([1.0, 2.0, np.nan], dtype=np.float64)
        found = 1.0 in floats
        assert found
541
+
542
+
543
class TestSliceLocs:
    """Tests for ``Index.slice_locs`` on numeric indexes."""

    @pytest.mark.parametrize("dtype", [int, float])
    def test_slice_locs(self, dtype):
        """Integer bounds on an increasing index and on its reverse."""
        index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype))
        n = len(index)

        assert index.slice_locs(start=2) == (2, n)
        assert index.slice_locs(start=3) == (3, n)
        assert index.slice_locs(3, 8) == (3, 6)
        assert index.slice_locs(5, 10) == (3, n)
        assert index.slice_locs(end=8) == (0, 6)
        assert index.slice_locs(end=9) == (0, 7)

        # reversed
        index2 = index[::-1]
        assert index2.slice_locs(8, 2) == (2, 6)
        assert index2.slice_locs(7, 3) == (2, 5)

    @pytest.mark.parametrize("dtype", [int, float])
    def test_slice_locs_float_locs(self, dtype):
        """Float bounds, including values that are not labels in the index."""
        index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype))
        n = len(index)
        assert index.slice_locs(5.0, 10.0) == (3, n)
        assert index.slice_locs(4.5, 10.5) == (3, 8)

        index2 = index[::-1]
        assert index2.slice_locs(8.5, 1.5) == (2, 6)
        assert index2.slice_locs(10.5, -1) == (0, n)

    @pytest.mark.parametrize("dtype", [int, float])
    def test_slice_locs_dup_numeric(self, dtype):
        """Duplicate labels are covered in full by the returned bounds."""
        index = Index(np.array([10, 12, 12, 14], dtype=dtype))
        assert index.slice_locs(12, 12) == (1, 3)
        assert index.slice_locs(11, 13) == (1, 3)

        index2 = index[::-1]
        assert index2.slice_locs(12, 12) == (1, 3)
        assert index2.slice_locs(13, 11) == (1, 3)

    def test_slice_locs_na(self):
        """NaN can be used as a slice bound when it is a label in the index."""
        index = Index([np.nan, 1, 2])
        assert index.slice_locs(1) == (1, 3)
        assert index.slice_locs(np.nan) == (0, 3)

        index = Index([0, np.nan, np.nan, 1, 2])
        assert index.slice_locs(np.nan) == (1, 5)

    def test_slice_locs_na_raises(self):
        """A bound that is not a label of this NaN-containing index raises."""
        index = Index([np.nan, 1, 2])
        # An empty ``match`` pattern accepts any message, so pin the missing
        # key. This is the pattern test_get_loc_float64 uses for the same key.
        with pytest.raises(KeyError, match=r"^1\.5$"):
            index.slice_locs(start=1.5)

        with pytest.raises(KeyError, match=r"^1\.5$"):
            index.slice_locs(end=1.5)
597
+
598
+
599
+ class TestGetSliceBounds:
600
+ @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
601
+ def test_get_slice_bounds_within(self, side, expected):
602
+ index = Index(range(6))
603
+ result = index.get_slice_bound(4, side=side)
604
+ assert result == expected
605
+
606
+ @pytest.mark.parametrize("side", ["left", "right"])
607
+ @pytest.mark.parametrize("bound, expected", [(-1, 0), (10, 6)])
608
+ def test_get_slice_bounds_outside(self, side, expected, bound):
609
+ index = Index(range(6))
610
+ result = index.get_slice_bound(bound, side=side)
611
+ assert result == expected