diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/__init__.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_constructors.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_constructors.py new file mode 100644 index 0000000000000000000000000000000000000000..dcf0165ead6c0edb2073ecd0c17cdd7da37daf78 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_constructors.py @@ -0,0 +1,78 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + MultiIndex, + Series, +) +import pandas._testing as tm + + +class TestIndexConstructor: + # Tests for the Index constructor, specifically for cases that do + # not return a subclass + + @pytest.mark.parametrize("value", [1, np.int64(1)]) + def test_constructor_corner(self, value): + # corner case + msg = ( + r"Index\(\.\.\.\) must be called with a collection of some " + f"kind, {value} was passed" + ) + with pytest.raises(TypeError, match=msg): + Index(value) + + @pytest.mark.parametrize("index_vals", [[("A", 1), "B"], ["B", ("A", 1)]]) + def test_construction_list_mixed_tuples(self, index_vals): + # see gh-10697: if we are constructing from a mixed list of tuples, + # make sure that we are independent of the sorting order. + index = Index(index_vals) + assert isinstance(index, Index) + assert not isinstance(index, MultiIndex) + + def test_constructor_cast(self): + msg = "could not convert string to float" + with pytest.raises(ValueError, match=msg): + Index(["a", "b", "c"], dtype=float) + + @pytest.mark.parametrize("tuple_list", [[()], [(), ()]]) + def test_construct_empty_tuples(self, tuple_list): + # GH #45608 + result = Index(tuple_list) + expected = MultiIndex.from_tuples(tuple_list) + + tm.assert_index_equal(result, expected) + + def test_index_string_inference(self): + # GH#54430 + expected = Index(["a", "b"], dtype=pd.StringDtype(na_value=np.nan)) + with pd.option_context("future.infer_string", True): + ser = Index(["a", "b"]) + tm.assert_index_equal(ser, expected) + + expected = Index(["a", 1], dtype="object") + with pd.option_context("future.infer_string", True): + ser = Index(["a", 1]) + tm.assert_index_equal(ser, expected) + + def test_inference_on_pandas_objects(self): + # GH#56012 + idx = Index([pd.Timestamp("2019-12-31")], dtype=object) + with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): + result = Index(idx) + assert result.dtype != np.object_ + + ser = Series([pd.Timestamp("2019-12-31")], dtype=object) + + with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): + result = Index(ser) + assert result.dtype != np.object_ + + def test_constructor_not_read_only(self): + # GH#57130 + ser = Series([1, 2], dtype=object) + with pd.option_context("mode.copy_on_write", True): + idx = Index(ser) + assert idx._values.flags.writeable diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_formats.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_formats.py new file mode 100644 index 0000000000000000000000000000000000000000..955e3be107f7514b597f1a961dfc548367613c46 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_formats.py @@ -0,0 +1,163 @@ +import numpy as np +import pytest + +from pandas._config import using_string_dtype +import pandas._config.config as cf + +from pandas import Index +import pandas._testing as tm + + +class TestIndexRendering: + def test_repr_is_valid_construction_code(self): + # for the case of Index, where the repr is traditional rather than + # stylized + idx = Index(["a", "b"]) + res = eval(repr(idx)) + tm.assert_index_equal(res, idx) + + @pytest.mark.xfail(using_string_dtype(), reason="repr different") + @pytest.mark.parametrize( + "index,expected", + [ + # ASCII + # short + ( + Index(["a", "bb", "ccc"]), + """Index(['a', 'bb', 'ccc'], dtype='object')""", + ), + # multiple lines + ( + Index(["a", "bb", "ccc"] * 10), + "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', " + "'bb', 'ccc', 'a', 'bb', 'ccc',\n" + " 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', " + "'bb', 'ccc', 'a', 'bb', 'ccc',\n" + " 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n" + " dtype='object')", + ), + # truncated + ( + Index(["a", "bb", "ccc"] * 100), + "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',\n" + " ...\n" + " 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n" + " dtype='object', length=300)", + ), + # Non-ASCII + # short + ( + Index(["あ", "いい", "ううう"]), + """Index(['あ', 'いい', 'ううう'], dtype='object')""", + ), + # multiple lines + ( + Index(["あ", "いい", "ううう"] * 10), + ( + "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', " + "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', " + "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう'],\n" + " dtype='object')" + ), + ), + # truncated + ( + Index(["あ", "いい", "ううう"] * 100), + ( + "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', " + "'あ', 'いい', 'ううう', 'あ',\n" + " ...\n" + " 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう'],\n" + " dtype='object', length=300)" + ), + ), + ], + ) + def test_string_index_repr(self, index, expected): + result = repr(index) + assert result == expected + + @pytest.mark.xfail(using_string_dtype(), reason="repr different") + @pytest.mark.parametrize( + "index,expected", + [ + # short + ( + Index(["あ", "いい", "ううう"]), + ("Index(['あ', 'いい', 'ううう'], dtype='object')"), + ), + # multiple lines + ( + Index(["あ", "いい", "ううう"] * 10), + ( + "Index(['あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう'],\n" + " dtype='object')" + "" + ), + ), + # truncated + ( + Index(["あ", "いい", "ううう"] * 100), + ( + "Index(['あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ',\n" + " ...\n" + " 'ううう', 'あ', 'いい', 'ううう', 'あ', " + "'いい', 'ううう', 'あ', 'いい',\n" + " 'ううう'],\n" + " dtype='object', length=300)" + ), + ), + ], + ) + def test_string_index_repr_with_unicode_option(self, index, expected): + # Enable Unicode option ----------------------------------------- + with cf.option_context("display.unicode.east_asian_width", True): + result = repr(index) + assert result == expected + + def test_repr_summary(self): + with cf.option_context("display.max_seq_items", 10): + result = repr(Index(np.arange(1000))) + assert len(result) < 200 + assert "..." in result + + def test_summary_bug(self): + # GH#3869 + ind = Index(["{other}%s", "~:{range}:0"], name="A") + result = ind._summary() + # shouldn't be formatted accidentally. + assert "~:{range}:0" in result + assert "{other}%s" in result + + def test_index_repr_bool_nan(self): + # GH32146 + arr = Index([True, False, np.nan], dtype=object) + msg = "Index.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + exp1 = arr.format() + out1 = ["True", "False", "NaN"] + assert out1 == exp1 + + exp2 = repr(arr) + out2 = "Index([True, False, nan], dtype='object')" + assert out2 == exp2 + + def test_format_different_scalar_lengths(self): + # GH#35439 + idx = Index(["aaaaaaaaa", "b"]) + expected = ["aaaaaaaaa", "b"] + msg = r"Index\.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert idx.format() == expected diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_indexing.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_indexing.py new file mode 100644 index 0000000000000000000000000000000000000000..2988fa7d1baa1e0bc0f6cc4b6dc32e5d12f332cf --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_indexing.py @@ -0,0 +1,104 @@ +import numpy as np +import pytest + +from pandas._libs import index as libindex + +import pandas as pd +from pandas import ( + Index, + NaT, +) +import pandas._testing as tm + + +class TestGetSliceBounds: + @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)]) + def test_get_slice_bounds_within(self, side, expected): + index = Index(list("abcdef")) + result = index.get_slice_bound("e", side=side) + assert result == expected + + @pytest.mark.parametrize("side", ["left", "right"]) + @pytest.mark.parametrize( + "data, bound, expected", [(list("abcdef"), "x", 6), (list("bcdefg"), "a", 0)] + ) + def test_get_slice_bounds_outside(self, side, expected, data, bound): + index = Index(data) + result = index.get_slice_bound(bound, side=side) + assert result == expected + + def test_get_slice_bounds_invalid_side(self): + with pytest.raises(ValueError, match="Invalid value for side kwarg"): + Index([]).get_slice_bound("a", side="middle") + + +class TestGetIndexerNonUnique: + def test_get_indexer_non_unique_dtype_mismatch(self): + # GH#25459 + indexes, missing = Index(["A", "B"]).get_indexer_non_unique(Index([0])) + tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes) + tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), missing) + + @pytest.mark.parametrize( + "idx_values,idx_non_unique", + [ + ([np.nan, 100, 200, 100], [np.nan, 100]), + ([np.nan, 100.0, 200.0, 100.0], [np.nan, 100.0]), + ], + ) + def test_get_indexer_non_unique_int_index(self, idx_values, idx_non_unique): + indexes, missing = Index(idx_values).get_indexer_non_unique(Index([np.nan])) + tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), indexes) + tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing) + + indexes, missing = Index(idx_values).get_indexer_non_unique( + Index(idx_non_unique) + ) + tm.assert_numpy_array_equal(np.array([0, 1, 3], dtype=np.intp), indexes) + tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing) + + +class TestGetLoc: + @pytest.mark.slow # to_flat_index takes a while + def test_get_loc_tuple_monotonic_above_size_cutoff(self, monkeypatch): + # Go through the libindex path for which using + # _bin_search vs ndarray.searchsorted makes a difference + + with monkeypatch.context(): + monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 100) + lev = list("ABCD") + dti = pd.date_range("2016-01-01", periods=10) + + mi = pd.MultiIndex.from_product([lev, range(5), dti]) + oidx = mi.to_flat_index() + + loc = len(oidx) // 2 + tup = oidx[loc] + + res = oidx.get_loc(tup) + assert res == loc + + def test_get_loc_nan_object_dtype_nonmonotonic_nonunique(self): + # case that goes through _maybe_get_bool_indexer + idx = Index(["foo", np.nan, None, "foo", 1.0, None], dtype=object) + + # we dont raise KeyError on nan + res = idx.get_loc(np.nan) + assert res == 1 + + # we only match on None, not on np.nan + res = idx.get_loc(None) + expected = np.array([False, False, True, False, False, True]) + tm.assert_numpy_array_equal(res, expected) + + # we don't match at all on mismatched NA + with pytest.raises(KeyError, match="NaT"): + idx.get_loc(NaT) + + +def test_getitem_boolean_ea_indexer(): + # GH#45806 + ser = pd.Series([True, False, pd.NA], dtype="boolean") + result = ser.index[ser] + expected = Index([0]) + tm.assert_index_equal(result, expected) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_pickle.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_pickle.py new file mode 100644 index 0000000000000000000000000000000000000000..c670921decb78808fa54a35c45e3d2d15ab57a67 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_pickle.py @@ -0,0 +1,11 @@ +from pandas import Index +import pandas._testing as tm + + +def test_pickle_preserves_object_dtype(): + # GH#43188, GH#43155 don't infer numeric dtype + index = Index([1, 2, 3], dtype=object) + + result = tm.round_trip_pickle(index) + assert result.dtype == object + tm.assert_index_equal(index, result) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_reshape.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_reshape.py new file mode 100644 index 0000000000000000000000000000000000000000..548f32fd533232c8a930f2a0763394e76f196a43 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_reshape.py @@ -0,0 +1,97 @@ +""" +Tests for ndarray-like method on the base Index class +""" +import numpy as np +import pytest + +import pandas as pd +from pandas import Index +import pandas._testing as tm + + +class TestReshape: + def test_repeat(self): + repeats = 2 + index = Index([1, 2, 3]) + expected = Index([1, 1, 2, 2, 3, 3]) + + result = index.repeat(repeats) + tm.assert_index_equal(result, expected) + + def test_insert(self): + # GH 7256 + # validate neg/pos inserts + result = Index(["b", "c", "d"]) + + # test 0th element + tm.assert_index_equal(Index(["a", "b", "c", "d"]), result.insert(0, "a")) + + # test Nth element that follows Python list behavior + tm.assert_index_equal(Index(["b", "c", "e", "d"]), result.insert(-1, "e")) + + # test loc +/- neq (0, -1) + tm.assert_index_equal(result.insert(1, "z"), result.insert(-2, "z")) + + # test empty + null_index = Index([]) + tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a")) + + def test_insert_missing(self, request, nulls_fixture, using_infer_string): + if using_infer_string and nulls_fixture is pd.NA: + request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)")) + # GH#22295 + # test there is no mangling of NA values + expected = Index(["a", nulls_fixture, "b", "c"], dtype=object) + result = Index(list("abc"), dtype=object).insert( + 1, Index([nulls_fixture], dtype=object) + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "val", [(1, 2), np.datetime64("2019-12-31"), np.timedelta64(1, "D")] + ) + @pytest.mark.parametrize("loc", [-1, 2]) + def test_insert_datetime_into_object(self, loc, val): + # GH#44509 + idx = Index(["1", "2", "3"]) + result = idx.insert(loc, val) + expected = Index(["1", "2", val, "3"]) + tm.assert_index_equal(result, expected) + assert type(expected[2]) is type(val) + + def test_insert_none_into_string_numpy(self, string_dtype_no_object): + # GH#55365 + index = Index(["a", "b", "c"], dtype=string_dtype_no_object) + result = index.insert(-1, None) + expected = Index(["a", "b", None, "c"], dtype=string_dtype_no_object) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "pos,expected", + [ + (0, Index(["b", "c", "d"], name="index")), + (-1, Index(["a", "b", "c"], name="index")), + ], + ) + def test_delete(self, pos, expected): + index = Index(["a", "b", "c", "d"], name="index") + result = index.delete(pos) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + def test_delete_raises(self): + index = Index(["a", "b", "c", "d"], name="index") + msg = "index 5 is out of bounds for axis 0 with size 4" + with pytest.raises(IndexError, match=msg): + index.delete(5) + + def test_append_multiple(self): + index = Index(["a", "b", "c", "d", "e", "f"]) + + foos = [index[:2], index[2:4], index[4:]] + result = foos[0].append(foos[1:]) + tm.assert_index_equal(result, index) + + # empty + result = index.append([]) + tm.assert_index_equal(result, index) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_setops.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_setops.py new file mode 100644 index 0000000000000000000000000000000000000000..3ef3f3ad4d3a20bd2e6303d781590396cbc00ae0 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_setops.py @@ -0,0 +1,266 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + Series, +) +import pandas._testing as tm +from pandas.core.algorithms import safe_sort + + +def equal_contents(arr1, arr2) -> bool: + """ + Checks if the set of unique elements of arr1 and arr2 are equivalent. + """ + return frozenset(arr1) == frozenset(arr2) + + +class TestIndexSetOps: + @pytest.mark.parametrize( + "method", ["union", "intersection", "difference", "symmetric_difference"] + ) + def test_setops_sort_validation(self, method): + idx1 = Index(["a", "b"]) + idx2 = Index(["b", "c"]) + + with pytest.raises(ValueError, match="The 'sort' keyword only takes"): + getattr(idx1, method)(idx2, sort=2) + + # sort=True is supported as of GH#?? + getattr(idx1, method)(idx2, sort=True) + + def test_setops_preserve_object_dtype(self): + idx = Index([1, 2, 3], dtype=object) + result = idx.intersection(idx[1:]) + expected = idx[1:] + tm.assert_index_equal(result, expected) + + # if other is not monotonic increasing, intersection goes through + # a different route + result = idx.intersection(idx[1:][::-1]) + tm.assert_index_equal(result, expected) + + result = idx._union(idx[1:], sort=None) + expected = idx + tm.assert_numpy_array_equal(result, expected.values) + + result = idx.union(idx[1:], sort=None) + tm.assert_index_equal(result, expected) + + # if other is not monotonic increasing, _union goes through + # a different route + result = idx._union(idx[1:][::-1], sort=None) + tm.assert_numpy_array_equal(result, expected.values) + + result = idx.union(idx[1:][::-1], sort=None) + tm.assert_index_equal(result, expected) + + def test_union_base(self): + index = Index([0, "a", 1, "b", 2, "c"]) + first = index[3:] + second = index[:5] + + result = first.union(second) + + expected = Index([0, 1, 2, "a", "b", "c"]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("klass", [np.array, Series, list]) + def test_union_different_type_base(self, klass): + # GH 10149 + index = Index([0, "a", 1, "b", 2, "c"]) + first = index[3:] + second = index[:5] + + result = first.union(klass(second.values)) + + assert equal_contents(result, index) + + def test_union_sort_other_incomparable(self): + # https://github.com/pandas-dev/pandas/issues/24959 + idx = Index([1, pd.Timestamp("2000")]) + # default (sort=None) + with tm.assert_produces_warning(RuntimeWarning): + result = idx.union(idx[:1]) + + tm.assert_index_equal(result, idx) + + # sort=None + with tm.assert_produces_warning(RuntimeWarning): + result = idx.union(idx[:1], sort=None) + tm.assert_index_equal(result, idx) + + # sort=False + result = idx.union(idx[:1], sort=False) + tm.assert_index_equal(result, idx) + + def test_union_sort_other_incomparable_true(self): + idx = Index([1, pd.Timestamp("2000")]) + with pytest.raises(TypeError, match=".*"): + idx.union(idx[:1], sort=True) + + def test_intersection_equal_sort_true(self): + idx = Index(["c", "a", "b"]) + sorted_ = Index(["a", "b", "c"]) + tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) + + def test_intersection_base(self, sort): + # (same results for py2 and py3 but sortedness not tested elsewhere) + index = Index([0, "a", 1, "b", 2, "c"]) + first = index[:5] + second = index[:3] + + expected = Index([0, 1, "a"]) if sort is None else Index([0, "a", 1]) + result = first.intersection(second, sort=sort) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("klass", [np.array, Series, list]) + def test_intersection_different_type_base(self, klass, sort): + # GH 10149 + index = Index([0, "a", 1, "b", 2, "c"]) + first = index[:5] + second = index[:3] + + result = first.intersection(klass(second.values), sort=sort) + assert equal_contents(result, second) + + def test_intersection_nosort(self): + result = Index(["c", "b", "a"]).intersection(["b", "a"]) + expected = Index(["b", "a"]) + tm.assert_index_equal(result, expected) + + def test_intersection_equal_sort(self): + idx = Index(["c", "a", "b"]) + tm.assert_index_equal(idx.intersection(idx, sort=False), idx) + tm.assert_index_equal(idx.intersection(idx, sort=None), idx) + + def test_intersection_str_dates(self, sort): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + i1 = Index(dt_dates, dtype=object) + i2 = Index(["aa"], dtype=object) + result = i2.intersection(i1, sort=sort) + + assert len(result) == 0 + + @pytest.mark.parametrize( + "index2,expected_arr", + [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B"])], + ) + def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort): + # non-monotonic non-unique + index1 = Index(["A", "B", "A", "C"]) + expected = Index(expected_arr) + result = index1.intersection(index2, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + def test_difference_base(self, sort): + # (same results for py2 and py3 but sortedness not tested elsewhere) + index = Index([0, "a", 1, "b", 2, "c"]) + first = index[:4] + second = index[3:] + + result = first.difference(second, sort) + expected = Index([0, "a", 1]) + if sort is None: + expected = Index(safe_sort(expected)) + tm.assert_index_equal(result, expected) + + def test_symmetric_difference(self): + # (same results for py2 and py3 but sortedness not tested elsewhere) + index = Index([0, "a", 1, "b", 2, "c"]) + first = index[:4] + second = index[3:] + + result = first.symmetric_difference(second) + expected = Index([0, 1, 2, "a", "c"]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "method,expected,sort", + [ + ( + "intersection", + np.array( + [(1, "A"), (2, "A"), (1, "B"), (2, "B")], + dtype=[("num", int), ("let", "S1")], + ), + False, + ), + ( + "intersection", + np.array( + [(1, "A"), (1, "B"), (2, "A"), (2, "B")], + dtype=[("num", int), ("let", "S1")], + ), + None, + ), + ( + "union", + np.array( + [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")], + dtype=[("num", int), ("let", "S1")], + ), + None, + ), + ], + ) + def test_tuple_union_bug(self, method, expected, sort): + index1 = Index( + np.array( + [(1, "A"), (2, "A"), (1, "B"), (2, "B")], + dtype=[("num", int), ("let", "S1")], + ) + ) + index2 = Index( + np.array( + [(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")], + dtype=[("num", int), ("let", "S1")], + ) + ) + + result = getattr(index1, method)(index2, sort=sort) + assert result.ndim == 1 + + expected = Index(expected) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("first_list", [["b", "a"], []]) + @pytest.mark.parametrize("second_list", [["a", "b"], []]) + @pytest.mark.parametrize( + "first_name, second_name, expected_name", + [("A", "B", None), (None, "B", None), ("A", None, None)], + ) + def test_union_name_preservation( + self, first_list, second_list, first_name, second_name, expected_name, sort + ): + first = Index(first_list, name=first_name) + second = Index(second_list, name=second_name) + union = first.union(second, sort=sort) + + vals = set(first_list).union(second_list) + + if sort is None and len(first_list) > 0 and len(second_list) > 0: + expected = Index(sorted(vals), name=expected_name) + tm.assert_index_equal(union, expected) + else: + expected = Index(vals, name=expected_name) + tm.assert_index_equal(union.sort_values(), expected.sort_values()) + + @pytest.mark.parametrize( + "diff_type, expected", + [["difference", [1, "B"]], ["symmetric_difference", [1, 2, "B", "C"]]], + ) + def test_difference_object_type(self, diff_type, expected): + # GH 13432 + idx1 = Index([0, 1, "A", "B"]) + idx2 = Index([0, 2, "A", "C"]) + result = getattr(idx1, diff_type)(idx2) + expected = Index(expected) + tm.assert_index_equal(result, expected) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_where.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_where.py new file mode 100644 index 0000000000000000000000000000000000000000..0c8969735e14e2741bc029b499024af3ec378a92 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/base_class/test_where.py @@ -0,0 +1,13 @@ +import numpy as np + +from pandas import Index +import pandas._testing as tm + + +class TestWhere: + def test_where_intlike_str_doesnt_cast_ints(self): + idx = Index(range(3)) + mask = np.array([True, False, True]) + res = idx.where(mask, "2") + expected = Index([0, "2", 2]) + tm.assert_index_equal(res, expected) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/__init__.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py new file mode 100644 index 0000000000000000000000000000000000000000..61a79c4ceabf9d68aab73ffd69e0f15ad842ff74 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py @@ -0,0 +1,89 @@ +import numpy as np +import pytest + +from pandas import ( + PeriodIndex, + Series, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm + + +class DropDuplicates: + def test_drop_duplicates_metadata(self, idx): + # GH#10115 + result = idx.drop_duplicates() + tm.assert_index_equal(idx, result) + assert idx.freq == result.freq + + idx_dup = idx.append(idx) + result = idx_dup.drop_duplicates() + + expected = idx + if not isinstance(idx, PeriodIndex): + # freq is reset except for PeriodIndex + assert idx_dup.freq is None + assert result.freq is None + expected = idx._with_freq(None) + else: + assert result.freq == expected.freq + + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "keep, expected, index", + [ + ( + "first", + np.concatenate(([False] * 10, [True] * 5)), + np.arange(0, 10, dtype=np.int64), + ), + ( + "last", + np.concatenate(([True] * 5, [False] * 10)), + np.arange(5, 15, dtype=np.int64), + ), + ( + False, + np.concatenate(([True] * 5, [False] * 5, [True] * 5)), + np.arange(5, 10, dtype=np.int64), + ), + ], + ) + def test_drop_duplicates(self, keep, expected, index, idx): + # to check Index/Series compat + idx = idx.append(idx[:5]) + + tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) + expected = idx[~expected] + + result = idx.drop_duplicates(keep=keep) + tm.assert_index_equal(result, expected) + + result = Series(idx).drop_duplicates(keep=keep) + expected = Series(expected, index=index) + tm.assert_series_equal(result, expected) + + +class TestDropDuplicatesPeriodIndex(DropDuplicates): + @pytest.fixture(params=["D", "3D", "h", "2h", "min", "2min", "s", "3s"]) + def freq(self, request): + return request.param + + @pytest.fixture + def idx(self, freq): + return period_range("2011-01-01", periods=10, freq=freq, name="idx") + + +class TestDropDuplicatesDatetimeIndex(DropDuplicates): + @pytest.fixture + def idx(self, freq_sample): + return date_range("2011-01-01", freq=freq_sample, periods=10, name="idx") + + +class TestDropDuplicatesTimedeltaIndex(DropDuplicates): + @pytest.fixture + def idx(self, freq_sample): + return timedelta_range("1 day", periods=10, freq=freq_sample, name="idx") diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_equals.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_equals.py new file mode 100644 index 0000000000000000000000000000000000000000..fc9fbd33d0d285fe7635c23c598318208bb58561 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_equals.py @@ -0,0 +1,181 @@ +""" +Tests shared for DatetimeIndex/TimedeltaIndex/PeriodIndex +""" +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + CategoricalIndex, + DatetimeIndex, + Index, + PeriodIndex, + TimedeltaIndex, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm + + +class EqualsTests: + def test_not_equals_numeric(self, index): + assert not index.equals(Index(index.asi8)) + assert not index.equals(Index(index.asi8.astype("u8"))) + assert not index.equals(Index(index.asi8).astype("f8")) + + def test_equals(self, index): + assert index.equals(index) + assert index.equals(index.astype(object)) + assert index.equals(CategoricalIndex(index)) + assert index.equals(CategoricalIndex(index.astype(object))) + + def test_not_equals_non_arraylike(self, index): + assert not index.equals(list(index)) + + def test_not_equals_strings(self, index): + other = Index([str(x) for x in index], dtype=object) + assert not index.equals(other) + assert not index.equals(CategoricalIndex(other)) + + def test_not_equals_misc_strs(self, index): + other = Index(list("abc")) + assert not index.equals(other) + + +class TestPeriodIndexEquals(EqualsTests): + @pytest.fixture + def index(self): + return period_range("2013-01-01", periods=5, freq="D") + + # TODO: de-duplicate with other test_equals2 methods + @pytest.mark.parametrize("freq", ["D", "M"]) + def test_equals2(self, freq): + # GH#13107 + idx = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq=freq) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.astype(object).equals(idx) + assert idx.astype(object).equals(idx.astype(object)) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) + + idx2 = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq="h") + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.astype(object)) + assert not idx.astype(object).equals(idx2) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) + + # same internal, different tz + idx3 = PeriodIndex._simple_new( + idx._values._simple_new(idx._values.asi8, dtype=pd.PeriodDtype("h")) + ) + tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) + assert not idx.equals(idx3) + assert not idx.equals(idx3.copy()) + assert not idx.equals(idx3.astype(object)) + assert not idx.astype(object).equals(idx3) + assert not idx.equals(list(idx3)) + assert not idx.equals(pd.Series(idx3)) + + +class TestDatetimeIndexEquals(EqualsTests): + @pytest.fixture + def index(self): + return date_range("2013-01-01", periods=5) + + def test_equals2(self): + # GH#13107 + idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"]) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.astype(object).equals(idx) + assert idx.astype(object).equals(idx.astype(object)) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) + + idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific") + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.astype(object)) + assert not idx.astype(object).equals(idx2) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) + + # same internal, different tz + idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific") + tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) + assert not idx.equals(idx3) + assert not idx.equals(idx3.copy()) + assert not idx.equals(idx3.astype(object)) + assert not idx.astype(object).equals(idx3) + assert not idx.equals(list(idx3)) + assert not idx.equals(pd.Series(idx3)) + + # check that we do not raise when comparing with OutOfBounds objects + oob = Index([datetime(2500, 1, 1)] * 3, dtype=object) + assert not idx.equals(oob) + assert not idx2.equals(oob) + assert not idx3.equals(oob) + + # check that we do not raise when comparing with OutOfBounds dt64 + oob2 = oob.map(np.datetime64) + assert not idx.equals(oob2) + assert not idx2.equals(oob2) + assert not idx3.equals(oob2) + + @pytest.mark.parametrize("freq", ["B", "C"]) + def test_not_equals_bday(self, freq): + rng = date_range("2009-01-01", "2010-01-01", freq=freq) + assert not rng.equals(list(rng)) + + +class TestTimedeltaIndexEquals(EqualsTests): + @pytest.fixture + def index(self): + return timedelta_range("1 day", periods=10) + + def test_equals2(self): + # GH#13107 + idx = TimedeltaIndex(["1 days", "2 days", "NaT"]) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.astype(object).equals(idx) + assert idx.astype(object).equals(idx.astype(object)) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) + + idx2 = TimedeltaIndex(["2 days", "1 days", "NaT"]) + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.astype(object)) + assert not idx.astype(object).equals(idx2) + assert not idx.astype(object).equals(idx2.astype(object)) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) + + # Check that we dont raise OverflowError on comparisons outside the + # implementation range GH#28532 + oob = Index([timedelta(days=10**6)] * 3, dtype=object) + assert not idx.equals(oob) + assert not idx2.equals(oob) + + oob2 = Index([np.timedelta64(x) for x in oob], dtype=object) + assert (oob == oob2).all() + assert not idx.equals(oob2) + assert not idx2.equals(oob2) + + oob3 = oob.map(np.timedelta64) + assert (oob3 == oob).all() + assert not idx.equals(oob3) + assert not idx2.equals(oob3) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_indexing.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_indexing.py new file mode 100644 index 0000000000000000000000000000000000000000..7b2c81aaf17de3785e62a2d989394259b2496085 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_indexing.py @@ -0,0 +1,45 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DatetimeIndex, + Index, +) +import pandas._testing as tm + +dtlike_dtypes = [ + np.dtype("timedelta64[ns]"), + np.dtype("datetime64[ns]"), + pd.DatetimeTZDtype("ns", "Asia/Tokyo"), + pd.PeriodDtype("ns"), +] + + +@pytest.mark.parametrize("ldtype", dtlike_dtypes) +@pytest.mark.parametrize("rdtype", dtlike_dtypes) +def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype): + vals = np.tile(3600 * 10**9 * np.arange(3, dtype=np.int64), 2) + + def construct(dtype): + if dtype is dtlike_dtypes[-1]: + # PeriodArray will try to cast ints to strings + return DatetimeIndex(vals).astype(dtype) + return Index(vals, dtype=dtype) + + left = construct(ldtype) + right = construct(rdtype) + + result = left.get_indexer_non_unique(right) + + if ldtype is rdtype: + ex1 = np.array([0, 3, 1, 4, 2, 5] * 2, dtype=np.intp) + ex2 = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(result[0], ex1) + tm.assert_numpy_array_equal(result[1], ex2) + + else: + no_matches = np.array([-1] * 6, dtype=np.intp) + missing = np.arange(6, dtype=np.intp) + tm.assert_numpy_array_equal(result[0], no_matches) + tm.assert_numpy_array_equal(result[1], missing) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_is_monotonic.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_is_monotonic.py new file mode 100644 index 0000000000000000000000000000000000000000..b0e42e660b751cddaca74c4574e9588e8ac8c782 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_is_monotonic.py @@ -0,0 +1,46 @@ +from pandas import ( + Index, + NaT, + date_range, +) + + +def test_is_monotonic_with_nat(): + # GH#31437 + # PeriodIndex.is_monotonic_increasing should behave analogously to DatetimeIndex, + # in particular never be monotonic when we have NaT + dti = date_range("2016-01-01", periods=3) + pi = dti.to_period("D") + tdi = Index(dti.view("timedelta64[ns]")) + + for obj in [pi, pi._engine, dti, dti._engine, tdi, tdi._engine]: + if isinstance(obj, Index): + # i.e. not Engines + assert obj.is_monotonic_increasing + assert obj.is_monotonic_increasing + assert not obj.is_monotonic_decreasing + assert obj.is_unique + + dti1 = dti.insert(0, NaT) + pi1 = dti1.to_period("D") + tdi1 = Index(dti1.view("timedelta64[ns]")) + + for obj in [pi1, pi1._engine, dti1, dti1._engine, tdi1, tdi1._engine]: + if isinstance(obj, Index): + # i.e. not Engines + assert not obj.is_monotonic_increasing + assert not obj.is_monotonic_increasing + assert not obj.is_monotonic_decreasing + assert obj.is_unique + + dti2 = dti.insert(3, NaT) + pi2 = dti2.to_period("h") + tdi2 = Index(dti2.view("timedelta64[ns]")) + + for obj in [pi2, pi2._engine, dti2, dti2._engine, tdi2, tdi2._engine]: + if isinstance(obj, Index): + # i.e. not Engines + assert not obj.is_monotonic_increasing + assert not obj.is_monotonic_increasing + assert not obj.is_monotonic_decreasing + assert obj.is_unique diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_nat.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_nat.py new file mode 100644 index 0000000000000000000000000000000000000000..50cf29d0163555876eb7b1914bbd4ee45bc2285e --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_nat.py @@ -0,0 +1,53 @@ +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + NaT, + PeriodIndex, + TimedeltaIndex, +) +import pandas._testing as tm + + +class NATests: + def test_nat(self, index_without_na): + empty_index = index_without_na[:0] + + index_with_na = index_without_na.copy(deep=True) + index_with_na._data[1] = NaT + + assert empty_index._na_value is NaT + assert index_with_na._na_value is NaT + assert index_without_na._na_value is NaT + + idx = index_without_na + assert idx._can_hold_na + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) + assert idx.hasnans is False + + idx = index_with_na + assert idx._can_hold_na + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) + assert idx.hasnans is True + + +class TestDatetimeIndexNA(NATests): + @pytest.fixture + def index_without_na(self, tz_naive_fixture): + tz = tz_naive_fixture + return DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) + + +class TestTimedeltaIndexNA(NATests): + @pytest.fixture + def index_without_na(self): + return TimedeltaIndex(["1 days", "2 days"]) + + +class TestPeriodIndexNA(NATests): + @pytest.fixture + def index_without_na(self): + return PeriodIndex(["2011-01-01", "2011-01-02"], freq="D") diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_sort_values.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_sort_values.py new file mode 100644 index 0000000000000000000000000000000000000000..a2c349c8b0ef679b9d32411efd8a0d393b9d5e9d --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_sort_values.py @@ -0,0 +1,315 @@ +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + Index, + NaT, + PeriodIndex, + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm + + +def check_freq_ascending(ordered, orig, ascending): + """ + Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex + when the original index is generated (or generate-able) with + period_range/date_range/timedelta_range. + """ + if isinstance(ordered, PeriodIndex): + assert ordered.freq == orig.freq + elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)): + if ascending: + assert ordered.freq.n == orig.freq.n + else: + assert ordered.freq.n == -1 * orig.freq.n + + +def check_freq_nonmonotonic(ordered, orig): + """ + Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex + when the original index is _not_ generated (or generate-able) with + period_range/date_range//timedelta_range. + """ + if isinstance(ordered, PeriodIndex): + assert ordered.freq == orig.freq + elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)): + assert ordered.freq is None + + +class TestSortValues: + @pytest.fixture(params=[DatetimeIndex, TimedeltaIndex, PeriodIndex]) + def non_monotonic_idx(self, request): + if request.param is DatetimeIndex: + return DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"]) + elif request.param is PeriodIndex: + dti = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"]) + return dti.to_period("D") + else: + return TimedeltaIndex( + ["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"] + ) + + def test_argmin_argmax(self, non_monotonic_idx): + assert non_monotonic_idx.argmin() == 1 + assert non_monotonic_idx.argmax() == 0 + + def test_sort_values(self, non_monotonic_idx): + idx = non_monotonic_idx + ordered = idx.sort_values() + assert ordered.is_monotonic_increasing + ordered = idx.sort_values(ascending=False) + assert ordered[::-1].is_monotonic_increasing + + ordered, dexer = idx.sort_values(return_indexer=True) + assert ordered.is_monotonic_increasing + tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp)) + + ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) + assert ordered[::-1].is_monotonic_increasing + tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp)) + + def check_sort_values_with_freq(self, idx): + ordered = idx.sort_values() + tm.assert_index_equal(ordered, idx) + check_freq_ascending(ordered, idx, True) + + ordered = idx.sort_values(ascending=False) + expected = idx[::-1] + tm.assert_index_equal(ordered, expected) + check_freq_ascending(ordered, idx, False) + + ordered, indexer = idx.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, idx) + tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2], dtype=np.intp)) + check_freq_ascending(ordered, idx, True) + + ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) + expected = idx[::-1] + tm.assert_index_equal(ordered, expected) + tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0], dtype=np.intp)) + check_freq_ascending(ordered, idx, False) + + @pytest.mark.parametrize("freq", ["D", "h"]) + def test_sort_values_with_freq_timedeltaindex(self, freq): + # GH#10295 + idx = timedelta_range(start=f"1{freq}", periods=3, freq=freq).rename("idx") + + self.check_sort_values_with_freq(idx) + + @pytest.mark.parametrize( + "idx", + [ + DatetimeIndex( + ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx" + ), + DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], + freq="h", + name="tzidx", + tz="Asia/Tokyo", + ), + ], + ) + def test_sort_values_with_freq_datetimeindex(self, idx): + self.check_sort_values_with_freq(idx) + + @pytest.mark.parametrize("freq", ["D", "2D", "4D"]) + def test_sort_values_with_freq_periodindex(self, freq): + # here with_freq refers to being period_range-like + idx = PeriodIndex( + ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx" + ) + self.check_sort_values_with_freq(idx) + + @pytest.mark.parametrize( + "idx", + [ + PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="Y"), + Index([2011, 2012, 2013], name="idx"), # for compatibility check + ], + ) + def test_sort_values_with_freq_periodindex2(self, idx): + # here with_freq indicates this is period_range-like + self.check_sort_values_with_freq(idx) + + def check_sort_values_without_freq(self, idx, expected): + ordered = idx.sort_values(na_position="first") + tm.assert_index_equal(ordered, expected) + check_freq_nonmonotonic(ordered, idx) + + if not idx.isna().any(): + ordered = idx.sort_values() + tm.assert_index_equal(ordered, expected) + check_freq_nonmonotonic(ordered, idx) + + ordered = idx.sort_values(ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + check_freq_nonmonotonic(ordered, idx) + + ordered, indexer = idx.sort_values(return_indexer=True, na_position="first") + tm.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, exp) + check_freq_nonmonotonic(ordered, idx) + + if not idx.isna().any(): + ordered, indexer = idx.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, exp) + check_freq_nonmonotonic(ordered, idx) + + ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + + exp = np.array([2, 1, 3, 0, 4], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, exp) + check_freq_nonmonotonic(ordered, idx) + + def test_sort_values_without_freq_timedeltaindex(self): + # GH#10295 + + idx = TimedeltaIndex( + ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1" + ) + expected = TimedeltaIndex( + ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1" + ) + self.check_sort_values_without_freq(idx, expected) + + @pytest.mark.parametrize( + "index_dates,expected_dates", + [ + ( + ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], + ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ( + ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], + ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ( + [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], + [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ], + ) + def test_sort_values_without_freq_datetimeindex( + self, index_dates, expected_dates, tz_naive_fixture + ): + tz = tz_naive_fixture + + # without freq + idx = DatetimeIndex(index_dates, tz=tz, name="idx") + expected = DatetimeIndex(expected_dates, tz=tz, name="idx") + + self.check_sort_values_without_freq(idx, expected) + + @pytest.mark.parametrize( + "idx,expected", + [ + ( + PeriodIndex( + [ + "2011-01-01", + "2011-01-03", + "2011-01-05", + "2011-01-02", + "2011-01-01", + ], + freq="D", + name="idx1", + ), + PeriodIndex( + [ + "2011-01-01", + "2011-01-01", + "2011-01-02", + "2011-01-03", + "2011-01-05", + ], + freq="D", + name="idx1", + ), + ), + ( + PeriodIndex( + [ + "2011-01-01", + "2011-01-03", + "2011-01-05", + "2011-01-02", + "2011-01-01", + ], + freq="D", + name="idx2", + ), + PeriodIndex( + [ + "2011-01-01", + "2011-01-01", + "2011-01-02", + "2011-01-03", + "2011-01-05", + ], + freq="D", + name="idx2", + ), + ), + ( + PeriodIndex( + [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], + freq="D", + name="idx3", + ), + PeriodIndex( + [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], + freq="D", + name="idx3", + ), + ), + ( + PeriodIndex( + ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="Y" + ), + PeriodIndex( + ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="Y" + ), + ), + ( + # For compatibility check + Index([2011, 2013, 2015, 2012, 2011], name="idx"), + Index([2011, 2011, 2012, 2013, 2015], name="idx"), + ), + ], + ) + def test_sort_values_without_freq_periodindex(self, idx, expected): + # here without_freq means not generateable by period_range + self.check_sort_values_without_freq(idx, expected) + + def test_sort_values_without_freq_periodindex_nat(self): + # doesn't quite fit into check_sort_values_without_freq + idx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D") + expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D") + + ordered = idx.sort_values(na_position="first") + tm.assert_index_equal(ordered, expected) + check_freq_nonmonotonic(ordered, idx) + + ordered = idx.sort_values(ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + check_freq_nonmonotonic(ordered, idx) + + +def test_order_stability_compat(): + # GH#35922. sort_values is stable both for normal and datetime-like Index + pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="Y") + iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx") + ordered1, indexer1 = pidx.sort_values(return_indexer=True, ascending=False) + ordered2, indexer2 = iidx.sort_values(return_indexer=True, ascending=False) + tm.assert_numpy_array_equal(indexer1, indexer2) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_value_counts.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_value_counts.py new file mode 100644 index 0000000000000000000000000000000000000000..069e354a364c9343c595da9d18ee7c04eec04f43 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/datetimelike_/test_value_counts.py @@ -0,0 +1,103 @@ +import numpy as np + +from pandas import ( + DatetimeIndex, + NaT, + PeriodIndex, + Series, + TimedeltaIndex, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm + + +class TestValueCounts: + # GH#7735 + + def test_value_counts_unique_datetimeindex(self, tz_naive_fixture): + tz = tz_naive_fixture + orig = date_range("2011-01-01 09:00", freq="h", periods=10, tz=tz) + self._check_value_counts_with_repeats(orig) + + def test_value_counts_unique_timedeltaindex(self): + orig = timedelta_range("1 days 09:00:00", freq="h", periods=10) + self._check_value_counts_with_repeats(orig) + + def test_value_counts_unique_periodindex(self): + orig = period_range("2011-01-01 09:00", freq="h", periods=10) + self._check_value_counts_with_repeats(orig) + + def _check_value_counts_with_repeats(self, orig): + # create repeated values, 'n'th element is repeated by n+1 times + idx = type(orig)( + np.repeat(orig._values, range(1, len(orig) + 1)), dtype=orig.dtype + ) + + exp_idx = orig[::-1] + if not isinstance(exp_idx, PeriodIndex): + exp_idx = exp_idx._with_freq(None) + expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64", name="count") + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + tm.assert_index_equal(idx.unique(), orig) + + def test_value_counts_unique_datetimeindex2(self, tz_naive_fixture): + tz = tz_naive_fixture + idx = DatetimeIndex( + [ + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 08:00", + "2013-01-01 08:00", + NaT, + ], + tz=tz, + ) + self._check_value_counts_dropna(idx) + + def test_value_counts_unique_timedeltaindex2(self): + idx = TimedeltaIndex( + [ + "1 days 09:00:00", + "1 days 09:00:00", + "1 days 09:00:00", + "1 days 08:00:00", + "1 days 08:00:00", + NaT, + ] + ) + self._check_value_counts_dropna(idx) + + def test_value_counts_unique_periodindex2(self): + idx = PeriodIndex( + [ + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 08:00", + "2013-01-01 08:00", + NaT, + ], + freq="h", + ) + self._check_value_counts_dropna(idx) + + def _check_value_counts_dropna(self, idx): + exp_idx = idx[[2, 3]] + expected = Series([3, 2], index=exp_idx, name="count") + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + exp_idx = idx[[2, 3, -1]] + expected = Series([3, 2, 1], index=exp_idx, name="count") + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(dropna=False), expected) + + tm.assert_index_equal(idx.unique(), exp_idx) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/__init__.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_astype.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_astype.py new file mode 100644 index 0000000000000000000000000000000000000000..dde5f38074efb0dda0942e17022d9a22e3d44afa --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_astype.py @@ -0,0 +1,254 @@ +import re + +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + IntervalDtype, +) + +from pandas import ( + CategoricalIndex, + Index, + IntervalIndex, + NaT, + Timedelta, + Timestamp, + interval_range, +) +import pandas._testing as tm + + +class AstypeTests: + """Tests common to IntervalIndex with any subtype""" + + def test_astype_idempotent(self, index): + result = index.astype("interval") + tm.assert_index_equal(result, index) + + result = index.astype(index.dtype) + tm.assert_index_equal(result, index) + + def test_astype_object(self, index): + result = index.astype(object) + expected = Index(index.values, dtype="object") + tm.assert_index_equal(result, expected) + assert not result.equals(index) + + def test_astype_category(self, index): + result = index.astype("category") + expected = CategoricalIndex(index.values) + tm.assert_index_equal(result, expected) + + result = index.astype(CategoricalDtype()) + tm.assert_index_equal(result, expected) + + # non-default params + categories = index.dropna().unique().values[:-1] + dtype = CategoricalDtype(categories=categories, ordered=True) + result = index.astype(dtype) + expected = CategoricalIndex(index.values, categories=categories, ordered=True) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", + [ + "int64", + "uint64", + "float64", + "complex128", + "period[M]", + "timedelta64", + "timedelta64[ns]", + "datetime64", + "datetime64[ns]", + "datetime64[ns, US/Eastern]", + ], + ) + def test_astype_cannot_cast(self, index, dtype): + msg = "Cannot cast IntervalIndex to dtype" + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + def test_astype_invalid_dtype(self, index): + msg = "data type [\"']fake_dtype[\"'] not understood" + with pytest.raises(TypeError, match=msg): + index.astype("fake_dtype") + + +class TestIntSubtype(AstypeTests): + """Tests specific to IntervalIndex with integer-like subtype""" + + indexes = [ + IntervalIndex.from_breaks(np.arange(-10, 11, dtype="int64")), + IntervalIndex.from_breaks(np.arange(100, dtype="uint64"), closed="left"), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize( + "subtype", ["float64", "datetime64[ns]", "timedelta64[ns]"] + ) + def test_subtype_conversion(self, index, subtype): + dtype = IntervalDtype(subtype, index.closed) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype), index.right.astype(subtype), closed=index.closed + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "subtype_start, subtype_end", [("int64", "uint64"), ("uint64", "int64")] + ) + def test_subtype_integer(self, subtype_start, subtype_end): + index = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start)) + dtype = IntervalDtype(subtype_end, index.closed) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype_end), + index.right.astype(subtype_end), + closed=index.closed, + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.xfail(reason="GH#15832") + def test_subtype_integer_errors(self): + # int64 -> uint64 fails with negative values + index = interval_range(-10, 10) + dtype = IntervalDtype("uint64", "right") + + # Until we decide what the exception message _should_ be, we + # assert something that it should _not_ be. + # We should _not_ be getting a message suggesting that the -10 + # has been wrapped around to a large-positive integer + msg = "^(?!(left side of interval must be <= right side))" + with pytest.raises(ValueError, match=msg): + index.astype(dtype) + + +class TestFloatSubtype(AstypeTests): + """Tests specific to IntervalIndex with float subtype""" + + indexes = [ + interval_range(-10.0, 10.0, closed="neither"), + IntervalIndex.from_arrays( + [-1.5, np.nan, 0.0, 0.0, 1.5], [-0.5, np.nan, 1.0, 1.0, 3.0], closed="both" + ), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize("subtype", ["int64", "uint64"]) + def test_subtype_integer(self, subtype): + index = interval_range(0.0, 10.0) + dtype = IntervalDtype(subtype, "right") + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype), index.right.astype(subtype), closed=index.closed + ) + tm.assert_index_equal(result, expected) + + # raises with NA + msg = r"Cannot convert non-finite values \(NA or inf\) to integer" + with pytest.raises(ValueError, match=msg): + index.insert(0, np.nan).astype(dtype) + + @pytest.mark.parametrize("subtype", ["int64", "uint64"]) + def test_subtype_integer_with_non_integer_borders(self, subtype): + index = interval_range(0.0, 3.0, freq=0.25) + dtype = IntervalDtype(subtype, "right") + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype), index.right.astype(subtype), closed=index.closed + ) + tm.assert_index_equal(result, expected) + + def test_subtype_integer_errors(self): + # float64 -> uint64 fails with negative values + index = interval_range(-10.0, 10.0) + dtype = IntervalDtype("uint64", "right") + msg = re.escape( + "Cannot convert interval[float64, right] to interval[uint64, right]; " + "subtypes are incompatible" + ) + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + @pytest.mark.parametrize("subtype", ["datetime64[ns]", "timedelta64[ns]"]) + def test_subtype_datetimelike(self, index, subtype): + dtype = IntervalDtype(subtype, "right") + msg = "Cannot convert .* to .*; subtypes are incompatible" + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in cast:RuntimeWarning" + ) + def test_astype_category(self, index): + super().test_astype_category(index) + + +class TestDatetimelikeSubtype(AstypeTests): + """Tests specific to IntervalIndex with datetime-like subtype""" + + indexes = [ + interval_range(Timestamp("2018-01-01"), periods=10, closed="neither"), + interval_range(Timestamp("2018-01-01"), periods=10).insert(2, NaT), + interval_range(Timestamp("2018-01-01", tz="US/Eastern"), periods=10), + interval_range(Timedelta("0 days"), periods=10, closed="both"), + interval_range(Timedelta("0 days"), periods=10).insert(2, NaT), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize("subtype", ["int64", "uint64"]) + def test_subtype_integer(self, index, subtype): + dtype = IntervalDtype(subtype, "right") + + if subtype != "int64": + msg = ( + r"Cannot convert interval\[(timedelta64|datetime64)\[ns.*\], .*\] " + r"to interval\[uint64, .*\]" + ) + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + return + + result = index.astype(dtype) + new_left = index.left.astype(subtype) + new_right = index.right.astype(subtype) + + expected = IntervalIndex.from_arrays(new_left, new_right, closed=index.closed) + tm.assert_index_equal(result, expected) + + def test_subtype_float(self, index): + dtype = IntervalDtype("float64", "right") + msg = "Cannot convert .* to .*; subtypes are incompatible" + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + def test_subtype_datetimelike(self): + # datetime -> timedelta raises + dtype = IntervalDtype("timedelta64[ns]", "right") + msg = "Cannot convert .* to .*; subtypes are incompatible" + + index = interval_range(Timestamp("2018-01-01"), periods=10) + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + index = interval_range(Timestamp("2018-01-01", tz="CET"), periods=10) + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + # timedelta -> datetime raises + dtype = IntervalDtype("datetime64[ns]", "right") + index = interval_range(Timedelta("0 days"), periods=10) + with pytest.raises(TypeError, match=msg): + index.astype(dtype) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_constructors.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_constructors.py new file mode 100644 index 0000000000000000000000000000000000000000..e47a014f18045ae20fe27805a31b819b4ad229b9 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_constructors.py @@ -0,0 +1,535 @@ +from functools import partial + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import is_unsigned_integer_dtype +from pandas.core.dtypes.dtypes import IntervalDtype + +from pandas import ( + Categorical, + CategoricalDtype, + CategoricalIndex, + Index, + Interval, + IntervalIndex, + date_range, + notna, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.arrays import IntervalArray +import pandas.core.common as com + + +@pytest.fixture(params=[None, "foo"]) +def name(request): + return request.param + + +class ConstructorTests: + """ + Common tests for all variations of IntervalIndex construction. Input data + to be supplied in breaks format, then converted by the subclass method + get_kwargs_from_breaks to the expected format. + """ + + @pytest.fixture( + params=[ + ([3, 14, 15, 92, 653], np.int64), + (np.arange(10, dtype="int64"), np.int64), + (Index(np.arange(-10, 11, dtype=np.int64)), np.int64), + (Index(np.arange(10, 31, dtype=np.uint64)), np.uint64), + (Index(np.arange(20, 30, 0.5), dtype=np.float64), np.float64), + (date_range("20180101", periods=10), " Interval(0.5, 1.5) + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index == self.index + expected = np.array([True, True]) + tm.assert_numpy_array_equal(actual, expected) + actual = self.index <= self.index + tm.assert_numpy_array_equal(actual, expected) + actual = self.index >= self.index + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index < self.index + expected = np.array([False, False]) + tm.assert_numpy_array_equal(actual, expected) + actual = self.index > self.index + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left") + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index == self.index.values + tm.assert_numpy_array_equal(actual, np.array([True, True])) + actual = self.index.values == self.index + tm.assert_numpy_array_equal(actual, np.array([True, True])) + actual = self.index <= self.index.values + tm.assert_numpy_array_equal(actual, np.array([True, True])) + actual = self.index != self.index.values + tm.assert_numpy_array_equal(actual, np.array([False, False])) + actual = self.index > self.index.values + tm.assert_numpy_array_equal(actual, np.array([False, False])) + actual = self.index.values > self.index + tm.assert_numpy_array_equal(actual, np.array([False, False])) + + # invalid comparisons + actual = self.index == 0 + tm.assert_numpy_array_equal(actual, np.array([False, False])) + actual = self.index == self.index.left + tm.assert_numpy_array_equal(actual, np.array([False, False])) + + msg = "|".join( + [ + "not supported between instances of 'int' and '.*.Interval'", + r"Invalid comparison between dtype=interval\[int64, right\] and ", + ] + ) + with pytest.raises(TypeError, match=msg): + self.index > 0 + with pytest.raises(TypeError, match=msg): + self.index <= 0 + with pytest.raises(TypeError, match=msg): + self.index > np.arange(2) + + msg = "Lengths must match to compare" + with pytest.raises(ValueError, match=msg): + self.index > np.arange(3) + + def test_missing_values(self, closed): + idx = Index( + [np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed)] + ) + idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed) + assert idx.equals(idx2) + + msg = ( + "missing values must be missing in the same location both left " + "and right sides" + ) + with pytest.raises(ValueError, match=msg): + IntervalIndex.from_arrays( + [np.nan, 0, 1], np.array([0, 1, 2]), closed=closed + ) + + tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False])) + + def test_sort_values(self, closed): + index = self.create_index(closed=closed) + + result = index.sort_values() + tm.assert_index_equal(result, index) + + result = index.sort_values(ascending=False) + tm.assert_index_equal(result, index[::-1]) + + # with nan + index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)]) + + result = index.sort_values() + expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan]) + tm.assert_index_equal(result, expected) + + result = index.sort_values(ascending=False, na_position="first") + expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + def test_datetime(self, tz): + start = Timestamp("2000-01-01", tz=tz) + dates = date_range(start=start, periods=10) + index = IntervalIndex.from_breaks(dates) + + # test mid + start = Timestamp("2000-01-01T12:00", tz=tz) + expected = date_range(start=start, periods=9) + tm.assert_index_equal(index.mid, expected) + + # __contains__ doesn't check individual points + assert Timestamp("2000-01-01", tz=tz) not in index + assert Timestamp("2000-01-01T12", tz=tz) not in index + assert Timestamp("2000-01-02", tz=tz) not in index + iv_true = Interval( + Timestamp("2000-01-02", tz=tz), Timestamp("2000-01-03", tz=tz) + ) + iv_false = Interval( + Timestamp("1999-12-31", tz=tz), Timestamp("2000-01-01", tz=tz) + ) + assert iv_true in index + assert iv_false not in index + + # .contains does check individual points + assert not index.contains(Timestamp("2000-01-01", tz=tz)).any() + assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any() + assert index.contains(Timestamp("2000-01-02", tz=tz)).any() + + # test get_indexer + start = Timestamp("1999-12-31T12:00", tz=tz) + target = date_range(start=start, periods=7, freq="12h") + actual = index.get_indexer(target) + expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp") + tm.assert_numpy_array_equal(actual, expected) + + start = Timestamp("2000-01-08T18:00", tz=tz) + target = date_range(start=start, periods=7, freq="6h") + actual = index.get_indexer(target) + expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp") + tm.assert_numpy_array_equal(actual, expected) + + def test_append(self, closed): + index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed) + index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed) + + result = index1.append(index2) + expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed) + tm.assert_index_equal(result, expected) + + result = index1.append([index1, index2]) + expected = IntervalIndex.from_arrays( + [0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed + ) + tm.assert_index_equal(result, expected) + + for other_closed in {"left", "right", "both", "neither"} - {closed}: + index_other_closed = IntervalIndex.from_arrays( + [0, 1], [1, 2], closed=other_closed + ) + result = index1.append(index_other_closed) + expected = index1.astype(object).append(index_other_closed.astype(object)) + tm.assert_index_equal(result, expected) + + def test_is_non_overlapping_monotonic(self, closed): + # Should be True in all cases + tpls = [(0, 1), (2, 3), (4, 5), (6, 7)] + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is True + + idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) + assert idx.is_non_overlapping_monotonic is True + + # Should be False in all cases (overlapping) + tpls = [(0, 2), (1, 3), (4, 5), (6, 7)] + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is False + + idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) + assert idx.is_non_overlapping_monotonic is False + + # Should be False in all cases (non-monotonic) + tpls = [(0, 1), (2, 3), (6, 7), (4, 5)] + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is False + + idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) + assert idx.is_non_overlapping_monotonic is False + + # Should be False for closed='both', otherwise True (GH16560) + if closed == "both": + idx = IntervalIndex.from_breaks(range(4), closed=closed) + assert idx.is_non_overlapping_monotonic is False + else: + idx = IntervalIndex.from_breaks(range(4), closed=closed) + assert idx.is_non_overlapping_monotonic is True + + @pytest.mark.parametrize( + "start, shift, na_value", + [ + (0, 1, np.nan), + (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT), + (Timedelta("0 days"), Timedelta("1 day"), pd.NaT), + ], + ) + def test_is_overlapping(self, start, shift, na_value, closed): + # GH 23309 + # see test_interval_tree.py for extensive tests; interface tests here + + # non-overlapping + tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + assert index.is_overlapping is False + + # non-overlapping with NA + tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + assert index.is_overlapping is False + + # overlapping + tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + assert index.is_overlapping is True + + # overlapping with NA + tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + assert index.is_overlapping is True + + # common endpoints + tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + result = index.is_overlapping + expected = closed == "both" + assert result is expected + + # common endpoints with NA + tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + result = index.is_overlapping + assert result is expected + + # intervals with duplicate left values + a = [10, 15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85] + b = [15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90] + index = IntervalIndex.from_arrays(a, b, closed="right") + result = index.is_overlapping + assert result is False + + @pytest.mark.parametrize( + "tuples", + [ + list(zip(range(10), range(1, 11))), + list( + zip( + date_range("20170101", periods=10), + date_range("20170101", periods=10), + ) + ), + list( + zip( + timedelta_range("0 days", periods=10), + timedelta_range("1 day", periods=10), + ) + ), + ], + ) + def test_to_tuples(self, tuples): + # GH 18756 + idx = IntervalIndex.from_tuples(tuples) + result = idx.to_tuples() + expected = Index(com.asarray_tuplesafe(tuples)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "tuples", + [ + list(zip(range(10), range(1, 11))) + [np.nan], + list( + zip( + date_range("20170101", periods=10), + date_range("20170101", periods=10), + ) + ) + + [np.nan], + list( + zip( + timedelta_range("0 days", periods=10), + timedelta_range("1 day", periods=10), + ) + ) + + [np.nan], + ], + ) + @pytest.mark.parametrize("na_tuple", [True, False]) + def test_to_tuples_na(self, tuples, na_tuple): + # GH 18756 + idx = IntervalIndex.from_tuples(tuples) + result = idx.to_tuples(na_tuple=na_tuple) + + # check the non-NA portion + expected_notna = Index(com.asarray_tuplesafe(tuples[:-1])) + result_notna = result[:-1] + tm.assert_index_equal(result_notna, expected_notna) + + # check the NA portion + result_na = result[-1] + if na_tuple: + assert isinstance(result_na, tuple) + assert len(result_na) == 2 + assert all(isna(x) for x in result_na) + else: + assert isna(result_na) + + def test_nbytes(self): + # GH 19209 + left = np.arange(0, 4, dtype="i8") + right = np.arange(1, 5, dtype="i8") + + result = IntervalIndex.from_arrays(left, right).nbytes + expected = 64 # 4 * 8 * 2 + assert result == expected + + @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) + def test_set_closed(self, name, closed, new_closed): + # GH 21670 + index = interval_range(0, 5, closed=closed, name=name) + result = index.set_closed(new_closed) + expected = interval_range(0, 5, closed=new_closed, name=name) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False]) + def test_set_closed_errors(self, bad_closed): + # GH 21670 + index = interval_range(0, 5) + msg = f"invalid option for 'closed': {bad_closed}" + with pytest.raises(ValueError, match=msg): + index.set_closed(bad_closed) + + def test_is_all_dates(self): + # GH 23576 + year_2017 = Interval( + Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00") + ) + year_2017_index = IntervalIndex([year_2017]) + assert not year_2017_index._is_all_dates + + +def test_dir(): + # GH#27571 dir(interval_index) should not raise + index = IntervalIndex.from_arrays([0, 1], [1, 2]) + result = dir(index) + assert "str" not in result + + +def test_searchsorted_different_argument_classes(listlike_box): + # https://github.com/pandas-dev/pandas/issues/32762 + values = IntervalIndex([Interval(0, 1), Interval(1, 2)]) + result = values.searchsorted(listlike_box(values)) + expected = np.array([0, 1], dtype=result.dtype) + tm.assert_numpy_array_equal(result, expected) + + result = values._data.searchsorted(listlike_box(values)) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2] +) +def test_searchsorted_invalid_argument(arg): + values = IntervalIndex([Interval(0, 1), Interval(1, 2)]) + msg = "'<' not supported between instances of 'pandas._libs.interval.Interval' and " + with pytest.raises(TypeError, match=msg): + values.searchsorted(arg) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval_range.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval_range.py new file mode 100644 index 0000000000000000000000000000000000000000..e8de59f84bcc6d6cece2768f942b4599d3ce1a2d --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval_range.py @@ -0,0 +1,369 @@ +from datetime import timedelta + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_integer + +from pandas import ( + DateOffset, + Interval, + IntervalIndex, + Timedelta, + Timestamp, + date_range, + interval_range, + timedelta_range, +) +import pandas._testing as tm + +from pandas.tseries.offsets import Day + + +@pytest.fixture(params=[None, "foo"]) +def name(request): + return request.param + + +class TestIntervalRange: + @pytest.mark.parametrize("freq, periods", [(1, 100), (2.5, 40), (5, 20), (25, 4)]) + def test_constructor_numeric(self, closed, name, freq, periods): + start, end = 0, 100 + breaks = np.arange(101, step=freq) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + # defined from start/end/freq + result = interval_range( + start=start, end=end, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from start/periods/freq + result = interval_range( + start=start, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from end/periods/freq + result = interval_range( + end=end, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # GH 20976: linspace behavior defined from start/end/periods + result = interval_range( + start=start, end=end, periods=periods, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + @pytest.mark.parametrize( + "freq, periods", [("D", 364), ("2D", 182), ("22D18h", 16), ("ME", 11)] + ) + def test_constructor_timestamp(self, closed, name, freq, periods, tz): + start, end = Timestamp("20180101", tz=tz), Timestamp("20181231", tz=tz) + breaks = date_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + # defined from start/end/freq + result = interval_range( + start=start, end=end, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from start/periods/freq + result = interval_range( + start=start, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from end/periods/freq + result = interval_range( + end=end, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # GH 20976: linspace behavior defined from start/end/periods + if not breaks.freq.n == 1 and tz is None: + result = interval_range( + start=start, end=end, periods=periods, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "freq, periods", [("D", 100), ("2D12h", 40), ("5D", 20), ("25D", 4)] + ) + def test_constructor_timedelta(self, closed, name, freq, periods): + start, end = Timedelta("0 days"), Timedelta("100 days") + breaks = timedelta_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + # defined from start/end/freq + result = interval_range( + start=start, end=end, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from start/periods/freq + result = interval_range( + start=start, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from end/periods/freq + result = interval_range( + end=end, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # GH 20976: linspace behavior defined from start/end/periods + result = interval_range( + start=start, end=end, periods=periods, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "start, end, freq, expected_endpoint", + [ + (0, 10, 3, 9), + (0, 10, 1.5, 9), + (0.5, 10, 3, 9.5), + (Timedelta("0D"), Timedelta("10D"), "2D4h", Timedelta("8D16h")), + ( + Timestamp("2018-01-01"), + Timestamp("2018-02-09"), + "MS", + Timestamp("2018-02-01"), + ), + ( + Timestamp("2018-01-01", tz="US/Eastern"), + Timestamp("2018-01-20", tz="US/Eastern"), + "5D12h", + Timestamp("2018-01-17 12:00:00", tz="US/Eastern"), + ), + ], + ) + def test_early_truncation(self, start, end, freq, expected_endpoint): + # index truncates early if freq causes end to be skipped + result = interval_range(start=start, end=end, freq=freq) + result_endpoint = result.right[-1] + assert result_endpoint == expected_endpoint + + @pytest.mark.parametrize( + "start, end, freq", + [(0.5, None, None), (None, 4.5, None), (0.5, None, 1.5), (None, 6.5, 1.5)], + ) + def test_no_invalid_float_truncation(self, start, end, freq): + # GH 21161 + if freq is None: + breaks = [0.5, 1.5, 2.5, 3.5, 4.5] + else: + breaks = [0.5, 2.0, 3.5, 5.0, 6.5] + expected = IntervalIndex.from_breaks(breaks) + + result = interval_range(start=start, end=end, periods=4, freq=freq) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "start, mid, end", + [ + ( + Timestamp("2018-03-10", tz="US/Eastern"), + Timestamp("2018-03-10 23:30:00", tz="US/Eastern"), + Timestamp("2018-03-12", tz="US/Eastern"), + ), + ( + Timestamp("2018-11-03", tz="US/Eastern"), + Timestamp("2018-11-04 00:30:00", tz="US/Eastern"), + Timestamp("2018-11-05", tz="US/Eastern"), + ), + ], + ) + def test_linspace_dst_transition(self, start, mid, end): + # GH 20976: linspace behavior defined from start/end/periods + # accounts for the hour gained/lost during DST transition + start = start.as_unit("ns") + mid = mid.as_unit("ns") + end = end.as_unit("ns") + result = interval_range(start=start, end=end, periods=2) + expected = IntervalIndex.from_breaks([start, mid, end]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("freq", [2, 2.0]) + @pytest.mark.parametrize("end", [10, 10.0]) + @pytest.mark.parametrize("start", [0, 0.0]) + def test_float_subtype(self, start, end, freq): + # Has float subtype if any of start/end/freq are float, even if all + # resulting endpoints can safely be upcast to integers + + # defined from start/end/freq + index = interval_range(start=start, end=end, freq=freq) + result = index.dtype.subtype + expected = "int64" if is_integer(start + end + freq) else "float64" + assert result == expected + + # defined from start/periods/freq + index = interval_range(start=start, periods=5, freq=freq) + result = index.dtype.subtype + expected = "int64" if is_integer(start + freq) else "float64" + assert result == expected + + # defined from end/periods/freq + index = interval_range(end=end, periods=5, freq=freq) + result = index.dtype.subtype + expected = "int64" if is_integer(end + freq) else "float64" + assert result == expected + + # GH 20976: linspace behavior defined from start/end/periods + index = interval_range(start=start, end=end, periods=5) + result = index.dtype.subtype + expected = "int64" if is_integer(start + end) else "float64" + assert result == expected + + def test_interval_range_fractional_period(self): + # float value for periods + expected = interval_range(start=0, periods=10) + msg = "Non-integer 'periods' in pd.date_range, .* pd.interval_range" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = interval_range(start=0, periods=10.5) + tm.assert_index_equal(result, expected) + + def test_constructor_coverage(self): + # equivalent timestamp-like start/end + start, end = Timestamp("2017-01-01"), Timestamp("2017-01-15") + expected = interval_range(start=start, end=end) + + result = interval_range(start=start.to_pydatetime(), end=end.to_pydatetime()) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timestamp + equiv_freq = [ + "D", + Day(), + Timedelta(days=1), + timedelta(days=1), + DateOffset(days=1), + ] + for freq in equiv_freq: + result = interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + + # equivalent timedelta-like start/end + start, end = Timedelta(days=1), Timedelta(days=10) + expected = interval_range(start=start, end=end) + + result = interval_range(start=start.to_pytimedelta(), end=end.to_pytimedelta()) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timedelta + equiv_freq = ["D", Day(), Timedelta(days=1), timedelta(days=1)] + for freq in equiv_freq: + result = interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + + def test_errors(self): + # not enough params + msg = ( + "Of the four parameters: start, end, periods, and freq, " + "exactly three must be specified" + ) + + with pytest.raises(ValueError, match=msg): + interval_range(start=0) + + with pytest.raises(ValueError, match=msg): + interval_range(end=5) + + with pytest.raises(ValueError, match=msg): + interval_range(periods=2) + + with pytest.raises(ValueError, match=msg): + interval_range() + + # too many params + with pytest.raises(ValueError, match=msg): + interval_range(start=0, end=5, periods=6, freq=1.5) + + # mixed units + msg = "start, end, freq need to be type compatible" + with pytest.raises(TypeError, match=msg): + interval_range(start=0, end=Timestamp("20130101"), freq=2) + + with pytest.raises(TypeError, match=msg): + interval_range(start=0, end=Timedelta("1 day"), freq=2) + + with pytest.raises(TypeError, match=msg): + interval_range(start=0, end=10, freq="D") + + with pytest.raises(TypeError, match=msg): + interval_range(start=Timestamp("20130101"), end=10, freq="D") + + with pytest.raises(TypeError, match=msg): + interval_range( + start=Timestamp("20130101"), end=Timedelta("1 day"), freq="D" + ) + + with pytest.raises(TypeError, match=msg): + interval_range( + start=Timestamp("20130101"), end=Timestamp("20130110"), freq=2 + ) + + with pytest.raises(TypeError, match=msg): + interval_range(start=Timedelta("1 day"), end=10, freq="D") + + with pytest.raises(TypeError, match=msg): + interval_range( + start=Timedelta("1 day"), end=Timestamp("20130110"), freq="D" + ) + + with pytest.raises(TypeError, match=msg): + interval_range(start=Timedelta("1 day"), end=Timedelta("10 days"), freq=2) + + # invalid periods + msg = "periods must be a number, got foo" + with pytest.raises(TypeError, match=msg): + interval_range(start=0, periods="foo") + + # invalid start + msg = "start must be numeric or datetime-like, got foo" + with pytest.raises(ValueError, match=msg): + interval_range(start="foo", periods=10) + + # invalid end + msg = r"end must be numeric or datetime-like, got \(0, 1\]" + with pytest.raises(ValueError, match=msg): + interval_range(end=Interval(0, 1), periods=10) + + # invalid freq for datetime-like + msg = "freq must be numeric or convertible to DateOffset, got foo" + with pytest.raises(ValueError, match=msg): + interval_range(start=0, end=10, freq="foo") + + with pytest.raises(ValueError, match=msg): + interval_range(start=Timestamp("20130101"), periods=10, freq="foo") + + with pytest.raises(ValueError, match=msg): + interval_range(end=Timedelta("1 day"), periods=10, freq="foo") + + # mixed tz + start = Timestamp("2017-01-01", tz="US/Eastern") + end = Timestamp("2017-01-07", tz="US/Pacific") + msg = "Start and end cannot both be tz-aware with different timezones" + with pytest.raises(TypeError, match=msg): + interval_range(start=start, end=end) + + def test_float_freq(self): + # GH 54477 + result = interval_range(0, 1, freq=0.1) + expected = IntervalIndex.from_breaks([0 + 0.1 * n for n in range(11)]) + tm.assert_index_equal(result, expected) + + result = interval_range(0, 1, freq=0.6) + expected = IntervalIndex.from_breaks([0, 0.6]) + tm.assert_index_equal(result, expected) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval_tree.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval_tree.py new file mode 100644 index 0000000000000000000000000000000000000000..78388e84fc6dc1af7dadd78b88a1155ed8cfd812 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_interval_tree.py @@ -0,0 +1,208 @@ +from itertools import permutations + +import numpy as np +import pytest + +from pandas._libs.interval import IntervalTree +from pandas.compat import IS64 + +import pandas._testing as tm + + +def skipif_32bit(param): + """ + Skip parameters in a parametrize on 32bit systems. Specifically used + here to skip leaf_size parameters related to GH 23440. + """ + marks = pytest.mark.skipif(not IS64, reason="GH 23440: int type mismatch on 32bit") + return pytest.param(param, marks=marks) + + +@pytest.fixture(params=["int64", "float64", "uint64"]) +def dtype(request): + return request.param + + +@pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10]) +def leaf_size(request): + """ + Fixture to specify IntervalTree leaf_size parameter; to be used with the + tree fixture. + """ + return request.param + + +@pytest.fixture( + params=[ + np.arange(5, dtype="int64"), + np.arange(5, dtype="uint64"), + np.arange(5, dtype="float64"), + np.array([0, 1, 2, 3, 4, np.nan], dtype="float64"), + ] +) +def tree(request, leaf_size): + left = request.param + return IntervalTree(left, left + 2, leaf_size=leaf_size) + + +class TestIntervalTree: + def test_get_indexer(self, tree): + result = tree.get_indexer(np.array([1.0, 5.5, 6.5])) + expected = np.array([0, 4, -1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + with pytest.raises( + KeyError, match="'indexer does not intersect a unique set of intervals'" + ): + tree.get_indexer(np.array([3.0])) + + @pytest.mark.parametrize( + "dtype, target_value, target_dtype", + [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")], + ) + def test_get_indexer_overflow(self, dtype, target_value, target_dtype): + left, right = np.array([0, 1], dtype=dtype), np.array([1, 2], dtype=dtype) + tree = IntervalTree(left, right) + + result = tree.get_indexer(np.array([target_value], dtype=target_dtype)) + expected = np.array([-1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_non_unique(self, tree): + indexer, missing = tree.get_indexer_non_unique(np.array([1.0, 2.0, 6.5])) + + result = indexer[:1] + expected = np.array([0], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + result = np.sort(indexer[1:3]) + expected = np.array([0, 1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + result = np.sort(indexer[3:]) + expected = np.array([-1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + result = missing + expected = np.array([2], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "dtype, target_value, target_dtype", + [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")], + ) + def test_get_indexer_non_unique_overflow(self, dtype, target_value, target_dtype): + left, right = np.array([0, 2], dtype=dtype), np.array([1, 3], dtype=dtype) + tree = IntervalTree(left, right) + target = np.array([target_value], dtype=target_dtype) + + result_indexer, result_missing = tree.get_indexer_non_unique(target) + expected_indexer = np.array([-1], dtype="intp") + tm.assert_numpy_array_equal(result_indexer, expected_indexer) + + expected_missing = np.array([0], dtype="intp") + tm.assert_numpy_array_equal(result_missing, expected_missing) + + def test_duplicates(self, dtype): + left = np.array([0, 0, 0], dtype=dtype) + tree = IntervalTree(left, left + 1) + + with pytest.raises( + KeyError, match="'indexer does not intersect a unique set of intervals'" + ): + tree.get_indexer(np.array([0.5])) + + indexer, missing = tree.get_indexer_non_unique(np.array([0.5])) + result = np.sort(indexer) + expected = np.array([0, 1, 2], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + result = missing + expected = np.array([], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000] + ) + def test_get_indexer_closed(self, closed, leaf_size): + x = np.arange(1000, dtype="float64") + found = x.astype("intp") + not_found = (-1 * np.ones(1000)).astype("intp") + + tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size) + tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25)) + + expected = found if tree.closed_left else not_found + tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.0)) + + expected = found if tree.closed_right else not_found + tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5)) + + @pytest.mark.parametrize( + "left, right, expected", + [ + (np.array([0, 1, 4], dtype="int64"), np.array([2, 3, 5]), True), + (np.array([0, 1, 2], dtype="int64"), np.array([5, 4, 3]), True), + (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True), + (np.array([0, 2, 4], dtype="int64"), np.array([1, 3, 5]), False), + (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False), + ], + ) + @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3)))) + def test_is_overlapping(self, closed, order, left, right, expected): + # GH 23309 + tree = IntervalTree(left[order], right[order], closed=closed) + result = tree.is_overlapping + assert result is expected + + @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3)))) + def test_is_overlapping_endpoints(self, closed, order): + """shared endpoints are marked as overlapping""" + # GH 23309 + left, right = np.arange(3, dtype="int64"), np.arange(1, 4) + tree = IntervalTree(left[order], right[order], closed=closed) + result = tree.is_overlapping + expected = closed == "both" + assert result is expected + + @pytest.mark.parametrize( + "left, right", + [ + (np.array([], dtype="int64"), np.array([], dtype="int64")), + (np.array([0], dtype="int64"), np.array([1], dtype="int64")), + (np.array([np.nan]), np.array([np.nan])), + (np.array([np.nan] * 3), np.array([np.nan] * 3)), + ], + ) + def test_is_overlapping_trivial(self, closed, left, right): + # GH 23309 + tree = IntervalTree(left, right, closed=closed) + assert tree.is_overlapping is False + + @pytest.mark.skipif(not IS64, reason="GH 23440") + def test_construction_overflow(self): + # GH 25485 + left, right = np.arange(101, dtype="int64"), [np.iinfo(np.int64).max] * 101 + tree = IntervalTree(left, right) + + # pivot should be average of left/right medians + result = tree.root.pivot + expected = (50 + np.iinfo(np.int64).max) / 2 + assert result == expected + + @pytest.mark.parametrize( + "left, right, expected", + [ + ([-np.inf, 1.0], [1.0, 2.0], 0.0), + ([-np.inf, -2.0], [-2.0, -1.0], -2.0), + ([-2.0, -1.0], [-1.0, np.inf], 0.0), + ([1.0, 2.0], [2.0, np.inf], 2.0), + ], + ) + def test_inf_bound_infinite_recursion(self, left, right, expected): + # GH 46658 + + tree = IntervalTree(left * 101, right * 101) + + result = tree.root.pivot + assert result == expected diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_join.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_join.py new file mode 100644 index 0000000000000000000000000000000000000000..2f42c530a66868fa69b1d449e75f84d42592bb77 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_join.py @@ -0,0 +1,44 @@ +import pytest + +from pandas import ( + IntervalIndex, + MultiIndex, + RangeIndex, +) +import pandas._testing as tm + + +@pytest.fixture +def range_index(): + return RangeIndex(3, name="range_index") + + +@pytest.fixture +def interval_index(): + return IntervalIndex.from_tuples( + [(0.0, 1.0), (1.0, 2.0), (1.5, 2.5)], name="interval_index" + ) + + +def test_join_overlapping_in_mi_to_same_intervalindex(range_index, interval_index): + # GH-45661 + multi_index = MultiIndex.from_product([interval_index, range_index]) + result = multi_index.join(interval_index) + + tm.assert_index_equal(result, multi_index) + + +def test_join_overlapping_to_multiindex_with_same_interval(range_index, interval_index): + # GH-45661 + multi_index = MultiIndex.from_product([interval_index, range_index]) + result = interval_index.join(multi_index) + + tm.assert_index_equal(result, multi_index) + + +def test_join_overlapping_interval_to_another_intervalindex(interval_index): + # GH-45661 + flipped_interval_index = interval_index[::-1] + result = interval_index.join(flipped_interval_index) + + tm.assert_index_equal(result, interval_index) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_pickle.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_pickle.py new file mode 100644 index 0000000000000000000000000000000000000000..308a90e72eab5db55f300341212d2c04e82c6900 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_pickle.py @@ -0,0 +1,13 @@ +import pytest + +from pandas import IntervalIndex +import pandas._testing as tm + + +class TestPickle: + @pytest.mark.parametrize("closed", ["left", "right", "both"]) + def test_pickle_round_trip_closed(self, closed): + # https://github.com/pandas-dev/pandas/issues/35658 + idx = IntervalIndex.from_tuples([(1, 2), (2, 3)], closed=closed) + result = tm.round_trip_pickle(idx) + tm.assert_index_equal(result, idx) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_setops.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_setops.py new file mode 100644 index 0000000000000000000000000000000000000000..1b0816a9405cb9dd6ed81691e72012c948b898a2 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/interval/test_setops.py @@ -0,0 +1,208 @@ +import numpy as np +import pytest + +from pandas import ( + Index, + IntervalIndex, + Timestamp, + interval_range, +) +import pandas._testing as tm + + +def monotonic_index(start, end, dtype="int64", closed="right"): + return IntervalIndex.from_breaks(np.arange(start, end, dtype=dtype), closed=closed) + + +def empty_index(dtype="int64", closed="right"): + return IntervalIndex(np.array([], dtype=dtype), closed=closed) + + +class TestIntervalIndex: + def test_union(self, closed, sort): + index = monotonic_index(0, 11, closed=closed) + other = monotonic_index(5, 13, closed=closed) + + expected = monotonic_index(0, 13, closed=closed) + result = index[::-1].union(other, sort=sort) + if sort in (None, True): + tm.assert_index_equal(result, expected) + else: + tm.assert_index_equal(result.sort_values(), expected) + + result = other[::-1].union(index, sort=sort) + if sort in (None, True): + tm.assert_index_equal(result, expected) + else: + tm.assert_index_equal(result.sort_values(), expected) + + tm.assert_index_equal(index.union(index, sort=sort), index) + tm.assert_index_equal(index.union(index[:1], sort=sort), index) + + def test_union_empty_result(self, closed, sort): + # GH 19101: empty result, same dtype + index = empty_index(dtype="int64", closed=closed) + result = index.union(index, sort=sort) + tm.assert_index_equal(result, index) + + # GH 19101: empty result, different numeric dtypes -> common dtype is f8 + other = empty_index(dtype="float64", closed=closed) + result = index.union(other, sort=sort) + expected = other + tm.assert_index_equal(result, expected) + + other = index.union(index, sort=sort) + tm.assert_index_equal(result, expected) + + other = empty_index(dtype="uint64", closed=closed) + result = index.union(other, sort=sort) + tm.assert_index_equal(result, expected) + + result = other.union(index, sort=sort) + tm.assert_index_equal(result, expected) + + def test_intersection(self, closed, sort): + index = monotonic_index(0, 11, closed=closed) + other = monotonic_index(5, 13, closed=closed) + + expected = monotonic_index(5, 11, closed=closed) + result = index[::-1].intersection(other, sort=sort) + if sort in (None, True): + tm.assert_index_equal(result, expected) + else: + tm.assert_index_equal(result.sort_values(), expected) + + result = other[::-1].intersection(index, sort=sort) + if sort in (None, True): + tm.assert_index_equal(result, expected) + else: + tm.assert_index_equal(result.sort_values(), expected) + + tm.assert_index_equal(index.intersection(index, sort=sort), index) + + # GH 26225: nested intervals + index = IntervalIndex.from_tuples([(1, 2), (1, 3), (1, 4), (0, 2)]) + other = IntervalIndex.from_tuples([(1, 2), (1, 3)]) + expected = IntervalIndex.from_tuples([(1, 2), (1, 3)]) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + # GH 26225 + index = IntervalIndex.from_tuples([(0, 3), (0, 2)]) + other = IntervalIndex.from_tuples([(0, 2), (1, 3)]) + expected = IntervalIndex.from_tuples([(0, 2)]) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + # GH 26225: duplicate nan element + index = IntervalIndex([np.nan, np.nan]) + other = IntervalIndex([np.nan]) + expected = IntervalIndex([np.nan]) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + def test_intersection_empty_result(self, closed, sort): + index = monotonic_index(0, 11, closed=closed) + + # GH 19101: empty result, same dtype + other = monotonic_index(300, 314, closed=closed) + expected = empty_index(dtype="int64", closed=closed) + result = index.intersection(other, sort=sort) + tm.assert_index_equal(result, expected) + + # GH 19101: empty result, different numeric dtypes -> common dtype is float64 + other = monotonic_index(300, 314, dtype="float64", closed=closed) + result = index.intersection(other, sort=sort) + expected = other[:0] + tm.assert_index_equal(result, expected) + + other = monotonic_index(300, 314, dtype="uint64", closed=closed) + result = index.intersection(other, sort=sort) + tm.assert_index_equal(result, expected) + + def test_intersection_duplicates(self): + # GH#38743 + index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)]) + other = IntervalIndex.from_tuples([(1, 2), (2, 3)]) + expected = IntervalIndex.from_tuples([(1, 2), (2, 3)]) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + def test_difference(self, closed, sort): + index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed) + result = index.difference(index[:1], sort=sort) + expected = index[1:] + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + # GH 19101: empty result, same dtype + result = index.difference(index, sort=sort) + expected = empty_index(dtype="int64", closed=closed) + tm.assert_index_equal(result, expected) + + # GH 19101: empty result, different dtypes + other = IntervalIndex.from_arrays( + index.left.astype("float64"), index.right, closed=closed + ) + result = index.difference(other, sort=sort) + tm.assert_index_equal(result, expected) + + def test_symmetric_difference(self, closed, sort): + index = monotonic_index(0, 11, closed=closed) + result = index[1:].symmetric_difference(index[:-1], sort=sort) + expected = IntervalIndex([index[0], index[-1]]) + if sort in (None, True): + tm.assert_index_equal(result, expected) + else: + tm.assert_index_equal(result.sort_values(), expected) + + # GH 19101: empty result, same dtype + result = index.symmetric_difference(index, sort=sort) + expected = empty_index(dtype="int64", closed=closed) + if sort in (None, True): + tm.assert_index_equal(result, expected) + else: + tm.assert_index_equal(result.sort_values(), expected) + + # GH 19101: empty result, different dtypes + other = IntervalIndex.from_arrays( + index.left.astype("float64"), index.right, closed=closed + ) + result = index.symmetric_difference(other, sort=sort) + expected = empty_index(dtype="float64", closed=closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.filterwarnings("ignore:'<' not supported between:RuntimeWarning") + @pytest.mark.parametrize( + "op_name", ["union", "intersection", "difference", "symmetric_difference"] + ) + def test_set_incompatible_types(self, closed, op_name, sort): + index = monotonic_index(0, 11, closed=closed) + set_op = getattr(index, op_name) + + # TODO: standardize return type of non-union setops type(self vs other) + # non-IntervalIndex + if op_name == "difference": + expected = index + else: + expected = getattr(index.astype("O"), op_name)(Index([1, 2, 3])) + result = set_op(Index([1, 2, 3]), sort=sort) + tm.assert_index_equal(result, expected) + + # mixed closed -> cast to object + for other_closed in {"right", "left", "both", "neither"} - {closed}: + other = monotonic_index(0, 11, closed=other_closed) + expected = getattr(index.astype(object), op_name)(other, sort=sort) + if op_name == "difference": + expected = index + result = set_op(other, sort=sort) + tm.assert_index_equal(result, expected) + + # GH 19016: incompatible dtypes -> cast to object + other = interval_range(Timestamp("20180101"), periods=9, closed=closed) + expected = getattr(index.astype(object), op_name)(other, sort=sort) + if op_name == "difference": + expected = index + result = set_op(other, sort=sort) + tm.assert_index_equal(result, expected) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/__init__.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/conftest.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/conftest.py new file mode 100644 index 0000000000000000000000000000000000000000..15062aee56e3a1b91d1f6eb76a4f86e381e0ad44 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/conftest.py @@ -0,0 +1,27 @@ +import numpy as np +import pytest + +from pandas import ( + Index, + MultiIndex, +) + + +# Note: identical the "multi" entry in the top-level "index" fixture +@pytest.fixture +def idx(): + # a MultiIndex used to test the general functionality of the + # general functionality of this object + major_axis = Index(["foo", "bar", "baz", "qux"]) + minor_axis = Index(["one", "two"]) + + major_codes = np.array([0, 0, 1, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + index_names = ["first", "second"] + mi = MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=index_names, + verify_integrity=False, + ) + return mi diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_analytics.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_analytics.py new file mode 100644 index 0000000000000000000000000000000000000000..87f1439db5fc87c3be08e3675df1dae0fdb5554d --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_analytics.py @@ -0,0 +1,263 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + MultiIndex, + date_range, + period_range, +) +import pandas._testing as tm + + +def test_infer_objects(idx): + with pytest.raises(NotImplementedError, match="to_frame"): + idx.infer_objects() + + +def test_shift(idx): + # GH8083 test the base class for shift + msg = ( + "This method is only implemented for DatetimeIndex, PeriodIndex and " + "TimedeltaIndex; Got type MultiIndex" + ) + with pytest.raises(NotImplementedError, match=msg): + idx.shift(1) + with pytest.raises(NotImplementedError, match=msg): + idx.shift(1, 2) + + +def test_groupby(idx): + groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2])) + labels = idx.tolist() + exp = {1: labels[:3], 2: labels[3:]} + tm.assert_dict_equal(groups, exp) + + # GH5620 + groups = idx.groupby(idx) + exp = {key: [key] for key in idx} + tm.assert_dict_equal(groups, exp) + + +def test_truncate_multiindex(): + # GH 34564 for MultiIndex level names check + major_axis = Index(list(range(4))) + minor_axis = Index(list(range(2))) + + major_codes = np.array([0, 0, 1, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + + index = MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=["L1", "L2"], + ) + + result = index.truncate(before=1) + assert "foo" not in result.levels[0] + assert 1 in result.levels[0] + assert index.names == result.names + + result = index.truncate(after=1) + assert 2 not in result.levels[0] + assert 1 in result.levels[0] + assert index.names == result.names + + result = index.truncate(before=1, after=2) + assert len(result.levels[0]) == 2 + assert index.names == result.names + + msg = "after < before" + with pytest.raises(ValueError, match=msg): + index.truncate(3, 1) + + +# TODO: reshape + + +def test_reorder_levels(idx): + # this blows up + with pytest.raises(IndexError, match="^Too many levels"): + idx.reorder_levels([2, 1, 0]) + + +def test_numpy_repeat(): + reps = 2 + numbers = [1, 2, 3] + names = np.array(["foo", "bar"]) + + m = MultiIndex.from_product([numbers, names], names=names) + expected = MultiIndex.from_product([numbers, names.repeat(reps)], names=names) + tm.assert_index_equal(np.repeat(m, reps), expected) + + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.repeat(m, reps, axis=1) + + +def test_append_mixed_dtypes(): + # GH 13660 + dti = date_range("2011-01-01", freq="ME", periods=3) + dti_tz = date_range("2011-01-01", freq="ME", periods=3, tz="US/Eastern") + pi = period_range("2011-01", freq="M", periods=3) + + mi = MultiIndex.from_arrays( + [[1, 2, 3], [1.1, np.nan, 3.3], ["a", "b", "c"], dti, dti_tz, pi] + ) + assert mi.nlevels == 6 + + res = mi.append(mi) + exp = MultiIndex.from_arrays( + [ + [1, 2, 3, 1, 2, 3], + [1.1, np.nan, 3.3, 1.1, np.nan, 3.3], + ["a", "b", "c", "a", "b", "c"], + dti.append(dti), + dti_tz.append(dti_tz), + pi.append(pi), + ] + ) + tm.assert_index_equal(res, exp) + + other = MultiIndex.from_arrays( + [ + ["x", "y", "z"], + ["x", "y", "z"], + ["x", "y", "z"], + ["x", "y", "z"], + ["x", "y", "z"], + ["x", "y", "z"], + ] + ) + + res = mi.append(other) + exp = MultiIndex.from_arrays( + [ + [1, 2, 3, "x", "y", "z"], + [1.1, np.nan, 3.3, "x", "y", "z"], + ["a", "b", "c", "x", "y", "z"], + dti.append(Index(["x", "y", "z"])), + dti_tz.append(Index(["x", "y", "z"])), + pi.append(Index(["x", "y", "z"])), + ] + ) + tm.assert_index_equal(res, exp) + + +def test_iter(idx): + result = list(idx) + expected = [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ] + assert result == expected + + +def test_sub(idx): + first = idx + + # - now raises (previously was set op difference) + msg = "cannot perform __sub__ with this index type: MultiIndex" + with pytest.raises(TypeError, match=msg): + first - idx[-3:] + with pytest.raises(TypeError, match=msg): + idx[-3:] - first + with pytest.raises(TypeError, match=msg): + idx[-3:] - first.tolist() + msg = "cannot perform __rsub__ with this index type: MultiIndex" + with pytest.raises(TypeError, match=msg): + first.tolist() - idx[-3:] + + +def test_map(idx): + # callable + index = idx + + result = index.map(lambda x: x) + tm.assert_index_equal(result, index) + + +@pytest.mark.parametrize( + "mapper", + [ + lambda values, idx: {i: e for e, i in zip(values, idx)}, + lambda values, idx: pd.Series(values, idx), + ], +) +def test_map_dictlike(idx, mapper): + identity = mapper(idx.values, idx) + + # we don't infer to uint64 dtype for a dict + if idx.dtype == np.uint64 and isinstance(identity, dict): + expected = idx.astype("int64") + else: + expected = idx + + result = idx.map(identity) + tm.assert_index_equal(result, expected) + + # empty mappable + expected = Index([np.nan] * len(idx)) + result = idx.map(mapper(expected, idx)) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "func", + [ + np.exp, + np.exp2, + np.expm1, + np.log, + np.log2, + np.log10, + np.log1p, + np.sqrt, + np.sin, + np.cos, + np.tan, + np.arcsin, + np.arccos, + np.arctan, + np.sinh, + np.cosh, + np.tanh, + np.arcsinh, + np.arccosh, + np.arctanh, + np.deg2rad, + np.rad2deg, + ], + ids=lambda func: func.__name__, +) +def test_numpy_ufuncs(idx, func): + # test ufuncs of numpy. see: + # https://numpy.org/doc/stable/reference/ufuncs.html + + expected_exception = TypeError + msg = ( + "loop of ufunc does not support argument 0 of type tuple which " + f"has no callable {func.__name__} method" + ) + with pytest.raises(expected_exception, match=msg): + func(idx) + + +@pytest.mark.parametrize( + "func", + [np.isfinite, np.isinf, np.isnan, np.signbit], + ids=lambda func: func.__name__, +) +def test_numpy_type_funcs(idx, func): + msg = ( + f"ufunc '{func.__name__}' not supported for the input types, and the inputs " + "could not be safely coerced to any supported types according to " + "the casting rule ''safe''" + ) + with pytest.raises(TypeError, match=msg): + func(idx) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_astype.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_astype.py new file mode 100644 index 0000000000000000000000000000000000000000..29908537fbe590328ac586e05f90f3cc24cab9ab --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_astype.py @@ -0,0 +1,30 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas._testing as tm + + +def test_astype(idx): + expected = idx.copy() + actual = idx.astype("O") + tm.assert_copy(actual.levels, expected.levels) + tm.assert_copy(actual.codes, expected.codes) + assert actual.names == list(expected.names) + + with pytest.raises(TypeError, match="^Setting.*dtype.*object"): + idx.astype(np.dtype(int)) + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_astype_category(idx, ordered): + # GH 18630 + msg = "> 1 ndim Categorical are not supported at this time" + with pytest.raises(NotImplementedError, match=msg): + idx.astype(CategoricalDtype(ordered=ordered)) + + if ordered is False: + # dtype='category' defaults to ordered=False, so only test once + with pytest.raises(NotImplementedError, match=msg): + idx.astype("category") diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_constructors.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_constructors.py new file mode 100644 index 0000000000000000000000000000000000000000..b1180f2d7af145dd30592925bfd16a4c2484a88f --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_constructors.py @@ -0,0 +1,860 @@ +from datetime import ( + date, + datetime, +) +import itertools + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + +import pandas as pd +from pandas import ( + Index, + MultiIndex, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +def test_constructor_single_level(): + result = MultiIndex( + levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"] + ) + assert isinstance(result, MultiIndex) + expected = Index(["foo", "bar", "baz", "qux"], name="first") + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ["first"] + + +def test_constructor_no_levels(): + msg = "non-zero number of levels/codes" + with pytest.raises(ValueError, match=msg): + MultiIndex(levels=[], codes=[]) + + msg = "Must pass both levels and codes" + with pytest.raises(TypeError, match=msg): + MultiIndex(levels=[]) + with pytest.raises(TypeError, match=msg): + MultiIndex(codes=[]) + + +def test_constructor_nonhashable_names(): + # GH 20527 + levels = [[1, 2], ["one", "two"]] + codes = [[0, 0, 1, 1], [0, 1, 0, 1]] + names = (["foo"], ["bar"]) + msg = r"MultiIndex\.name must be a hashable type" + with pytest.raises(TypeError, match=msg): + MultiIndex(levels=levels, codes=codes, names=names) + + # With .rename() + mi = MultiIndex( + levels=[[1, 2], ["one", "two"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=("foo", "bar"), + ) + renamed = [["fooo"], ["barr"]] + with pytest.raises(TypeError, match=msg): + mi.rename(names=renamed) + + # With .set_names() + with pytest.raises(TypeError, match=msg): + mi.set_names(names=renamed) + + +def test_constructor_mismatched_codes_levels(idx): + codes = [np.array([1]), np.array([2]), np.array([3])] + levels = ["a"] + + msg = "Length of levels and codes must be the same" + with pytest.raises(ValueError, match=msg): + MultiIndex(levels=levels, codes=codes) + + length_error = ( + r"On level 0, code max \(3\) >= length of level \(1\)\. " + "NOTE: this index is in an inconsistent state" + ) + label_error = r"Unequal code lengths: \[4, 2\]" + code_value_error = r"On level 0, code value \(-2\) < -1" + + # important to check that it's looking at the right thing. + with pytest.raises(ValueError, match=length_error): + MultiIndex(levels=[["a"], ["b"]], codes=[[0, 1, 2, 3], [0, 3, 4, 1]]) + + with pytest.raises(ValueError, match=label_error): + MultiIndex(levels=[["a"], ["b"]], codes=[[0, 0, 0, 0], [0, 0]]) + + # external API + with pytest.raises(ValueError, match=length_error): + idx.copy().set_levels([["a"], ["b"]]) + + with pytest.raises(ValueError, match=label_error): + idx.copy().set_codes([[0, 0, 0, 0], [0, 0]]) + + # test set_codes with verify_integrity=False + # the setting should not raise any value error + idx.copy().set_codes(codes=[[0, 0, 0, 0], [0, 0]], verify_integrity=False) + + # code value smaller than -1 + with pytest.raises(ValueError, match=code_value_error): + MultiIndex(levels=[["a"], ["b"]], codes=[[0, -2], [0, 0]]) + + +def test_na_levels(): + # GH26408 + # test if codes are re-assigned value -1 for levels + # with missing values (NaN, NaT, None) + result = MultiIndex( + levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[0, -1, 1, 2, 3, 4]] + ) + expected = MultiIndex( + levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[-1, -1, -1, -1, 3, 4]] + ) + tm.assert_index_equal(result, expected) + + result = MultiIndex( + levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[0, -1, 1, 2, 3, 4]] + ) + expected = MultiIndex( + levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[-1, -1, 1, -1, 3, -1]] + ) + tm.assert_index_equal(result, expected) + + # verify set_levels and set_codes + result = MultiIndex( + levels=[[1, 2, 3, 4, 5]], codes=[[0, -1, 1, 2, 3, 4]] + ).set_levels([[np.nan, "s", pd.NaT, 128, None]]) + tm.assert_index_equal(result, expected) + + result = MultiIndex( + levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[1, 2, 2, 2, 2, 2]] + ).set_codes([[0, -1, 1, 2, 3, 4]]) + tm.assert_index_equal(result, expected) + + +def test_copy_in_constructor(): + levels = np.array(["a", "b", "c"]) + codes = np.array([1, 1, 2, 0, 0, 1, 1]) + val = codes[0] + mi = MultiIndex(levels=[levels, levels], codes=[codes, codes], copy=True) + assert mi.codes[0][0] == val + codes[0] = 15 + assert mi.codes[0][0] == val + val = levels[0] + levels[0] = "PANDA" + assert mi.levels[0][0] == val + + +# ---------------------------------------------------------------------------- +# from_arrays +# ---------------------------------------------------------------------------- +def test_from_arrays(idx): + arrays = [ + np.asarray(lev).take(level_codes) + for lev, level_codes in zip(idx.levels, idx.codes) + ] + + # list of arrays as input + result = MultiIndex.from_arrays(arrays, names=idx.names) + tm.assert_index_equal(result, idx) + + # infer correctly + result = MultiIndex.from_arrays([[pd.NaT, Timestamp("20130101")], ["a", "b"]]) + assert result.levels[0].equals(Index([Timestamp("20130101")])) + assert result.levels[1].equals(Index(["a", "b"])) + + +def test_from_arrays_iterator(idx): + # GH 18434 + arrays = [ + np.asarray(lev).take(level_codes) + for lev, level_codes in zip(idx.levels, idx.codes) + ] + + # iterator as input + result = MultiIndex.from_arrays(iter(arrays), names=idx.names) + tm.assert_index_equal(result, idx) + + # invalid iterator input + msg = "Input must be a list / sequence of array-likes." + with pytest.raises(TypeError, match=msg): + MultiIndex.from_arrays(0) + + +def test_from_arrays_tuples(idx): + arrays = tuple( + tuple(np.asarray(lev).take(level_codes)) + for lev, level_codes in zip(idx.levels, idx.codes) + ) + + # tuple of tuples as input + result = MultiIndex.from_arrays(arrays, names=idx.names) + tm.assert_index_equal(result, idx) + + +@pytest.mark.parametrize( + ("idx1", "idx2"), + [ + ( + pd.period_range("2011-01-01", freq="D", periods=3), + pd.period_range("2015-01-01", freq="h", periods=3), + ), + ( + date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"), + date_range("2015-01-01 10:00", freq="h", periods=3, tz="Asia/Tokyo"), + ), + ( + pd.timedelta_range("1 days", freq="D", periods=3), + pd.timedelta_range("2 hours", freq="h", periods=3), + ), + ], +) +def test_from_arrays_index_series_period_datetimetz_and_timedelta(idx1, idx2): + result = MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + + tm.assert_index_equal(result, result2) + + +def test_from_arrays_index_datetimelike_mixed(): + idx1 = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern") + idx2 = date_range("2015-01-01 10:00", freq="h", periods=3) + idx3 = pd.timedelta_range("1 days", freq="D", periods=3) + idx4 = pd.period_range("2011-01-01", freq="D", periods=3) + + result = MultiIndex.from_arrays([idx1, idx2, idx3, idx4]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + tm.assert_index_equal(result.get_level_values(2), idx3) + tm.assert_index_equal(result.get_level_values(3), idx4) + + result2 = MultiIndex.from_arrays( + [Series(idx1), Series(idx2), Series(idx3), Series(idx4)] + ) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + tm.assert_index_equal(result2.get_level_values(2), idx3) + tm.assert_index_equal(result2.get_level_values(3), idx4) + + tm.assert_index_equal(result, result2) + + +def test_from_arrays_index_series_categorical(): + # GH13743 + idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=False) + idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=True) + + result = MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + + result3 = MultiIndex.from_arrays([idx1.values, idx2.values]) + tm.assert_index_equal(result3.get_level_values(0), idx1) + tm.assert_index_equal(result3.get_level_values(1), idx2) + + +def test_from_arrays_empty(): + # 0 levels + msg = "Must pass non-zero number of levels/codes" + with pytest.raises(ValueError, match=msg): + MultiIndex.from_arrays(arrays=[]) + + # 1 level + result = MultiIndex.from_arrays(arrays=[[]], names=["A"]) + assert isinstance(result, MultiIndex) + expected = Index([], name="A") + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ["A"] + + # N levels + for N in [2, 3]: + arrays = [[]] * N + names = list("ABC")[:N] + result = MultiIndex.from_arrays(arrays=arrays, names=names) + expected = MultiIndex(levels=[[]] * N, codes=[[]] * N, names=names) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "invalid_sequence_of_arrays", + [ + 1, + [1], + [1, 2], + [[1], 2], + [1, [2]], + "a", + ["a"], + ["a", "b"], + [["a"], "b"], + (1,), + (1, 2), + ([1], 2), + (1, [2]), + "a", + ("a",), + ("a", "b"), + (["a"], "b"), + [(1,), 2], + [1, (2,)], + [("a",), "b"], + ((1,), 2), + (1, (2,)), + (("a",), "b"), + ], +) +def test_from_arrays_invalid_input(invalid_sequence_of_arrays): + msg = "Input must be a list / sequence of array-likes" + with pytest.raises(TypeError, match=msg): + MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays) + + +@pytest.mark.parametrize( + "idx1, idx2", [([1, 2, 3], ["a", "b"]), ([], ["a", "b"]), ([1, 2, 3], [])] +) +def test_from_arrays_different_lengths(idx1, idx2): + # see gh-13599 + msg = "^all arrays must be same length$" + with pytest.raises(ValueError, match=msg): + MultiIndex.from_arrays([idx1, idx2]) + + +def test_from_arrays_respects_none_names(): + # GH27292 + a = Series([1, 2, 3], name="foo") + b = Series(["a", "b", "c"], name="bar") + + result = MultiIndex.from_arrays([a, b], names=None) + expected = MultiIndex( + levels=[[1, 2, 3], ["a", "b", "c"]], codes=[[0, 1, 2], [0, 1, 2]], names=None + ) + + tm.assert_index_equal(result, expected) + + +# ---------------------------------------------------------------------------- +# from_tuples +# ---------------------------------------------------------------------------- +def test_from_tuples(): + msg = "Cannot infer number of levels from empty list" + with pytest.raises(TypeError, match=msg): + MultiIndex.from_tuples([]) + + expected = MultiIndex( + levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"] + ) + + # input tuples + result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=["a", "b"]) + tm.assert_index_equal(result, expected) + + +def test_from_tuples_iterator(): + # GH 18434 + # input iterator for tuples + expected = MultiIndex( + levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"] + ) + + result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=["a", "b"]) + tm.assert_index_equal(result, expected) + + # input non-iterables + msg = "Input must be a list / sequence of tuple-likes." + with pytest.raises(TypeError, match=msg): + MultiIndex.from_tuples(0) + + +def test_from_tuples_empty(): + # GH 16777 + result = MultiIndex.from_tuples([], names=["a", "b"]) + expected = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"]) + tm.assert_index_equal(result, expected) + + +def test_from_tuples_index_values(idx): + result = MultiIndex.from_tuples(idx) + assert (result.values == idx.values).all() + + +def test_tuples_with_name_string(): + # GH 15110 and GH 14848 + + li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] + msg = "Names should be list-like for a MultiIndex" + with pytest.raises(ValueError, match=msg): + Index(li, name="abc") + with pytest.raises(ValueError, match=msg): + Index(li, name="a") + + +def test_from_tuples_with_tuple_label(): + # GH 15457 + expected = pd.DataFrame( + [[2, 1, 2], [4, (1, 2), 3]], columns=["a", "b", "c"] + ).set_index(["a", "b"]) + idx = MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=("a", "b")) + result = pd.DataFrame([2, 3], columns=["c"], index=idx) + tm.assert_frame_equal(expected, result) + + +# ---------------------------------------------------------------------------- +# from_product +# ---------------------------------------------------------------------------- +def test_from_product_empty_zero_levels(): + # 0 levels + msg = "Must pass non-zero number of levels/codes" + with pytest.raises(ValueError, match=msg): + MultiIndex.from_product([]) + + +def test_from_product_empty_one_level(): + result = MultiIndex.from_product([[]], names=["A"]) + expected = Index([], name="A") + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ["A"] + + +@pytest.mark.parametrize( + "first, second", [([], []), (["foo", "bar", "baz"], []), ([], ["a", "b", "c"])] +) +def test_from_product_empty_two_levels(first, second): + names = ["A", "B"] + result = MultiIndex.from_product([first, second], names=names) + expected = MultiIndex(levels=[first, second], codes=[[], []], names=names) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("N", list(range(4))) +def test_from_product_empty_three_levels(N): + # GH12258 + names = ["A", "B", "C"] + lvl2 = list(range(N)) + result = MultiIndex.from_product([[], lvl2, []], names=names) + expected = MultiIndex(levels=[[], lvl2, []], codes=[[], [], []], names=names) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "invalid_input", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]] +) +def test_from_product_invalid_input(invalid_input): + msg = r"Input must be a list / sequence of iterables|Input must be list-like" + with pytest.raises(TypeError, match=msg): + MultiIndex.from_product(iterables=invalid_input) + + +def test_from_product_datetimeindex(): + dt_index = date_range("2000-01-01", periods=2) + mi = MultiIndex.from_product([[1, 2], dt_index]) + etalon = construct_1d_object_array_from_listlike( + [ + (1, Timestamp("2000-01-01")), + (1, Timestamp("2000-01-02")), + (2, Timestamp("2000-01-01")), + (2, Timestamp("2000-01-02")), + ] + ) + tm.assert_numpy_array_equal(mi.values, etalon) + + +def test_from_product_rangeindex(): + # RangeIndex is preserved by factorize, so preserved in levels + rng = Index(range(5)) + other = ["a", "b"] + mi = MultiIndex.from_product([rng, other]) + tm.assert_index_equal(mi._levels[0], rng, exact=True) + + +@pytest.mark.parametrize("ordered", [False, True]) +@pytest.mark.parametrize("f", [lambda x: x, lambda x: Series(x), lambda x: x.values]) +def test_from_product_index_series_categorical(ordered, f): + # GH13743 + first = ["foo", "bar"] + + idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=ordered) + expected = pd.CategoricalIndex( + list("abcaab") + list("abcaab"), categories=list("bac"), ordered=ordered + ) + + result = MultiIndex.from_product([first, f(idx)]) + tm.assert_index_equal(result.get_level_values(1), expected) + + +def test_from_product(): + first = ["foo", "bar", "buz"] + second = ["a", "b", "c"] + names = ["first", "second"] + result = MultiIndex.from_product([first, second], names=names) + + tuples = [ + ("foo", "a"), + ("foo", "b"), + ("foo", "c"), + ("bar", "a"), + ("bar", "b"), + ("bar", "c"), + ("buz", "a"), + ("buz", "b"), + ("buz", "c"), + ] + expected = MultiIndex.from_tuples(tuples, names=names) + + tm.assert_index_equal(result, expected) + + +def test_from_product_iterator(): + # GH 18434 + first = ["foo", "bar", "buz"] + second = ["a", "b", "c"] + names = ["first", "second"] + tuples = [ + ("foo", "a"), + ("foo", "b"), + ("foo", "c"), + ("bar", "a"), + ("bar", "b"), + ("bar", "c"), + ("buz", "a"), + ("buz", "b"), + ("buz", "c"), + ] + expected = MultiIndex.from_tuples(tuples, names=names) + + # iterator as input + result = MultiIndex.from_product(iter([first, second]), names=names) + tm.assert_index_equal(result, expected) + + # Invalid non-iterable input + msg = "Input must be a list / sequence of iterables." + with pytest.raises(TypeError, match=msg): + MultiIndex.from_product(0) + + +@pytest.mark.parametrize( + "a, b, expected_names", + [ + ( + Series([1, 2, 3], name="foo"), + Series(["a", "b"], name="bar"), + ["foo", "bar"], + ), + (Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]), + ([1, 2, 3], ["a", "b"], None), + ], +) +def test_from_product_infer_names(a, b, expected_names): + # GH27292 + result = MultiIndex.from_product([a, b]) + expected = MultiIndex( + levels=[[1, 2, 3], ["a", "b"]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=expected_names, + ) + tm.assert_index_equal(result, expected) + + +def test_from_product_respects_none_names(): + # GH27292 + a = Series([1, 2, 3], name="foo") + b = Series(["a", "b"], name="bar") + + result = MultiIndex.from_product([a, b], names=None) + expected = MultiIndex( + levels=[[1, 2, 3], ["a", "b"]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=None, + ) + tm.assert_index_equal(result, expected) + + +def test_from_product_readonly(): + # GH#15286 passing read-only array to from_product + a = np.array(range(3)) + b = ["a", "b"] + expected = MultiIndex.from_product([a, b]) + + a.setflags(write=False) + result = MultiIndex.from_product([a, b]) + tm.assert_index_equal(result, expected) + + +def test_create_index_existing_name(idx): + # GH11193, when an existing index is passed, and a new name is not + # specified, the new index should inherit the previous object name + index = idx + index.names = ["foo", "bar"] + result = Index(index) + expected = Index( + Index( + [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ], + dtype="object", + ) + ) + tm.assert_index_equal(result, expected) + + result = Index(index, name="A") + expected = Index( + Index( + [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ], + dtype="object", + ), + name="A", + ) + tm.assert_index_equal(result, expected) + + +# ---------------------------------------------------------------------------- +# from_frame +# ---------------------------------------------------------------------------- +def test_from_frame(): + # GH 22420 + df = pd.DataFrame( + [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], columns=["L1", "L2"] + ) + expected = MultiIndex.from_tuples( + [("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")], names=["L1", "L2"] + ) + result = MultiIndex.from_frame(df) + tm.assert_index_equal(expected, result) + + +def test_from_frame_missing_values_multiIndex(): + # GH 39984 + pa = pytest.importorskip("pyarrow") + + df = pd.DataFrame( + { + "a": Series([1, 2, None], dtype="Int64"), + "b": pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])), + } + ) + multi_indexed = MultiIndex.from_frame(df) + expected = MultiIndex.from_arrays( + [ + Series([1, 2, None]).astype("Int64"), + pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])), + ], + names=["a", "b"], + ) + tm.assert_index_equal(multi_indexed, expected) + + +@pytest.mark.parametrize( + "non_frame", + [ + Series([1, 2, 3, 4]), + [1, 2, 3, 4], + [[1, 2], [3, 4], [5, 6]], + Index([1, 2, 3, 4]), + np.array([[1, 2], [3, 4], [5, 6]]), + 27, + ], +) +def test_from_frame_error(non_frame): + # GH 22420 + with pytest.raises(TypeError, match="Input must be a DataFrame"): + MultiIndex.from_frame(non_frame) + + +def test_from_frame_dtype_fidelity(): + # GH 22420 + df = pd.DataFrame( + { + "dates": date_range("19910905", periods=6, tz="US/Eastern"), + "a": [1, 1, 1, 2, 2, 2], + "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + "c": ["x", "x", "y", "z", "x", "y"], + } + ) + original_dtypes = df.dtypes.to_dict() + + expected_mi = MultiIndex.from_arrays( + [ + date_range("19910905", periods=6, tz="US/Eastern"), + [1, 1, 1, 2, 2, 2], + pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + ["x", "x", "y", "z", "x", "y"], + ], + names=["dates", "a", "b", "c"], + ) + mi = MultiIndex.from_frame(df) + mi_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)} + + tm.assert_index_equal(expected_mi, mi) + assert original_dtypes == mi_dtypes + + +@pytest.mark.parametrize( + "names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], ["x", "y"])] +) +def test_from_frame_valid_names(names_in, names_out): + # GH 22420 + df = pd.DataFrame( + [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], + columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]), + ) + mi = MultiIndex.from_frame(df, names=names_in) + assert mi.names == names_out + + +@pytest.mark.parametrize( + "names,expected_error_msg", + [ + ("bad_input", "Names should be list-like for a MultiIndex"), + (["a", "b", "c"], "Length of names must match number of levels in MultiIndex"), + ], +) +def test_from_frame_invalid_names(names, expected_error_msg): + # GH 22420 + df = pd.DataFrame( + [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], + columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]), + ) + with pytest.raises(ValueError, match=expected_error_msg): + MultiIndex.from_frame(df, names=names) + + +def test_index_equal_empty_iterable(): + # #16844 + a = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"]) + b = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"]) + tm.assert_index_equal(a, b) + + +def test_raise_invalid_sortorder(): + # Test that the MultiIndex constructor raise when a incorrect sortorder is given + # GH#28518 + + levels = [[0, 1], [0, 1, 2]] + + # Correct sortorder + MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 + ) + + with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"): + MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=2 + ) + + with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"): + MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1 + ) + + +def test_datetimeindex(): + idx1 = pd.DatetimeIndex( + ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo" + ) + idx2 = date_range("2010/01/01", periods=6, freq="ME", tz="US/Eastern") + idx = MultiIndex.from_arrays([idx1, idx2]) + + expected1 = pd.DatetimeIndex( + ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo" + ) + + tm.assert_index_equal(idx.levels[0], expected1) + tm.assert_index_equal(idx.levels[1], idx2) + + # from datetime combos + # GH 7888 + date1 = np.datetime64("today") + date2 = datetime.today() + date3 = Timestamp.today() + + for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]): + index = MultiIndex.from_product([[d1], [d2]]) + assert isinstance(index.levels[0], pd.DatetimeIndex) + assert isinstance(index.levels[1], pd.DatetimeIndex) + + # but NOT date objects, matching Index behavior + date4 = date.today() + index = MultiIndex.from_product([[date4], [date2]]) + assert not isinstance(index.levels[0], pd.DatetimeIndex) + assert isinstance(index.levels[1], pd.DatetimeIndex) + + +def test_constructor_with_tz(): + index = pd.DatetimeIndex( + ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" + ) + columns = pd.DatetimeIndex( + ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo" + ) + + result = MultiIndex.from_arrays([index, columns]) + + assert result.names == ["dt1", "dt2"] + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) + + result = MultiIndex.from_arrays([Series(index), Series(columns)]) + + assert result.names == ["dt1", "dt2"] + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) + + +def test_multiindex_inference_consistency(): + # check that inference behavior matches the base class + + v = date.today() + + arr = [v, v] + + idx = Index(arr) + assert idx.dtype == object + + mi = MultiIndex.from_arrays([arr]) + lev = mi.levels[0] + assert lev.dtype == object + + mi = MultiIndex.from_product([arr]) + lev = mi.levels[0] + assert lev.dtype == object + + mi = MultiIndex.from_tuples([(x,) for x in arr]) + lev = mi.levels[0] + assert lev.dtype == object + + +def test_dtype_representation(using_infer_string): + # GH#46900 + pmidx = MultiIndex.from_arrays([[1], ["a"]], names=[("a", "b"), ("c", "d")]) + result = pmidx.dtypes + exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan) + expected = Series( + ["int64", exp], + index=MultiIndex.from_tuples([("a", "b"), ("c", "d")]), + dtype=object, + ) + tm.assert_series_equal(result, expected) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_conversion.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_conversion.py new file mode 100644 index 0000000000000000000000000000000000000000..d62bd5438a1e39e2a371b731f7b74c48cd0cc3e3 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_conversion.py @@ -0,0 +1,201 @@ +import numpy as np +import pytest + +from pandas.compat.numpy import np_version_gt2 + +import pandas as pd +from pandas import ( + DataFrame, + MultiIndex, +) +import pandas._testing as tm + + +def test_to_numpy(idx): + result = idx.to_numpy() + exp = idx.values + tm.assert_numpy_array_equal(result, exp) + + +def test_array_interface(idx): + # https://github.com/pandas-dev/pandas/pull/60046 + result = np.asarray(idx) + expected = np.empty((6,), dtype=object) + expected[:] = [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ] + tm.assert_numpy_array_equal(result, expected) + + # it always gives a copy by default, but the values are cached, so results + # are still sharing memory + result_copy1 = np.asarray(idx) + result_copy2 = np.asarray(idx) + assert np.may_share_memory(result_copy1, result_copy2) + + # with explicit copy=True, then it is an actual copy + result_copy1 = np.array(idx, copy=True) + result_copy2 = np.array(idx, copy=True) + assert not np.may_share_memory(result_copy1, result_copy2) + + if not np_version_gt2: + # copy=False semantics are only supported in NumPy>=2. + return + + # for MultiIndex, copy=False is never allowed + msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed" + with tm.assert_produces_warning(FutureWarning, match=msg): + np.array(idx, copy=False) + + +def test_to_frame(): + tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")] + + index = MultiIndex.from_tuples(tuples) + result = index.to_frame(index=False) + expected = DataFrame(tuples) + tm.assert_frame_equal(result, expected) + + result = index.to_frame() + expected.index = index + tm.assert_frame_equal(result, expected) + + tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")] + index = MultiIndex.from_tuples(tuples, names=["first", "second"]) + result = index.to_frame(index=False) + expected = DataFrame(tuples) + expected.columns = ["first", "second"] + tm.assert_frame_equal(result, expected) + + result = index.to_frame() + expected.index = index + tm.assert_frame_equal(result, expected) + + # See GH-22580 + index = MultiIndex.from_tuples(tuples) + result = index.to_frame(index=False, name=["first", "second"]) + expected = DataFrame(tuples) + expected.columns = ["first", "second"] + tm.assert_frame_equal(result, expected) + + result = index.to_frame(name=["first", "second"]) + expected.index = index + expected.columns = ["first", "second"] + tm.assert_frame_equal(result, expected) + + msg = "'name' must be a list / sequence of column names." + with pytest.raises(TypeError, match=msg): + index.to_frame(name="first") + + msg = "'name' should have same length as number of levels on index." + with pytest.raises(ValueError, match=msg): + index.to_frame(name=["first"]) + + # Tests for datetime index + index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)]) + result = index.to_frame(index=False) + expected = DataFrame( + { + 0: np.repeat(np.arange(5, dtype="int64"), 3), + 1: np.tile(pd.date_range("20130101", periods=3), 5), + } + ) + tm.assert_frame_equal(result, expected) + + result = index.to_frame() + expected.index = index + tm.assert_frame_equal(result, expected) + + # See GH-22580 + result = index.to_frame(index=False, name=["first", "second"]) + expected = DataFrame( + { + "first": np.repeat(np.arange(5, dtype="int64"), 3), + "second": np.tile(pd.date_range("20130101", periods=3), 5), + } + ) + tm.assert_frame_equal(result, expected) + + result = index.to_frame(name=["first", "second"]) + expected.index = index + tm.assert_frame_equal(result, expected) + + +def test_to_frame_dtype_fidelity(): + # GH 22420 + mi = MultiIndex.from_arrays( + [ + pd.date_range("19910905", periods=6, tz="US/Eastern"), + [1, 1, 1, 2, 2, 2], + pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + ["x", "x", "y", "z", "x", "y"], + ], + names=["dates", "a", "b", "c"], + ) + original_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)} + + expected_df = DataFrame( + { + "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"), + "a": [1, 1, 1, 2, 2, 2], + "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + "c": ["x", "x", "y", "z", "x", "y"], + } + ) + df = mi.to_frame(index=False) + df_dtypes = df.dtypes.to_dict() + + tm.assert_frame_equal(df, expected_df) + assert original_dtypes == df_dtypes + + +def test_to_frame_resulting_column_order(): + # GH 22420 + expected = ["z", 0, "a"] + mi = MultiIndex.from_arrays( + [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]], names=expected + ) + result = mi.to_frame().columns.tolist() + assert result == expected + + +def test_to_frame_duplicate_labels(): + # GH 45245 + data = [(1, 2), (3, 4)] + names = ["a", "a"] + index = MultiIndex.from_tuples(data, names=names) + with pytest.raises(ValueError, match="Cannot create duplicate column labels"): + index.to_frame() + + result = index.to_frame(allow_duplicates=True) + expected = DataFrame(data, index=index, columns=names) + tm.assert_frame_equal(result, expected) + + names = [None, 0] + index = MultiIndex.from_tuples(data, names=names) + with pytest.raises(ValueError, match="Cannot create duplicate column labels"): + index.to_frame() + + result = index.to_frame(allow_duplicates=True) + expected = DataFrame(data, index=index, columns=[0, 0]) + tm.assert_frame_equal(result, expected) + + +def test_to_flat_index(idx): + expected = pd.Index( + ( + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ), + tupleize_cols=False, + ) + result = idx.to_flat_index() + tm.assert_index_equal(result, expected) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_copy.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_copy.py new file mode 100644 index 0000000000000000000000000000000000000000..2e09a580f9528bc8197d55c6a7533098e0129fa2 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_copy.py @@ -0,0 +1,96 @@ +from copy import ( + copy, + deepcopy, +) + +import pytest + +from pandas import MultiIndex +import pandas._testing as tm + + +def assert_multiindex_copied(copy, original): + # Levels should be (at least, shallow copied) + tm.assert_copy(copy.levels, original.levels) + tm.assert_almost_equal(copy.codes, original.codes) + + # Labels doesn't matter which way copied + tm.assert_almost_equal(copy.codes, original.codes) + assert copy.codes is not original.codes + + # Names doesn't matter which way copied + assert copy.names == original.names + assert copy.names is not original.names + + # Sort order should be copied + assert copy.sortorder == original.sortorder + + +def test_copy(idx): + i_copy = idx.copy() + + assert_multiindex_copied(i_copy, idx) + + +def test_shallow_copy(idx): + i_copy = idx._view() + + assert_multiindex_copied(i_copy, idx) + + +def test_view(idx): + i_view = idx.view() + assert_multiindex_copied(i_view, idx) + + +@pytest.mark.parametrize("func", [copy, deepcopy]) +def test_copy_and_deepcopy(func): + idx = MultiIndex( + levels=[["foo", "bar"], ["fizz", "buzz"]], + codes=[[0, 0, 0, 1], [0, 0, 1, 1]], + names=["first", "second"], + ) + idx_copy = func(idx) + assert idx_copy is not idx + assert idx_copy.equals(idx) + + +@pytest.mark.parametrize("deep", [True, False]) +def test_copy_method(deep): + idx = MultiIndex( + levels=[["foo", "bar"], ["fizz", "buzz"]], + codes=[[0, 0, 0, 1], [0, 0, 1, 1]], + names=["first", "second"], + ) + idx_copy = idx.copy(deep=deep) + assert idx_copy.equals(idx) + + +@pytest.mark.parametrize("deep", [True, False]) +@pytest.mark.parametrize( + "kwarg, value", + [ + ("names", ["third", "fourth"]), + ], +) +def test_copy_method_kwargs(deep, kwarg, value): + # gh-12309: Check that the "name" argument as well other kwargs are honored + idx = MultiIndex( + levels=[["foo", "bar"], ["fizz", "buzz"]], + codes=[[0, 0, 0, 1], [0, 0, 1, 1]], + names=["first", "second"], + ) + idx_copy = idx.copy(**{kwarg: value, "deep": deep}) + assert getattr(idx_copy, kwarg) == value + + +def test_copy_deep_false_retains_id(): + # GH#47878 + idx = MultiIndex( + levels=[["foo", "bar"], ["fizz", "buzz"]], + codes=[[0, 0, 0, 1], [0, 0, 1, 1]], + names=["first", "second"], + ) + + res = idx.copy(deep=False) + assert res._id is idx._id diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_drop.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_drop.py new file mode 100644 index 0000000000000000000000000000000000000000..99c8ebb1e57b22059d5a545a79de7b8348d73b14 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_drop.py @@ -0,0 +1,190 @@ +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + Index, + MultiIndex, +) +import pandas._testing as tm + + +def test_drop(idx): + dropped = idx.drop([("foo", "two"), ("qux", "one")]) + + index = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")]) + dropped2 = idx.drop(index) + + expected = idx[[0, 2, 3, 5]] + tm.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped2, expected) + + dropped = idx.drop(["bar"]) + expected = idx[[0, 1, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = idx.drop("foo") + expected = idx[[2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + index = MultiIndex.from_tuples([("bar", "two")]) + with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"): + idx.drop([("bar", "two")]) + with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"): + idx.drop(index) + with pytest.raises(KeyError, match=r"^'two'$"): + idx.drop(["foo", "two"]) + + # partially correct argument + mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")]) + with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"): + idx.drop(mixed_index) + + # error='ignore' + dropped = idx.drop(index, errors="ignore") + expected = idx[[0, 1, 2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = idx.drop(mixed_index, errors="ignore") + expected = idx[[0, 1, 2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = idx.drop(["foo", "two"], errors="ignore") + expected = idx[[2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + # mixed partial / full drop + dropped = idx.drop(["foo", ("qux", "one")]) + expected = idx[[2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + # mixed partial / full drop / error='ignore' + mixed_index = ["foo", ("qux", "one"), "two"] + with pytest.raises(KeyError, match=r"^'two'$"): + idx.drop(mixed_index) + dropped = idx.drop(mixed_index, errors="ignore") + expected = idx[[2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + +def test_droplevel_with_names(idx): + index = idx[idx.get_loc("foo")] + dropped = index.droplevel(0) + assert dropped.name == "second" + + index = MultiIndex( + levels=[Index(range(4)), Index(range(4)), Index(range(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + names=["one", "two", "three"], + ) + dropped = index.droplevel(0) + assert dropped.names == ("two", "three") + + dropped = index.droplevel("two") + expected = index.droplevel(1) + assert dropped.equals(expected) + + +def test_droplevel_list(): + index = MultiIndex( + levels=[Index(range(4)), Index(range(4)), Index(range(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + names=["one", "two", "three"], + ) + + dropped = index[:2].droplevel(["three", "one"]) + expected = index[:2].droplevel(2).droplevel(0) + assert dropped.equals(expected) + + dropped = index[:2].droplevel([]) + expected = index[:2] + assert dropped.equals(expected) + + msg = ( + "Cannot remove 3 levels from an index with 3 levels: " + "at least one level must be left" + ) + with pytest.raises(ValueError, match=msg): + index[:2].droplevel(["one", "two", "three"]) + + with pytest.raises(KeyError, match="'Level four not found'"): + index[:2].droplevel(["one", "four"]) + + +def test_drop_not_lexsorted(): + # GH 12078 + + # define the lexsorted version of the multi-index + tuples = [("a", ""), ("b1", "c1"), ("b2", "c2")] + lexsorted_mi = MultiIndex.from_tuples(tuples, names=["b", "c"]) + assert lexsorted_mi._is_lexsorted() + + # and the not-lexsorted version + df = pd.DataFrame( + columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]] + ) + df = df.pivot_table(index="a", columns=["b", "c"], values="d") + df = df.reset_index() + not_lexsorted_mi = df.columns + assert not not_lexsorted_mi._is_lexsorted() + + # compare the results + tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi) + with tm.assert_produces_warning(PerformanceWarning): + tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a")) + + +def test_drop_with_nan_in_index(nulls_fixture): + # GH#18853 + mi = MultiIndex.from_tuples([("blah", nulls_fixture)], names=["name", "date"]) + msg = r"labels \[Timestamp\('2001-01-01 00:00:00'\)\] not found in level" + with pytest.raises(KeyError, match=msg): + mi.drop(pd.Timestamp("2001"), level="date") + + +@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") +def test_drop_with_non_monotonic_duplicates(): + # GH#33494 + mi = MultiIndex.from_tuples([(1, 2), (2, 3), (1, 2)]) + result = mi.drop((1, 2)) + expected = MultiIndex.from_tuples([(2, 3)]) + tm.assert_index_equal(result, expected) + + +def test_single_level_drop_partially_missing_elements(): + # GH 37820 + + mi = MultiIndex.from_tuples([(1, 2), (2, 2), (3, 2)]) + msg = r"labels \[4\] not found in level" + with pytest.raises(KeyError, match=msg): + mi.drop(4, level=0) + with pytest.raises(KeyError, match=msg): + mi.drop([1, 4], level=0) + msg = r"labels \[nan\] not found in level" + with pytest.raises(KeyError, match=msg): + mi.drop([np.nan], level=0) + with pytest.raises(KeyError, match=msg): + mi.drop([np.nan, 1, 2, 3], level=0) + + mi = MultiIndex.from_tuples([(np.nan, 1), (1, 2)]) + msg = r"labels \['a'\] not found in level" + with pytest.raises(KeyError, match=msg): + mi.drop([np.nan, 1, "a"], level=0) + + +def test_droplevel_multiindex_one_level(): + # GH#37208 + index = MultiIndex.from_tuples([(2,)], names=("b",)) + result = index.droplevel([]) + expected = Index([2], name="b") + tm.assert_index_equal(result, expected) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_equivalence.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_equivalence.py new file mode 100644 index 0000000000000000000000000000000000000000..9babbd5b8d56d64d704978758efb81f8d730274f --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_equivalence.py @@ -0,0 +1,284 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_any_real_numeric_dtype + +import pandas as pd +from pandas import ( + Index, + MultiIndex, + Series, +) +import pandas._testing as tm + + +def test_equals(idx): + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.equals(idx.to_flat_index()) + assert idx.equals(idx.to_flat_index().astype("category")) + + assert not idx.equals(list(idx)) + assert not idx.equals(np.array(idx)) + + same_values = Index(idx, dtype=object) + assert idx.equals(same_values) + assert same_values.equals(idx) + + if idx.nlevels == 1: + # do not test MultiIndex + assert not idx.equals(Series(idx)) + + +def test_equals_op(idx): + # GH9947, GH10637 + index_a = idx + + n = len(index_a) + index_b = index_a[0:-1] + index_c = index_a[0:-1].append(index_a[-2:-1]) + index_d = index_a[0:1] + with pytest.raises(ValueError, match="Lengths must match"): + index_a == index_b + expected1 = np.array([True] * n) + expected2 = np.array([True] * (n - 1) + [False]) + tm.assert_numpy_array_equal(index_a == index_a, expected1) + tm.assert_numpy_array_equal(index_a == index_c, expected2) + + # test comparisons with numpy arrays + array_a = np.array(index_a) + array_b = np.array(index_a[0:-1]) + array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) + array_d = np.array(index_a[0:1]) + with pytest.raises(ValueError, match="Lengths must match"): + index_a == array_b + tm.assert_numpy_array_equal(index_a == array_a, expected1) + tm.assert_numpy_array_equal(index_a == array_c, expected2) + + # test comparisons with Series + series_a = Series(array_a) + series_b = Series(array_b) + series_c = Series(array_c) + series_d = Series(array_d) + with pytest.raises(ValueError, match="Lengths must match"): + index_a == series_b + + tm.assert_numpy_array_equal(index_a == series_a, expected1) + tm.assert_numpy_array_equal(index_a == series_c, expected2) + + # cases where length is 1 for one of them + with pytest.raises(ValueError, match="Lengths must match"): + index_a == index_d + with pytest.raises(ValueError, match="Lengths must match"): + index_a == series_d + with pytest.raises(ValueError, match="Lengths must match"): + index_a == array_d + msg = "Can only compare identically-labeled Series objects" + with pytest.raises(ValueError, match=msg): + series_a == series_d + with pytest.raises(ValueError, match="Lengths must match"): + series_a == array_d + + # comparing with a scalar should broadcast; note that we are excluding + # MultiIndex because in this case each item in the index is a tuple of + # length 2, and therefore is considered an array of length 2 in the + # comparison instead of a scalar + if not isinstance(index_a, MultiIndex): + expected3 = np.array([False] * (len(index_a) - 2) + [True, False]) + # assuming the 2nd to last item is unique in the data + item = index_a[-2] + tm.assert_numpy_array_equal(index_a == item, expected3) + tm.assert_series_equal(series_a == item, Series(expected3)) + + +def test_compare_tuple(): + # GH#21517 + mi = MultiIndex.from_product([[1, 2]] * 2) + + all_false = np.array([False, False, False, False]) + + result = mi == mi[0] + expected = np.array([True, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + + result = mi != mi[0] + tm.assert_numpy_array_equal(result, ~expected) + + result = mi < mi[0] + tm.assert_numpy_array_equal(result, all_false) + + result = mi <= mi[0] + tm.assert_numpy_array_equal(result, expected) + + result = mi > mi[0] + tm.assert_numpy_array_equal(result, ~expected) + + result = mi >= mi[0] + tm.assert_numpy_array_equal(result, ~all_false) + + +def test_compare_tuple_strs(): + # GH#34180 + + mi = MultiIndex.from_tuples([("a", "b"), ("b", "c"), ("c", "a")]) + + result = mi == ("c", "a") + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = mi == ("c",) + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(result, expected) + + +def test_equals_multi(idx): + assert idx.equals(idx) + assert not idx.equals(idx.values) + assert idx.equals(Index(idx.values)) + + assert idx.equal_levels(idx) + assert not idx.equals(idx[:-1]) + assert not idx.equals(idx[-1]) + + # different number of levels + index = MultiIndex( + levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + ) + + index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1]) + assert not index.equals(index2) + assert not index.equal_levels(index2) + + # levels are different + major_axis = Index(list(range(4))) + minor_axis = Index(list(range(2))) + + major_codes = np.array([0, 0, 1, 2, 2, 3]) + minor_codes = np.array([0, 1, 0, 0, 1, 0]) + + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + assert not idx.equals(index) + assert not idx.equal_levels(index) + + # some of the labels are different + major_axis = Index(["foo", "bar", "baz", "qux"]) + minor_axis = Index(["one", "two"]) + + major_codes = np.array([0, 0, 2, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + assert not idx.equals(index) + + +def test_identical(idx): + mi = idx.copy() + mi2 = idx.copy() + assert mi.identical(mi2) + + mi = mi.set_names(["new1", "new2"]) + assert mi.equals(mi2) + assert not mi.identical(mi2) + + mi2 = mi2.set_names(["new1", "new2"]) + assert mi.identical(mi2) + + mi4 = Index(mi.tolist(), tupleize_cols=False) + assert not mi.identical(mi4) + assert mi.equals(mi4) + + +def test_equals_operator(idx): + # GH9785 + assert (idx == idx).all() + + +def test_equals_missing_values(): + # make sure take is not using -1 + i = MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))]) + result = i[0:1].equals(i[0]) + assert not result + result = i[1:2].equals(i[1]) + assert not result + + +def test_equals_missing_values_differently_sorted(): + # GH#38439 + mi1 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)]) + mi2 = MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)]) + assert not mi1.equals(mi2) + + mi2 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)]) + assert mi1.equals(mi2) + + +def test_is_(): + mi = MultiIndex.from_tuples(zip(range(10), range(10))) + assert mi.is_(mi) + assert mi.is_(mi.view()) + assert mi.is_(mi.view().view().view().view()) + mi2 = mi.view() + # names are metadata, they don't change id + mi2.names = ["A", "B"] + assert mi2.is_(mi) + assert mi.is_(mi2) + + assert not mi.is_(mi.set_names(["C", "D"])) + # levels are inherent properties, they change identity + mi3 = mi2.set_levels([list(range(10)), list(range(10))]) + assert not mi3.is_(mi2) + # shouldn't change + assert mi2.is_(mi) + mi4 = mi3.view() + + # GH 17464 - Remove duplicate MultiIndex levels + mi4 = mi4.set_levels([list(range(10)), list(range(10))]) + assert not mi4.is_(mi3) + mi5 = mi.view() + mi5 = mi5.set_levels(mi5.levels) + assert not mi5.is_(mi) + + +def test_is_all_dates(idx): + assert not idx._is_all_dates + + +def test_is_numeric(idx): + # MultiIndex is never numeric + assert not is_any_real_numeric_dtype(idx) + + +def test_multiindex_compare(): + # GH 21149 + # Ensure comparison operations for MultiIndex with nlevels == 1 + # behave consistently with those for MultiIndex with nlevels > 1 + + midx = MultiIndex.from_product([[0, 1]]) + + # Equality self-test: MultiIndex object vs self + expected = Series([True, True]) + result = Series(midx == midx) + tm.assert_series_equal(result, expected) + + # Greater than comparison: MultiIndex object vs self + expected = Series([False, False]) + result = Series(midx > midx) + tm.assert_series_equal(result, expected) + + +def test_equals_ea_int_regular_int(): + # GH#46026 + mi1 = MultiIndex.from_arrays([Index([1, 2], dtype="Int64"), [3, 4]]) + mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]]) + assert not mi1.equals(mi2) + assert not mi2.equals(mi1) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_formats.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_formats.py new file mode 100644 index 0000000000000000000000000000000000000000..52ff3109128f24f43d9a12527d08770b463459a5 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_formats.py @@ -0,0 +1,249 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + MultiIndex, +) +import pandas._testing as tm + + +def test_format(idx): + msg = "MultiIndex.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + idx.format() + idx[:0].format() + + +def test_format_integer_names(): + index = MultiIndex( + levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1] + ) + msg = "MultiIndex.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + index.format(names=True) + + +def test_format_sparse_config(idx): + # GH1538 + msg = "MultiIndex.format is deprecated" + with pd.option_context("display.multi_sparse", False): + with tm.assert_produces_warning(FutureWarning, match=msg): + result = idx.format() + assert result[1] == "foo two" + + +def test_format_sparse_display(): + index = MultiIndex( + levels=[[0, 1], [0, 1], [0, 1], [0]], + codes=[ + [0, 0, 0, 1, 1, 1], + [0, 0, 1, 0, 0, 1], + [0, 1, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 0], + ], + ) + msg = "MultiIndex.format is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = index.format() + assert result[3] == "1 0 0 0" + + +def test_repr_with_unicode_data(): + with pd.option_context("display.encoding", "UTF-8"): + d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + index = pd.DataFrame(d).set_index(["a", "b"]).index + assert "\\" not in repr(index) # we don't want unicode-escaped + + +def test_repr_roundtrip_raises(): + mi = MultiIndex.from_product([list("ab"), range(3)], names=["first", "second"]) + msg = "Must pass both levels and codes" + with pytest.raises(TypeError, match=msg): + eval(repr(mi)) + + +def test_unicode_string_with_unicode(): + d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + idx = pd.DataFrame(d).set_index(["a", "b"]).index + str(idx) + + +def test_repr_max_seq_item_setting(idx): + # GH10182 + idx = idx.repeat(50) + with pd.option_context("display.max_seq_items", None): + repr(idx) + assert "..." not in str(idx) + + +class TestRepr: + def test_unicode_repr_issues(self): + levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])] + codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] + index = MultiIndex(levels=levels, codes=codes) + + repr(index.levels) + repr(index.get_level_values(1)) + + def test_repr_max_seq_items_equal_to_n(self, idx): + # display.max_seq_items == n + with pd.option_context("display.max_seq_items", 6): + result = idx.__repr__() + expected = """\ +MultiIndex([('foo', 'one'), + ('foo', 'two'), + ('bar', 'one'), + ('baz', 'two'), + ('qux', 'one'), + ('qux', 'two')], + names=['first', 'second'])""" + assert result == expected + + def test_repr(self, idx): + result = idx[:1].__repr__() + expected = """\ +MultiIndex([('foo', 'one')], + names=['first', 'second'])""" + assert result == expected + + result = idx.__repr__() + expected = """\ +MultiIndex([('foo', 'one'), + ('foo', 'two'), + ('bar', 'one'), + ('baz', 'two'), + ('qux', 'one'), + ('qux', 'two')], + names=['first', 'second'])""" + assert result == expected + + with pd.option_context("display.max_seq_items", 5): + result = idx.__repr__() + expected = """\ +MultiIndex([('foo', 'one'), + ('foo', 'two'), + ... + ('qux', 'one'), + ('qux', 'two')], + names=['first', 'second'], length=6)""" + assert result == expected + + # display.max_seq_items == 1 + with pd.option_context("display.max_seq_items", 1): + result = idx.__repr__() + expected = """\ +MultiIndex([... + ('qux', 'two')], + names=['first', ...], length=6)""" + assert result == expected + + def test_rjust(self): + n = 1000 + ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n)) + dti = pd.date_range("2000-01-01", freq="s", periods=n * 2) + mi = MultiIndex.from_arrays([ci, ci.codes + 9, dti], names=["a", "b", "dti"]) + result = mi[:1].__repr__() + expected = """\ +MultiIndex([('a', 9, '2000-01-01 00:00:00')], + names=['a', 'b', 'dti'])""" + assert result == expected + + result = mi[::500].__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00'), + ( 'a', 9, '2000-01-01 00:08:20'), + ('abc', 10, '2000-01-01 00:16:40'), + ('abc', 10, '2000-01-01 00:25:00')], + names=['a', 'b', 'dti'])""" + assert result == expected + + result = mi.__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00'), + ( 'a', 9, '2000-01-01 00:00:01'), + ( 'a', 9, '2000-01-01 00:00:02'), + ( 'a', 9, '2000-01-01 00:00:03'), + ( 'a', 9, '2000-01-01 00:00:04'), + ( 'a', 9, '2000-01-01 00:00:05'), + ( 'a', 9, '2000-01-01 00:00:06'), + ( 'a', 9, '2000-01-01 00:00:07'), + ( 'a', 9, '2000-01-01 00:00:08'), + ( 'a', 9, '2000-01-01 00:00:09'), + ... + ('abc', 10, '2000-01-01 00:33:10'), + ('abc', 10, '2000-01-01 00:33:11'), + ('abc', 10, '2000-01-01 00:33:12'), + ('abc', 10, '2000-01-01 00:33:13'), + ('abc', 10, '2000-01-01 00:33:14'), + ('abc', 10, '2000-01-01 00:33:15'), + ('abc', 10, '2000-01-01 00:33:16'), + ('abc', 10, '2000-01-01 00:33:17'), + ('abc', 10, '2000-01-01 00:33:18'), + ('abc', 10, '2000-01-01 00:33:19')], + names=['a', 'b', 'dti'], length=2000)""" + assert result == expected + + def test_tuple_width(self): + n = 1000 + ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n)) + dti = pd.date_range("2000-01-01", freq="s", periods=n * 2) + levels = [ci, ci.codes + 9, dti, dti, dti] + names = ["a", "b", "dti_1", "dti_2", "dti_3"] + mi = MultiIndex.from_arrays(levels, names=names) + result = mi[:1].__repr__() + expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)], + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" # noqa: E501 + assert result == expected + + result = mi[:10].__repr__() + expected = """\ +MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), + ('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), + ('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...), + ('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...), + ('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...), + ('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...), + ('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...), + ('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), + ('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), + ('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)], + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" + assert result == expected + + result = mi.__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), + ( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), + ( 'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...), + ( 'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...), + ( 'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...), + ( 'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...), + ( 'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...), + ( 'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), + ( 'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), + ( 'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...), + ... + ('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...), + ('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...), + ('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...), + ('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...), + ('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...), + ('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...), + ('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...), + ('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...), + ('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...), + ('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)], + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)""" + assert result == expected + + def test_multiindex_long_element(self): + # Non-regression test towards GH#52960 + data = MultiIndex.from_tuples([("c" * 62,)]) + + expected = ( + "MultiIndex([('cccccccccccccccccccccccccccccccccccccccc" + "cccccccccccccccccccccc',)],\n )" + ) + assert str(data) == expected diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_integrity.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_integrity.py new file mode 100644 index 0000000000000000000000000000000000000000..d956747cbc859f40b69e52ea78c85ebce31f3427 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_integrity.py @@ -0,0 +1,289 @@ +import re + +import numpy as np +import pytest + +from pandas._libs import index as libindex + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + +import pandas as pd +from pandas import ( + Index, + IntervalIndex, + MultiIndex, + RangeIndex, +) +import pandas._testing as tm + + +def test_labels_dtypes(): + # GH 8456 + i = MultiIndex.from_tuples([("A", 1), ("A", 2)]) + assert i.codes[0].dtype == "int8" + assert i.codes[1].dtype == "int8" + + i = MultiIndex.from_product([["a"], range(40)]) + assert i.codes[1].dtype == "int8" + i = MultiIndex.from_product([["a"], range(400)]) + assert i.codes[1].dtype == "int16" + i = MultiIndex.from_product([["a"], range(40000)]) + assert i.codes[1].dtype == "int32" + + i = MultiIndex.from_product([["a"], range(1000)]) + assert (i.codes[0] >= 0).all() + assert (i.codes[1] >= 0).all() + + +def test_values_boxed(): + tuples = [ + (1, pd.Timestamp("2000-01-01")), + (2, pd.NaT), + (3, pd.Timestamp("2000-01-03")), + (1, pd.Timestamp("2000-01-04")), + (2, pd.Timestamp("2000-01-02")), + (3, pd.Timestamp("2000-01-03")), + ] + result = MultiIndex.from_tuples(tuples) + expected = construct_1d_object_array_from_listlike(tuples) + tm.assert_numpy_array_equal(result.values, expected) + # Check that code branches for boxed values produce identical results + tm.assert_numpy_array_equal(result.values[:4], result[:4].values) + + +def test_values_multiindex_datetimeindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(10**18, 10**18 + 5) + naive = pd.DatetimeIndex(ints) + + aware = pd.DatetimeIndex(ints, tz="US/Central") + + idx = MultiIndex.from_arrays([naive, aware]) + result = idx.values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive[:2]) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware[:2]) + + +def test_values_multiindex_periodindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(2007, 2012) + pidx = pd.PeriodIndex(ints, freq="D") + + idx = MultiIndex.from_arrays([ints, pidx]) + result = idx.values + + outer = Index([x[0] for x in result]) + tm.assert_index_equal(outer, Index(ints, dtype=np.int64)) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx) + + # n_lev > n_lab + result = idx[:2].values + + outer = Index([x[0] for x in result]) + tm.assert_index_equal(outer, Index(ints[:2], dtype=np.int64)) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx[:2]) + + +def test_consistency(): + # need to construct an overflow + major_axis = list(range(70000)) + minor_axis = list(range(10)) + + major_codes = np.arange(70000) + minor_codes = np.repeat(range(10), 7000) + + # the fact that is works means it's consistent + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + + # inconsistent + major_codes = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]) + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + + assert index.is_unique is False + + +@pytest.mark.slow +def test_hash_collisions(monkeypatch): + # non-smoke test that we don't get hash collisions + size_cutoff = 50 + with monkeypatch.context() as m: + m.setattr(libindex, "_SIZE_CUTOFF", size_cutoff) + index = MultiIndex.from_product( + [np.arange(8), np.arange(8)], names=["one", "two"] + ) + result = index.get_indexer(index.values) + tm.assert_numpy_array_equal(result, np.arange(len(index), dtype="intp")) + + for i in [0, 1, len(index) - 2, len(index) - 1]: + result = index.get_loc(index[i]) + assert result == i + + +def test_dims(): + pass + + +def test_take_invalid_kwargs(): + vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]] + idx = MultiIndex.from_product(vals, names=["str", "dt"]) + indices = [1, 2] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + +def test_isna_behavior(idx): + # should not segfault GH5123 + # NOTE: if MI representation changes, may make sense to allow + # isna(MI) + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + pd.isna(idx) + + +def test_large_multiindex_error(monkeypatch): + # GH12527 + size_cutoff = 50 + with monkeypatch.context() as m: + m.setattr(libindex, "_SIZE_CUTOFF", size_cutoff) + df_below_cutoff = pd.DataFrame( + 1, + index=MultiIndex.from_product([[1, 2], range(size_cutoff - 1)]), + columns=["dest"], + ) + with pytest.raises(KeyError, match=r"^\(-1, 0\)$"): + df_below_cutoff.loc[(-1, 0), "dest"] + with pytest.raises(KeyError, match=r"^\(3, 0\)$"): + df_below_cutoff.loc[(3, 0), "dest"] + df_above_cutoff = pd.DataFrame( + 1, + index=MultiIndex.from_product([[1, 2], range(size_cutoff + 1)]), + columns=["dest"], + ) + with pytest.raises(KeyError, match=r"^\(-1, 0\)$"): + df_above_cutoff.loc[(-1, 0), "dest"] + with pytest.raises(KeyError, match=r"^\(3, 0\)$"): + df_above_cutoff.loc[(3, 0), "dest"] + + +def test_mi_hashtable_populated_attribute_error(monkeypatch): + # GH 18165 + monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 50) + r = range(50) + df = pd.DataFrame({"a": r, "b": r}, index=MultiIndex.from_arrays([r, r])) + + msg = "'Series' object has no attribute 'foo'" + with pytest.raises(AttributeError, match=msg): + df["a"].foo() + + +def test_can_hold_identifiers(idx): + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is True + + +def test_metadata_immutable(idx): + levels, codes = idx.levels, idx.codes + # shouldn't be able to set at either the top level or base level + mutable_regex = re.compile("does not support mutable operations") + with pytest.raises(TypeError, match=mutable_regex): + levels[0] = levels[0] + with pytest.raises(TypeError, match=mutable_regex): + levels[0][0] = levels[0][0] + # ditto for labels + with pytest.raises(TypeError, match=mutable_regex): + codes[0] = codes[0] + with pytest.raises(ValueError, match="assignment destination is read-only"): + codes[0][0] = codes[0][0] + # and for names + names = idx.names + with pytest.raises(TypeError, match=mutable_regex): + names[0] = names[0] + + +def test_level_setting_resets_attributes(): + ind = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]]) + assert ind.is_monotonic_increasing + ind = ind.set_levels([["A", "B"], [1, 3, 2]]) + # if this fails, probably didn't reset the cache correctly. + assert not ind.is_monotonic_increasing + + +def test_rangeindex_fallback_coercion_bug(): + # GH 12893 + df1 = pd.DataFrame(np.arange(100).reshape((10, 10))) + df2 = pd.DataFrame(np.arange(100).reshape((10, 10))) + df = pd.concat( + {"df1": df1.stack(future_stack=True), "df2": df2.stack(future_stack=True)}, + axis=1, + ) + df.index.names = ["fizz", "buzz"] + + expected = pd.DataFrame( + {"df2": np.arange(100), "df1": np.arange(100)}, + index=MultiIndex.from_product([range(10), range(10)], names=["fizz", "buzz"]), + ) + tm.assert_frame_equal(df, expected, check_like=True) + + result = df.index.get_level_values("fizz") + expected = Index(np.arange(10, dtype=np.int64), name="fizz").repeat(10) + tm.assert_index_equal(result, expected) + + result = df.index.get_level_values("buzz") + expected = Index(np.tile(np.arange(10, dtype=np.int64), 10), name="buzz") + tm.assert_index_equal(result, expected) + + +def test_memory_usage(idx): + result = idx.memory_usage() + if len(idx): + idx.get_loc(idx[0]) + result2 = idx.memory_usage() + result3 = idx.memory_usage(deep=True) + + # RangeIndex, IntervalIndex + # don't have engines + if not isinstance(idx, (RangeIndex, IntervalIndex)): + assert result2 > result + + if idx.inferred_type == "object": + assert result3 > result2 + + else: + # we report 0 for no-length + assert result == 0 + + +def test_nlevels(idx): + assert idx.nlevels == 2 diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_join.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_join.py new file mode 100644 index 0000000000000000000000000000000000000000..edd0feaaa1159ff8340af772d27f2a7af09ceb87 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_join.py @@ -0,0 +1,268 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + Interval, + MultiIndex, + Series, + StringDtype, +) +import pandas._testing as tm + + +@pytest.mark.parametrize( + "other", [Index(["three", "one", "two"]), Index(["one"]), Index(["one", "three"])] +) +def test_join_level(idx, other, join_type): + join_index, lidx, ridx = other.join( + idx, how=join_type, level="second", return_indexers=True + ) + + exp_level = other.join(idx.levels[1], how=join_type) + assert join_index.levels[0].equals(idx.levels[0]) + assert join_index.levels[1].equals(exp_level) + + # pare down levels + mask = np.array([x[1] in exp_level for x in idx], dtype=bool) + exp_values = idx.values[mask] + tm.assert_numpy_array_equal(join_index.values, exp_values) + + if join_type in ("outer", "inner"): + join_index2, ridx2, lidx2 = idx.join( + other, how=join_type, level="second", return_indexers=True + ) + + assert join_index.equals(join_index2) + tm.assert_numpy_array_equal(lidx, lidx2) + tm.assert_numpy_array_equal(ridx, ridx2) + tm.assert_numpy_array_equal(join_index2.values, exp_values) + + +def test_join_level_corner_case(idx): + # some corner cases + index = Index(["three", "one", "two"]) + result = index.join(idx, level="second") + assert isinstance(result, MultiIndex) + + with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"): + idx.join(idx, level=1) + + +def test_join_self(idx, join_type): + result = idx.join(idx, how=join_type) + expected = idx + if join_type == "outer": + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + +def test_join_multi(): + # GH 10665 + midx = MultiIndex.from_product([np.arange(4), np.arange(4)], names=["a", "b"]) + idx = Index([1, 2, 5], name="b") + + # inner + jidx, lidx, ridx = midx.join(idx, how="inner", return_indexers=True) + exp_idx = MultiIndex.from_product([np.arange(4), [1, 2]], names=["a", "b"]) + exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp) + exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp) + tm.assert_index_equal(jidx, exp_idx) + tm.assert_numpy_array_equal(lidx, exp_lidx) + tm.assert_numpy_array_equal(ridx, exp_ridx) + # flip + jidx, ridx, lidx = idx.join(midx, how="inner", return_indexers=True) + tm.assert_index_equal(jidx, exp_idx) + tm.assert_numpy_array_equal(lidx, exp_lidx) + tm.assert_numpy_array_equal(ridx, exp_ridx) + + # keep MultiIndex + jidx, lidx, ridx = midx.join(idx, how="left", return_indexers=True) + exp_ridx = np.array( + [-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1], dtype=np.intp + ) + tm.assert_index_equal(jidx, midx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) + # flip + jidx, ridx, lidx = idx.join(midx, how="right", return_indexers=True) + tm.assert_index_equal(jidx, midx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) + + +def test_join_multi_wrong_order(): + # GH 25760 + # GH 28956 + + midx1 = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"]) + midx2 = MultiIndex.from_product([[1, 2], [3, 4]], names=["b", "a"]) + + join_idx, lidx, ridx = midx1.join(midx2, return_indexers=True) + + exp_ridx = np.array([-1, -1, -1, -1], dtype=np.intp) + + tm.assert_index_equal(midx1, join_idx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) + + +def test_join_multi_return_indexers(): + # GH 34074 + + midx1 = MultiIndex.from_product([[1, 2], [3, 4], [5, 6]], names=["a", "b", "c"]) + midx2 = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"]) + + result = midx1.join(midx2, return_indexers=False) + tm.assert_index_equal(result, midx1) + + +def test_join_overlapping_interval_level(): + # GH 44096 + idx_1 = MultiIndex.from_tuples( + [ + (1, Interval(0.0, 1.0)), + (1, Interval(1.0, 2.0)), + (1, Interval(2.0, 5.0)), + (2, Interval(0.0, 1.0)), + (2, Interval(1.0, 3.0)), # interval limit is here at 3.0, not at 2.0 + (2, Interval(3.0, 5.0)), + ], + names=["num", "interval"], + ) + + idx_2 = MultiIndex.from_tuples( + [ + (1, Interval(2.0, 5.0)), + (1, Interval(0.0, 1.0)), + (1, Interval(1.0, 2.0)), + (2, Interval(3.0, 5.0)), + (2, Interval(0.0, 1.0)), + (2, Interval(1.0, 3.0)), + ], + names=["num", "interval"], + ) + + expected = MultiIndex.from_tuples( + [ + (1, Interval(0.0, 1.0)), + (1, Interval(1.0, 2.0)), + (1, Interval(2.0, 5.0)), + (2, Interval(0.0, 1.0)), + (2, Interval(1.0, 3.0)), + (2, Interval(3.0, 5.0)), + ], + names=["num", "interval"], + ) + result = idx_1.join(idx_2, how="outer") + + tm.assert_index_equal(result, expected) + + +def test_join_midx_ea(): + # GH#49277 + midx = MultiIndex.from_arrays( + [Series([1, 1, 3], dtype="Int64"), Series([1, 2, 3], dtype="Int64")], + names=["a", "b"], + ) + midx2 = MultiIndex.from_arrays( + [Series([1], dtype="Int64"), Series([3], dtype="Int64")], names=["a", "c"] + ) + result = midx.join(midx2, how="inner") + expected = MultiIndex.from_arrays( + [ + Series([1, 1], dtype="Int64"), + Series([1, 2], dtype="Int64"), + Series([3, 3], dtype="Int64"), + ], + names=["a", "b", "c"], + ) + tm.assert_index_equal(result, expected) + + +def test_join_midx_string(): + # GH#49277 + midx = MultiIndex.from_arrays( + [ + Series(["a", "a", "c"], dtype=StringDtype()), + Series(["a", "b", "c"], dtype=StringDtype()), + ], + names=["a", "b"], + ) + midx2 = MultiIndex.from_arrays( + [Series(["a"], dtype=StringDtype()), Series(["c"], dtype=StringDtype())], + names=["a", "c"], + ) + result = midx.join(midx2, how="inner") + expected = MultiIndex.from_arrays( + [ + Series(["a", "a"], dtype=StringDtype()), + Series(["a", "b"], dtype=StringDtype()), + Series(["c", "c"], dtype=StringDtype()), + ], + names=["a", "b", "c"], + ) + tm.assert_index_equal(result, expected) + + +def test_join_multi_with_nan(): + # GH29252 + df1 = DataFrame( + data={"col1": [1.1, 1.2]}, + index=MultiIndex.from_product([["A"], [1.0, 2.0]], names=["id1", "id2"]), + ) + df2 = DataFrame( + data={"col2": [2.1, 2.2]}, + index=MultiIndex.from_product([["A"], [np.nan, 2.0]], names=["id1", "id2"]), + ) + result = df1.join(df2) + expected = DataFrame( + data={"col1": [1.1, 1.2], "col2": [np.nan, 2.2]}, + index=MultiIndex.from_product([["A"], [1.0, 2.0]], names=["id1", "id2"]), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("val", [0, 5]) +def test_join_dtypes(any_numeric_ea_dtype, val): + # GH#49830 + midx = MultiIndex.from_arrays([Series([1, 2], dtype=any_numeric_ea_dtype), [3, 4]]) + midx2 = MultiIndex.from_arrays( + [Series([1, val, val], dtype=any_numeric_ea_dtype), [3, 4, 4]] + ) + result = midx.join(midx2, how="outer") + expected = MultiIndex.from_arrays( + [Series([val, val, 1, 2], dtype=any_numeric_ea_dtype), [4, 4, 3, 4]] + ).sort_values() + tm.assert_index_equal(result, expected) + + +def test_join_dtypes_all_nan(any_numeric_ea_dtype): + # GH#49830 + midx = MultiIndex.from_arrays( + [Series([1, 2], dtype=any_numeric_ea_dtype), [np.nan, np.nan]] + ) + midx2 = MultiIndex.from_arrays( + [Series([1, 0, 0], dtype=any_numeric_ea_dtype), [np.nan, np.nan, np.nan]] + ) + result = midx.join(midx2, how="outer") + expected = MultiIndex.from_arrays( + [ + Series([0, 0, 1, 2], dtype=any_numeric_ea_dtype), + [np.nan, np.nan, np.nan, np.nan], + ] + ) + tm.assert_index_equal(result, expected) + + +def test_join_index_levels(): + # GH#53093 + midx = midx = MultiIndex.from_tuples([("a", "2019-02-01"), ("a", "2019-02-01")]) + midx2 = MultiIndex.from_tuples([("a", "2019-01-31")]) + result = midx.join(midx2, how="outer") + expected = MultiIndex.from_tuples( + [("a", "2019-01-31"), ("a", "2019-02-01"), ("a", "2019-02-01")] + ) + tm.assert_index_equal(result.levels[1], expected.levels[1]) + tm.assert_index_equal(result, expected) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_lexsort.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_lexsort.py new file mode 100644 index 0000000000000000000000000000000000000000..fc16a4197a3a4daf65de6f58d85d13883d535d41 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_lexsort.py @@ -0,0 +1,46 @@ +from pandas import MultiIndex + + +class TestIsLexsorted: + def test_is_lexsorted(self): + levels = [[0, 1], [0, 1, 2]] + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] + ) + assert index._is_lexsorted() + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]] + ) + assert not index._is_lexsorted() + + index = MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]] + ) + assert not index._is_lexsorted() + assert index._lexsort_depth == 0 + + +class TestLexsortDepth: + def test_lexsort_depth(self): + # Test that lexsort_depth return the correct sortorder + # when it was given to the MultiIndex const. + # GH#28518 + + levels = [[0, 1], [0, 1, 2]] + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 + ) + assert index._lexsort_depth == 2 + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1 + ) + assert index._lexsort_depth == 1 + + index = MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0 + ) + assert index._lexsort_depth == 0 diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_monotonic.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_monotonic.py new file mode 100644 index 0000000000000000000000000000000000000000..2b0b3f7cb36d72abedc538eda9e6a85eb45067e2 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_monotonic.py @@ -0,0 +1,188 @@ +import numpy as np +import pytest + +from pandas import ( + Index, + MultiIndex, +) + + +def test_is_monotonic_increasing_lexsorted(lexsorted_two_level_string_multiindex): + # string ordering + mi = lexsorted_two_level_string_multiindex + assert mi.is_monotonic_increasing is False + assert Index(mi.values).is_monotonic_increasing is False + assert mi._is_strictly_monotonic_increasing is False + assert Index(mi.values)._is_strictly_monotonic_increasing is False + + +def test_is_monotonic_increasing(): + i = MultiIndex.from_product([np.arange(10), np.arange(10)], names=["one", "two"]) + assert i.is_monotonic_increasing is True + assert i._is_strictly_monotonic_increasing is True + assert Index(i.values).is_monotonic_increasing is True + assert i._is_strictly_monotonic_increasing is True + + i = MultiIndex.from_product( + [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"] + ) + assert i.is_monotonic_increasing is False + assert i._is_strictly_monotonic_increasing is False + assert Index(i.values).is_monotonic_increasing is False + assert Index(i.values)._is_strictly_monotonic_increasing is False + + i = MultiIndex.from_product( + [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"] + ) + assert i.is_monotonic_increasing is False + assert i._is_strictly_monotonic_increasing is False + assert Index(i.values).is_monotonic_increasing is False + assert Index(i.values)._is_strictly_monotonic_increasing is False + + i = MultiIndex.from_product([[1.0, np.nan, 2.0], ["a", "b", "c"]]) + assert i.is_monotonic_increasing is False + assert i._is_strictly_monotonic_increasing is False + assert Index(i.values).is_monotonic_increasing is False + assert Index(i.values)._is_strictly_monotonic_increasing is False + + i = MultiIndex( + levels=[["bar", "baz", "foo", "qux"], ["mom", "next", "zenith"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + assert i.is_monotonic_increasing is True + assert Index(i.values).is_monotonic_increasing is True + assert i._is_strictly_monotonic_increasing is True + assert Index(i.values)._is_strictly_monotonic_increasing is True + + # mixed levels, hits the TypeError + i = MultiIndex( + levels=[ + [1, 2, 3, 4], + [ + "gb00b03mlx29", + "lu0197800237", + "nl0000289783", + "nl0000289965", + "nl0000301109", + ], + ], + codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + names=["household_id", "asset_id"], + ) + + assert i.is_monotonic_increasing is False + assert i._is_strictly_monotonic_increasing is False + + # empty + i = MultiIndex.from_arrays([[], []]) + assert i.is_monotonic_increasing is True + assert Index(i.values).is_monotonic_increasing is True + assert i._is_strictly_monotonic_increasing is True + assert Index(i.values)._is_strictly_monotonic_increasing is True + + +def test_is_monotonic_decreasing(): + i = MultiIndex.from_product( + [np.arange(9, -1, -1), np.arange(9, -1, -1)], names=["one", "two"] + ) + assert i.is_monotonic_decreasing is True + assert i._is_strictly_monotonic_decreasing is True + assert Index(i.values).is_monotonic_decreasing is True + assert i._is_strictly_monotonic_decreasing is True + + i = MultiIndex.from_product( + [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"] + ) + assert i.is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + assert Index(i.values).is_monotonic_decreasing is False + assert Index(i.values)._is_strictly_monotonic_decreasing is False + + i = MultiIndex.from_product( + [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"] + ) + assert i.is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + assert Index(i.values).is_monotonic_decreasing is False + assert Index(i.values)._is_strictly_monotonic_decreasing is False + + i = MultiIndex.from_product([[2.0, np.nan, 1.0], ["c", "b", "a"]]) + assert i.is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + assert Index(i.values).is_monotonic_decreasing is False + assert Index(i.values)._is_strictly_monotonic_decreasing is False + + # string ordering + i = MultiIndex( + levels=[["qux", "foo", "baz", "bar"], ["three", "two", "one"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + assert i.is_monotonic_decreasing is False + assert Index(i.values).is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + assert Index(i.values)._is_strictly_monotonic_decreasing is False + + i = MultiIndex( + levels=[["qux", "foo", "baz", "bar"], ["zenith", "next", "mom"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + assert i.is_monotonic_decreasing is True + assert Index(i.values).is_monotonic_decreasing is True + assert i._is_strictly_monotonic_decreasing is True + assert Index(i.values)._is_strictly_monotonic_decreasing is True + + # mixed levels, hits the TypeError + i = MultiIndex( + levels=[ + [4, 3, 2, 1], + [ + "nl0000301109", + "nl0000289965", + "nl0000289783", + "lu0197800237", + "gb00b03mlx29", + ], + ], + codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + names=["household_id", "asset_id"], + ) + + assert i.is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + + # empty + i = MultiIndex.from_arrays([[], []]) + assert i.is_monotonic_decreasing is True + assert Index(i.values).is_monotonic_decreasing is True + assert i._is_strictly_monotonic_decreasing is True + assert Index(i.values)._is_strictly_monotonic_decreasing is True + + +def test_is_strictly_monotonic_increasing(): + idx = MultiIndex( + levels=[["bar", "baz"], ["mom", "next"]], codes=[[0, 0, 1, 1], [0, 0, 0, 1]] + ) + assert idx.is_monotonic_increasing is True + assert idx._is_strictly_monotonic_increasing is False + + +def test_is_strictly_monotonic_decreasing(): + idx = MultiIndex( + levels=[["baz", "bar"], ["next", "mom"]], codes=[[0, 0, 1, 1], [0, 0, 0, 1]] + ) + assert idx.is_monotonic_decreasing is True + assert idx._is_strictly_monotonic_decreasing is False + + +@pytest.mark.parametrize("attr", ["is_monotonic_increasing", "is_monotonic_decreasing"]) +@pytest.mark.parametrize( + "values", + [[(np.nan,), (1,), (2,)], [(1,), (np.nan,), (2,)], [(1,), (2,), (np.nan,)]], +) +def test_is_monotonic_with_nans(values, attr): + # GH: 37220 + idx = MultiIndex.from_tuples(values, names=["test"]) + assert getattr(idx, attr) is False diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_reindex.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_reindex.py new file mode 100644 index 0000000000000000000000000000000000000000..d1b4fe8b98760a0b776c5d81d471a7745e8407de --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_reindex.py @@ -0,0 +1,174 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + MultiIndex, +) +import pandas._testing as tm + + +def test_reindex(idx): + result, indexer = idx.reindex(list(idx[:4])) + assert isinstance(result, MultiIndex) + assert result.names == ["first", "second"] + assert [level.name for level in result.levels] == ["first", "second"] + + result, indexer = idx.reindex(list(idx)) + assert isinstance(result, MultiIndex) + assert indexer is None + assert result.names == ["first", "second"] + assert [level.name for level in result.levels] == ["first", "second"] + + +def test_reindex_level(idx): + index = Index(["one"]) + + target, indexer = idx.reindex(index, level="second") + target2, indexer2 = index.reindex(idx, level="second") + + exp_index = idx.join(index, level="second", how="right") + exp_index2 = idx.join(index, level="second", how="left") + + assert target.equals(exp_index) + exp_indexer = np.array([0, 2, 4]) + tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False) + + assert target2.equals(exp_index2) + exp_indexer2 = np.array([0, -1, 0, -1, 0, -1]) + tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False) + + with pytest.raises(TypeError, match="Fill method not supported"): + idx.reindex(idx, method="pad", level="second") + + +def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx): + # GH6552 + idx = idx.copy() + target = idx.copy() + idx.names = target.names = [None, None] + + other_dtype = MultiIndex.from_product([[1, 2], [3, 4]]) + + # list & ndarray cases + assert idx.reindex([])[0].names == [None, None] + assert idx.reindex(np.array([]))[0].names == [None, None] + assert idx.reindex(target.tolist())[0].names == [None, None] + assert idx.reindex(target.values)[0].names == [None, None] + assert idx.reindex(other_dtype.tolist())[0].names == [None, None] + assert idx.reindex(other_dtype.values)[0].names == [None, None] + + idx.names = ["foo", "bar"] + assert idx.reindex([])[0].names == ["foo", "bar"] + assert idx.reindex(np.array([]))[0].names == ["foo", "bar"] + assert idx.reindex(target.tolist())[0].names == ["foo", "bar"] + assert idx.reindex(target.values)[0].names == ["foo", "bar"] + assert idx.reindex(other_dtype.tolist())[0].names == ["foo", "bar"] + assert idx.reindex(other_dtype.values)[0].names == ["foo", "bar"] + + +def test_reindex_lvl_preserves_names_when_target_is_list_or_array(): + # GH7774 + idx = MultiIndex.from_product([[0, 1], ["a", "b"]], names=["foo", "bar"]) + assert idx.reindex([], level=0)[0].names == ["foo", "bar"] + assert idx.reindex([], level=1)[0].names == ["foo", "bar"] + + +def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array( + using_infer_string, +): + # GH7774 + idx = MultiIndex.from_product([[0, 1], ["a", "b"]]) + assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64 + exp = np.object_ if not using_infer_string else str + assert idx.reindex([], level=1)[0].levels[1].dtype.type == exp + + # case with EA levels + cat = pd.Categorical(["foo", "bar"]) + dti = pd.date_range("2016-01-01", periods=2, tz="US/Pacific") + mi = MultiIndex.from_product([cat, dti]) + assert mi.reindex([], level=0)[0].levels[0].dtype == cat.dtype + assert mi.reindex([], level=1)[0].levels[1].dtype == dti.dtype + + +def test_reindex_base(idx): + expected = np.arange(idx.size, dtype=np.intp) + + actual = idx.get_indexer(idx) + tm.assert_numpy_array_equal(expected, actual) + + with pytest.raises(ValueError, match="Invalid fill method"): + idx.get_indexer(idx, method="invalid") + + +def test_reindex_non_unique(): + idx = MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)]) + a = pd.Series(np.arange(4), index=idx) + new_idx = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) + + msg = "cannot handle a non-unique multi-index!" + with pytest.raises(ValueError, match=msg): + a.reindex(new_idx) + + +@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]]) +def test_reindex_empty_with_level(values): + # GH41170 + idx = MultiIndex.from_arrays(values) + result, result_indexer = idx.reindex(np.array(["b"]), level=0) + expected = MultiIndex(levels=[["b"], values[1]], codes=[[], []]) + expected_indexer = np.array([], dtype=result_indexer.dtype) + tm.assert_index_equal(result, expected) + tm.assert_numpy_array_equal(result_indexer, expected_indexer) + + +def test_reindex_not_all_tuples(): + keys = [("i", "i"), ("i", "j"), ("j", "i"), "j"] + mi = MultiIndex.from_tuples(keys[:-1]) + idx = Index(keys) + res, indexer = mi.reindex(idx) + + tm.assert_index_equal(res, idx) + expected = np.array([0, 1, 2, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + +def test_reindex_limit_arg_with_multiindex(): + # GH21247 + + idx = MultiIndex.from_tuples([(3, "A"), (4, "A"), (4, "B")]) + + df = pd.Series([0.02, 0.01, 0.012], index=idx) + + new_idx = MultiIndex.from_tuples( + [ + (3, "A"), + (3, "B"), + (4, "A"), + (4, "B"), + (4, "C"), + (5, "B"), + (5, "C"), + (6, "B"), + (6, "C"), + ] + ) + + with pytest.raises( + ValueError, + match="limit argument only valid if doing pad, backfill or nearest reindexing", + ): + df.reindex(new_idx, fill_value=0, limit=1) + + +def test_reindex_with_none_in_nested_multiindex(): + # GH42883 + index = MultiIndex.from_tuples([(("a", None), 1), (("b", None), 2)]) + index2 = MultiIndex.from_tuples([(("b", None), 2), (("a", None), 1)]) + df1_dtype = pd.DataFrame([1, 2], index=index) + df2_dtype = pd.DataFrame([2, 1], index=index2) + + result = df1_dtype.reindex_like(df2_dtype) + expected = df2_dtype + tm.assert_frame_equal(result, expected) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_reshape.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_reshape.py new file mode 100644 index 0000000000000000000000000000000000000000..06dbb33aadf97a54e4bb283d3aed8fe1169164b3 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_reshape.py @@ -0,0 +1,224 @@ +from datetime import datetime + +import numpy as np +import pytest +import pytz + +import pandas as pd +from pandas import ( + Index, + MultiIndex, +) +import pandas._testing as tm + + +def test_insert(idx): + # key contained in all levels + new_index = idx.insert(0, ("bar", "two")) + assert new_index.equal_levels(idx) + assert new_index[0] == ("bar", "two") + + # key not contained in all levels + new_index = idx.insert(0, ("abc", "three")) + + exp0 = Index(list(idx.levels[0]) + ["abc"], name="first") + tm.assert_index_equal(new_index.levels[0], exp0) + assert new_index.names == ["first", "second"] + + exp1 = Index(list(idx.levels[1]) + ["three"], name="second") + tm.assert_index_equal(new_index.levels[1], exp1) + assert new_index[0] == ("abc", "three") + + # key wrong length + msg = "Item must have length equal to number of levels" + with pytest.raises(ValueError, match=msg): + idx.insert(0, ("foo2",)) + + left = pd.DataFrame([["a", "b", 0], ["b", "d", 1]], columns=["1st", "2nd", "3rd"]) + left.set_index(["1st", "2nd"], inplace=True) + ts = left["3rd"].copy(deep=True) + + left.loc[("b", "x"), "3rd"] = 2 + left.loc[("b", "a"), "3rd"] = -1 + left.loc[("b", "b"), "3rd"] = 3 + left.loc[("a", "x"), "3rd"] = 4 + left.loc[("a", "w"), "3rd"] = 5 + left.loc[("a", "a"), "3rd"] = 6 + + ts.loc[("b", "x")] = 2 + ts.loc["b", "a"] = -1 + ts.loc[("b", "b")] = 3 + ts.loc["a", "x"] = 4 + ts.loc[("a", "w")] = 5 + ts.loc["a", "a"] = 6 + + right = pd.DataFrame( + [ + ["a", "b", 0], + ["b", "d", 1], + ["b", "x", 2], + ["b", "a", -1], + ["b", "b", 3], + ["a", "x", 4], + ["a", "w", 5], + ["a", "a", 6], + ], + columns=["1st", "2nd", "3rd"], + ) + right.set_index(["1st", "2nd"], inplace=True) + # FIXME data types changes to float because + # of intermediate nan insertion; + tm.assert_frame_equal(left, right, check_dtype=False) + tm.assert_series_equal(ts, right["3rd"]) + + +def test_insert2(): + # GH9250 + idx = ( + [("test1", i) for i in range(5)] + + [("test2", i) for i in range(6)] + + [("test", 17), ("test", 18)] + ) + + left = pd.Series(np.linspace(0, 10, 11), MultiIndex.from_tuples(idx[:-2])) + + left.loc[("test", 17)] = 11 + left.loc[("test", 18)] = 12 + + right = pd.Series(np.linspace(0, 12, 13), MultiIndex.from_tuples(idx)) + + tm.assert_series_equal(left, right) + + +def test_append(idx): + result = idx[:3].append(idx[3:]) + assert result.equals(idx) + + foos = [idx[:1], idx[1:3], idx[3:]] + result = foos[0].append(foos[1:]) + assert result.equals(idx) + + # empty + result = idx.append([]) + assert result.equals(idx) + + +def test_append_index(): + idx1 = Index([1.1, 1.2, 1.3]) + idx2 = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo") + idx3 = Index(["A", "B", "C"]) + + midx_lv2 = MultiIndex.from_arrays([idx1, idx2]) + midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3]) + + result = idx1.append(midx_lv2) + + # see gh-7112 + tz = pytz.timezone("Asia/Tokyo") + expected_tuples = [ + (1.1, tz.localize(datetime(2011, 1, 1))), + (1.2, tz.localize(datetime(2011, 1, 2))), + (1.3, tz.localize(datetime(2011, 1, 3))), + ] + expected = Index([1.1, 1.2, 1.3] + expected_tuples) + tm.assert_index_equal(result, expected) + + result = midx_lv2.append(idx1) + expected = Index(expected_tuples + [1.1, 1.2, 1.3]) + tm.assert_index_equal(result, expected) + + result = midx_lv2.append(midx_lv2) + expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)]) + tm.assert_index_equal(result, expected) + + result = midx_lv2.append(midx_lv3) + tm.assert_index_equal(result, expected) + + result = midx_lv3.append(midx_lv2) + expected = Index._simple_new( + np.array( + [ + (1.1, tz.localize(datetime(2011, 1, 1)), "A"), + (1.2, tz.localize(datetime(2011, 1, 2)), "B"), + (1.3, tz.localize(datetime(2011, 1, 3)), "C"), + ] + + expected_tuples, + dtype=object, + ), + None, + ) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("name, exp", [("b", "b"), ("c", None)]) +def test_append_names_match(name, exp): + # GH#48288 + midx = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) + midx2 = MultiIndex.from_arrays([[3], [5]], names=["a", name]) + result = midx.append(midx2) + expected = MultiIndex.from_arrays([[1, 2, 3], [3, 4, 5]], names=["a", exp]) + tm.assert_index_equal(result, expected) + + +def test_append_names_dont_match(): + # GH#48288 + midx = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) + midx2 = MultiIndex.from_arrays([[3], [5]], names=["x", "y"]) + result = midx.append(midx2) + expected = MultiIndex.from_arrays([[1, 2, 3], [3, 4, 5]], names=None) + tm.assert_index_equal(result, expected) + + +def test_append_overlapping_interval_levels(): + # GH 54934 + ivl1 = pd.IntervalIndex.from_breaks([0.0, 1.0, 2.0]) + ivl2 = pd.IntervalIndex.from_breaks([0.5, 1.5, 2.5]) + mi1 = MultiIndex.from_product([ivl1, ivl1]) + mi2 = MultiIndex.from_product([ivl2, ivl2]) + result = mi1.append(mi2) + expected = MultiIndex.from_tuples( + [ + (pd.Interval(0.0, 1.0), pd.Interval(0.0, 1.0)), + (pd.Interval(0.0, 1.0), pd.Interval(1.0, 2.0)), + (pd.Interval(1.0, 2.0), pd.Interval(0.0, 1.0)), + (pd.Interval(1.0, 2.0), pd.Interval(1.0, 2.0)), + (pd.Interval(0.5, 1.5), pd.Interval(0.5, 1.5)), + (pd.Interval(0.5, 1.5), pd.Interval(1.5, 2.5)), + (pd.Interval(1.5, 2.5), pd.Interval(0.5, 1.5)), + (pd.Interval(1.5, 2.5), pd.Interval(1.5, 2.5)), + ] + ) + tm.assert_index_equal(result, expected) + + +def test_repeat(): + reps = 2 + numbers = [1, 2, 3] + names = np.array(["foo", "bar"]) + + m = MultiIndex.from_product([numbers, names], names=names) + expected = MultiIndex.from_product([numbers, names.repeat(reps)], names=names) + tm.assert_index_equal(m.repeat(reps), expected) + + +def test_insert_base(idx): + result = idx[1:4] + + # test 0th element + assert idx[0:4].equals(result.insert(0, idx[0])) + + +def test_delete_base(idx): + expected = idx[1:] + result = idx.delete(0) + assert result.equals(expected) + assert result.name == expected.name + + expected = idx[:-1] + result = idx.delete(-1) + assert result.equals(expected) + assert result.name == expected.name + + msg = "index 6 is out of bounds for axis 0 with size 6" + with pytest.raises(IndexError, match=msg): + idx.delete(len(idx)) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_setops.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_setops.py new file mode 100644 index 0000000000000000000000000000000000000000..801a813955b41ed6f67f00996e2de371d20fded5 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_setops.py @@ -0,0 +1,772 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + CategoricalIndex, + DataFrame, + Index, + IntervalIndex, + MultiIndex, + Series, +) +import pandas._testing as tm +from pandas.api.types import ( + is_float_dtype, + is_unsigned_integer_dtype, +) + + +@pytest.mark.parametrize("case", [0.5, "xxx"]) +@pytest.mark.parametrize( + "method", ["intersection", "union", "difference", "symmetric_difference"] +) +def test_set_ops_error_cases(idx, case, sort, method): + # non-iterable input + msg = "Input must be Index or array-like" + with pytest.raises(TypeError, match=msg): + getattr(idx, method)(case, sort=sort) + + +@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list]) +def test_intersection_base(idx, sort, klass): + first = idx[2::-1] # first 3 elements reversed + second = idx[:5] + + if klass is not MultiIndex: + second = klass(second.values) + + intersect = first.intersection(second, sort=sort) + if sort is None: + expected = first.sort_values() + else: + expected = first + tm.assert_index_equal(intersect, expected) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.intersection([1, 2, 3], sort=sort) + + +@pytest.mark.arm_slow +@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list]) +def test_union_base(idx, sort, klass): + first = idx[::-1] + second = idx[:5] + + if klass is not MultiIndex: + second = klass(second.values) + + union = first.union(second, sort=sort) + if sort is None: + expected = first.sort_values() + else: + expected = first + tm.assert_index_equal(union, expected) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.union([1, 2, 3], sort=sort) + + +def test_difference_base(idx, sort): + second = idx[4:] + answer = idx[:4] + result = idx.difference(second, sort=sort) + + if sort is None: + answer = answer.sort_values() + + assert result.equals(answer) + tm.assert_index_equal(result, answer) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = idx.difference(case, sort=sort) + tm.assert_index_equal(result, answer) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + idx.difference([1, 2, 3], sort=sort) + + +def test_symmetric_difference(idx, sort): + first = idx[1:] + second = idx[:-1] + answer = idx[[-1, 0]] + result = first.symmetric_difference(second, sort=sort) + + if sort is None: + answer = answer.sort_values() + + tm.assert_index_equal(result, answer) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.symmetric_difference(case, sort=sort) + tm.assert_index_equal(result, answer) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.symmetric_difference([1, 2, 3], sort=sort) + + +def test_multiindex_symmetric_difference(): + # GH 13490 + idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"]) + result = idx.symmetric_difference(idx) + assert result.names == idx.names + + idx2 = idx.copy().rename(["A", "B"]) + result = idx.symmetric_difference(idx2) + assert result.names == [None, None] + + +def test_empty(idx): + # GH 15270 + assert not idx.empty + assert idx[:0].empty + + +def test_difference(idx, sort): + first = idx + result = first.difference(idx[-3:], sort=sort) + vals = idx[:-3].values + + if sort is None: + vals = sorted(vals) + + expected = MultiIndex.from_tuples(vals, sortorder=0, names=idx.names) + + assert isinstance(result, MultiIndex) + assert result.equals(expected) + assert result.names == idx.names + tm.assert_index_equal(result, expected) + + # empty difference: reflexive + result = idx.difference(idx, sort=sort) + expected = idx[:0] + assert result.equals(expected) + assert result.names == idx.names + + # empty difference: superset + result = idx[-3:].difference(idx, sort=sort) + expected = idx[:0] + assert result.equals(expected) + assert result.names == idx.names + + # empty difference: degenerate + result = idx[:0].difference(idx, sort=sort) + expected = idx[:0] + assert result.equals(expected) + assert result.names == idx.names + + # names not the same + chunklet = idx[-3:] + chunklet.names = ["foo", "baz"] + result = first.difference(chunklet, sort=sort) + assert result.names == (None, None) + + # empty, but non-equal + result = idx.difference(idx.sortlevel(1)[0], sort=sort) + assert len(result) == 0 + + # raise Exception called with non-MultiIndex + result = first.difference(first.values, sort=sort) + assert result.equals(first[:0]) + + # name from empty array + result = first.difference([], sort=sort) + assert first.equals(result) + assert first.names == result.names + + # name from non-empty array + result = first.difference([("foo", "one")], sort=sort) + expected = MultiIndex.from_tuples( + [("bar", "one"), ("baz", "two"), ("foo", "two"), ("qux", "one"), ("qux", "two")] + ) + expected.names = first.names + assert first.names == result.names + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.difference([1, 2, 3, 4, 5], sort=sort) + + +def test_difference_sort_special(): + # GH-24959 + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + # sort=None, the default + result = idx.difference([]) + tm.assert_index_equal(result, idx) + + +def test_difference_sort_special_true(): + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + result = idx.difference([], sort=True) + expected = MultiIndex.from_product([[0, 1], ["a", "b"]]) + tm.assert_index_equal(result, expected) + + +def test_difference_sort_incomparable(): + # GH-24959 + idx = MultiIndex.from_product([[1, pd.Timestamp("2000"), 2], ["a", "b"]]) + + other = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]]) + # sort=None, the default + msg = "sort order is undefined for incomparable objects" + with tm.assert_produces_warning(RuntimeWarning, match=msg): + result = idx.difference(other) + tm.assert_index_equal(result, idx) + + # sort=False + result = idx.difference(other, sort=False) + tm.assert_index_equal(result, idx) + + +def test_difference_sort_incomparable_true(): + idx = MultiIndex.from_product([[1, pd.Timestamp("2000"), 2], ["a", "b"]]) + other = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]]) + + # TODO: this is raising in constructing a Categorical when calling + # algos.safe_sort. Should we catch and re-raise with a better message? + msg = "'values' is not ordered, please explicitly specify the categories order " + with pytest.raises(TypeError, match=msg): + idx.difference(other, sort=True) + + +def test_union(idx, sort): + piece1 = idx[:5][::-1] + piece2 = idx[3:] + + the_union = piece1.union(piece2, sort=sort) + + if sort in (None, False): + tm.assert_index_equal(the_union.sort_values(), idx.sort_values()) + else: + tm.assert_index_equal(the_union, idx) + + # corner case, pass self or empty thing: + the_union = idx.union(idx, sort=sort) + tm.assert_index_equal(the_union, idx) + + the_union = idx.union(idx[:0], sort=sort) + tm.assert_index_equal(the_union, idx) + + tuples = idx.values + result = idx[:4].union(tuples[4:], sort=sort) + if sort is None: + tm.assert_index_equal(result.sort_values(), idx.sort_values()) + else: + assert result.equals(idx) + + +def test_union_with_regular_index(idx, using_infer_string): + other = Index(["A", "B", "C"]) + + result = other.union(idx) + assert ("foo", "one") in result + assert "B" in result + + if using_infer_string: + with pytest.raises(NotImplementedError, match="Can only union"): + idx.union(other) + else: + msg = "The values in the array are unorderable" + with tm.assert_produces_warning(RuntimeWarning, match=msg): + result2 = idx.union(other) + # This is more consistent now, if sorting fails then we don't sort at all + # in the MultiIndex case. + assert not result.equals(result2) + + +def test_intersection(idx, sort): + piece1 = idx[:5][::-1] + piece2 = idx[3:] + + the_int = piece1.intersection(piece2, sort=sort) + + if sort in (None, True): + tm.assert_index_equal(the_int, idx[3:5]) + else: + tm.assert_index_equal(the_int.sort_values(), idx[3:5]) + + # corner case, pass self + the_int = idx.intersection(idx, sort=sort) + tm.assert_index_equal(the_int, idx) + + # empty intersection: disjoint + empty = idx[:2].intersection(idx[2:], sort=sort) + expected = idx[:0] + assert empty.equals(expected) + + tuples = idx.values + result = idx.intersection(tuples) + assert result.equals(idx) + + +@pytest.mark.parametrize( + "method", ["intersection", "union", "difference", "symmetric_difference"] +) +def test_setop_with_categorical(idx, sort, method): + other = idx.to_flat_index().astype("category") + res_names = [None] * idx.nlevels + + result = getattr(idx, method)(other, sort=sort) + expected = getattr(idx, method)(idx, sort=sort).rename(res_names) + tm.assert_index_equal(result, expected) + + result = getattr(idx, method)(other[:5], sort=sort) + expected = getattr(idx, method)(idx[:5], sort=sort).rename(res_names) + tm.assert_index_equal(result, expected) + + +def test_intersection_non_object(idx, sort): + other = Index(range(3), name="foo") + + result = idx.intersection(other, sort=sort) + expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=None) + tm.assert_index_equal(result, expected, exact=True) + + # if we pass a length-0 ndarray (i.e. no name, we retain our idx.name) + result = idx.intersection(np.asarray(other)[:0], sort=sort) + expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=idx.names) + tm.assert_index_equal(result, expected, exact=True) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + # With non-zero length non-index, we try and fail to convert to tuples + idx.intersection(np.asarray(other), sort=sort) + + +def test_intersect_equal_sort(): + # GH-24959 + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + tm.assert_index_equal(idx.intersection(idx, sort=False), idx) + tm.assert_index_equal(idx.intersection(idx, sort=None), idx) + + +def test_intersect_equal_sort_true(): + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + expected = MultiIndex.from_product([[0, 1], ["a", "b"]]) + result = idx.intersection(idx, sort=True) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("slice_", [slice(None), slice(0)]) +def test_union_sort_other_empty(slice_): + # https://github.com/pandas-dev/pandas/issues/24959 + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + + # default, sort=None + other = idx[slice_] + tm.assert_index_equal(idx.union(other), idx) + tm.assert_index_equal(other.union(idx), idx) + + # sort=False + tm.assert_index_equal(idx.union(other, sort=False), idx) + + +def test_union_sort_other_empty_sort(): + idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) + other = idx[:0] + result = idx.union(other, sort=True) + expected = MultiIndex.from_product([[0, 1], ["a", "b"]]) + tm.assert_index_equal(result, expected) + + +def test_union_sort_other_incomparable(): + # https://github.com/pandas-dev/pandas/issues/24959 + idx = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]]) + + # default, sort=None + with tm.assert_produces_warning(RuntimeWarning): + result = idx.union(idx[:1]) + tm.assert_index_equal(result, idx) + + # sort=False + result = idx.union(idx[:1], sort=False) + tm.assert_index_equal(result, idx) + + +def test_union_sort_other_incomparable_sort(): + idx = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]]) + msg = "'<' not supported between instances of 'Timestamp' and 'int'" + with pytest.raises(TypeError, match=msg): + idx.union(idx[:1], sort=True) + + +def test_union_non_object_dtype_raises(): + # GH#32646 raise NotImplementedError instead of less-informative error + mi = MultiIndex.from_product([["a", "b"], [1, 2]]) + + idx = mi.levels[1] + + msg = "Can only union MultiIndex with MultiIndex or Index of tuples" + with pytest.raises(NotImplementedError, match=msg): + mi.union(idx) + + +def test_union_empty_self_different_names(): + # GH#38423 + mi = MultiIndex.from_arrays([[]]) + mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) + result = mi.union(mi2) + expected = MultiIndex.from_arrays([[1, 2], [3, 4]]) + tm.assert_index_equal(result, expected) + + +def test_union_multiindex_empty_rangeindex(): + # GH#41234 + mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) + ri = pd.RangeIndex(0) + + result_left = mi.union(ri) + tm.assert_index_equal(mi, result_left, check_names=False) + + result_right = ri.union(mi) + tm.assert_index_equal(mi, result_right, check_names=False) + + +@pytest.mark.parametrize( + "method", ["union", "intersection", "difference", "symmetric_difference"] +) +def test_setops_sort_validation(method): + idx1 = MultiIndex.from_product([["a", "b"], [1, 2]]) + idx2 = MultiIndex.from_product([["b", "c"], [1, 2]]) + + with pytest.raises(ValueError, match="The 'sort' keyword only takes"): + getattr(idx1, method)(idx2, sort=2) + + # sort=True is supported as of GH#? + getattr(idx1, method)(idx2, sort=True) + + +@pytest.mark.parametrize("val", [pd.NA, 100]) +def test_difference_keep_ea_dtypes(any_numeric_ea_dtype, val): + # GH#48606 + midx = MultiIndex.from_arrays( + [Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None] + ) + midx2 = MultiIndex.from_arrays( + [Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]] + ) + result = midx.difference(midx2) + expected = MultiIndex.from_arrays([Series([1], dtype=any_numeric_ea_dtype), [2]]) + tm.assert_index_equal(result, expected) + + result = midx.difference(midx.sort_values(ascending=False)) + expected = MultiIndex.from_arrays( + [Series([], dtype=any_numeric_ea_dtype), Series([], dtype=np.int64)], + names=["a", None], + ) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("val", [pd.NA, 5]) +def test_symmetric_difference_keeping_ea_dtype(any_numeric_ea_dtype, val): + # GH#48607 + midx = MultiIndex.from_arrays( + [Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None] + ) + midx2 = MultiIndex.from_arrays( + [Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]] + ) + result = midx.symmetric_difference(midx2) + expected = MultiIndex.from_arrays( + [Series([1, 1, val], dtype=any_numeric_ea_dtype), [1, 2, 3]] + ) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + ("tuples", "exp_tuples"), + [ + ([("val1", "test1")], [("val1", "test1")]), + ([("val1", "test1"), ("val1", "test1")], [("val1", "test1")]), + ( + [("val2", "test2"), ("val1", "test1")], + [("val2", "test2"), ("val1", "test1")], + ), + ], +) +def test_intersect_with_duplicates(tuples, exp_tuples): + # GH#36915 + left = MultiIndex.from_tuples(tuples, names=["first", "second"]) + right = MultiIndex.from_tuples( + [("val1", "test1"), ("val1", "test1"), ("val2", "test2")], + names=["first", "second"], + ) + result = left.intersection(right) + expected = MultiIndex.from_tuples(exp_tuples, names=["first", "second"]) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "data, names, expected", + [ + ((1,), None, [None, None]), + ((1,), ["a"], [None, None]), + ((1,), ["b"], [None, None]), + ((1, 2), ["c", "d"], [None, None]), + ((1, 2), ["b", "a"], [None, None]), + ((1, 2, 3), ["a", "b", "c"], [None, None]), + ((1, 2), ["a", "c"], ["a", None]), + ((1, 2), ["c", "b"], [None, "b"]), + ((1, 2), ["a", "b"], ["a", "b"]), + ((1, 2), [None, "b"], [None, "b"]), + ], +) +def test_maybe_match_names(data, names, expected): + # GH#38323 + mi = MultiIndex.from_tuples([], names=["a", "b"]) + mi2 = MultiIndex.from_tuples([data], names=names) + result = mi._maybe_match_names(mi2) + assert result == expected + + +def test_intersection_equal_different_names(): + # GH#30302 + mi1 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["c", "b"]) + mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) + + result = mi1.intersection(mi2) + expected = MultiIndex.from_arrays([[1, 2], [3, 4]], names=[None, "b"]) + tm.assert_index_equal(result, expected) + + +def test_intersection_different_names(): + # GH#38323 + mi = MultiIndex.from_arrays([[1], [3]], names=["c", "b"]) + mi2 = MultiIndex.from_arrays([[1], [3]]) + result = mi.intersection(mi2) + tm.assert_index_equal(result, mi2) + + +def test_intersection_with_missing_values_on_both_sides(nulls_fixture): + # GH#38623 + mi1 = MultiIndex.from_arrays([[3, nulls_fixture, 4, nulls_fixture], [1, 2, 4, 2]]) + mi2 = MultiIndex.from_arrays([[3, nulls_fixture, 3], [1, 2, 4]]) + result = mi1.intersection(mi2) + expected = MultiIndex.from_arrays([[3, nulls_fixture], [1, 2]]) + tm.assert_index_equal(result, expected) + + +def test_union_with_missing_values_on_both_sides(nulls_fixture): + # GH#38623 + mi1 = MultiIndex.from_arrays([[1, nulls_fixture]]) + mi2 = MultiIndex.from_arrays([[1, nulls_fixture, 3]]) + result = mi1.union(mi2) + expected = MultiIndex.from_arrays([[1, 3, nulls_fixture]]) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["float64", "Float64"]) +@pytest.mark.parametrize("sort", [None, False]) +def test_union_nan_got_duplicated(dtype, sort): + # GH#38977, GH#49010 + mi1 = MultiIndex.from_arrays([pd.array([1.0, np.nan], dtype=dtype), [2, 3]]) + mi2 = MultiIndex.from_arrays([pd.array([1.0, np.nan, 3.0], dtype=dtype), [2, 3, 4]]) + result = mi1.union(mi2, sort=sort) + if sort is None: + expected = MultiIndex.from_arrays( + [pd.array([1.0, 3.0, np.nan], dtype=dtype), [2, 4, 3]] + ) + else: + expected = mi2 + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("val", [4, 1]) +def test_union_keep_ea_dtype(any_numeric_ea_dtype, val): + # GH#48505 + + arr1 = Series([val, 2], dtype=any_numeric_ea_dtype) + arr2 = Series([2, 1], dtype=any_numeric_ea_dtype) + midx = MultiIndex.from_arrays([arr1, [1, 2]], names=["a", None]) + midx2 = MultiIndex.from_arrays([arr2, [2, 1]]) + result = midx.union(midx2) + if val == 4: + expected = MultiIndex.from_arrays( + [Series([1, 2, 4], dtype=any_numeric_ea_dtype), [1, 2, 1]] + ) + else: + expected = MultiIndex.from_arrays( + [Series([1, 2], dtype=any_numeric_ea_dtype), [1, 2]] + ) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("dupe_val", [3, pd.NA]) +def test_union_with_duplicates_keep_ea_dtype(dupe_val, any_numeric_ea_dtype): + # GH48900 + mi1 = MultiIndex.from_arrays( + [ + Series([1, dupe_val, 2], dtype=any_numeric_ea_dtype), + Series([1, dupe_val, 2], dtype=any_numeric_ea_dtype), + ] + ) + mi2 = MultiIndex.from_arrays( + [ + Series([2, dupe_val, dupe_val], dtype=any_numeric_ea_dtype), + Series([2, dupe_val, dupe_val], dtype=any_numeric_ea_dtype), + ] + ) + result = mi1.union(mi2) + expected = MultiIndex.from_arrays( + [ + Series([1, 2, dupe_val, dupe_val], dtype=any_numeric_ea_dtype), + Series([1, 2, dupe_val, dupe_val], dtype=any_numeric_ea_dtype), + ] + ) + tm.assert_index_equal(result, expected) + + +@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") +def test_union_duplicates(index, request): + # GH#38977 + if index.empty or isinstance(index, (IntervalIndex, CategoricalIndex)): + pytest.skip(f"No duplicates in an empty {type(index).__name__}") + + values = index.unique().values.tolist() + mi1 = MultiIndex.from_arrays([values, [1] * len(values)]) + mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * (len(values) + 1)]) + result = mi2.union(mi1) + expected = mi2.sort_values() + tm.assert_index_equal(result, expected) + + if ( + is_unsigned_integer_dtype(mi2.levels[0]) + and (mi2.get_level_values(0) < 2**63).all() + ): + # GH#47294 - union uses lib.fast_zip, converting data to Python integers + # and loses type information. Result is then unsigned only when values are + # sufficiently large to require unsigned dtype. This happens only if other + # has dups or one of both have missing values + expected = expected.set_levels( + [expected.levels[0].astype(np.int64), expected.levels[1]] + ) + elif is_float_dtype(mi2.levels[0]): + # mi2 has duplicates witch is a different path than above, Fix that path + # to use correct float dtype? + expected = expected.set_levels( + [expected.levels[0].astype(float), expected.levels[1]] + ) + + result = mi1.union(mi2) + tm.assert_index_equal(result, expected) + + +def test_union_keep_dtype_precision(any_real_numeric_dtype): + # GH#48498 + arr1 = Series([4, 1, 1], dtype=any_real_numeric_dtype) + arr2 = Series([1, 4], dtype=any_real_numeric_dtype) + midx = MultiIndex.from_arrays([arr1, [2, 1, 1]], names=["a", None]) + midx2 = MultiIndex.from_arrays([arr2, [1, 2]], names=["a", None]) + + result = midx.union(midx2) + expected = MultiIndex.from_arrays( + ([Series([1, 1, 4], dtype=any_real_numeric_dtype), [1, 1, 2]]), + names=["a", None], + ) + tm.assert_index_equal(result, expected) + + +def test_union_keep_ea_dtype_with_na(any_numeric_ea_dtype): + # GH#48498 + arr1 = Series([4, pd.NA], dtype=any_numeric_ea_dtype) + arr2 = Series([1, pd.NA], dtype=any_numeric_ea_dtype) + midx = MultiIndex.from_arrays([arr1, [2, 1]], names=["a", None]) + midx2 = MultiIndex.from_arrays([arr2, [1, 2]]) + result = midx.union(midx2) + expected = MultiIndex.from_arrays( + [Series([1, 4, pd.NA, pd.NA], dtype=any_numeric_ea_dtype), [1, 2, 1, 2]] + ) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "levels1, levels2, codes1, codes2, names", + [ + ( + [["a", "b", "c"], [0, ""]], + [["c", "d", "b"], [""]], + [[0, 1, 2], [1, 1, 1]], + [[0, 1, 2], [0, 0, 0]], + ["name1", "name2"], + ), + ], +) +def test_intersection_lexsort_depth(levels1, levels2, codes1, codes2, names): + # GH#25169 + mi1 = MultiIndex(levels=levels1, codes=codes1, names=names) + mi2 = MultiIndex(levels=levels2, codes=codes2, names=names) + mi_int = mi1.intersection(mi2) + assert mi_int._lexsort_depth == 2 + + +@pytest.mark.parametrize( + "a", + [pd.Categorical(["a", "b"], categories=["a", "b"]), ["a", "b"]], +) +@pytest.mark.parametrize( + "b", + [ + pd.Categorical(["a", "b"], categories=["b", "a"], ordered=True), + pd.Categorical(["a", "b"], categories=["b", "a"]), + ], +) +def test_intersection_with_non_lex_sorted_categories(a, b): + # GH#49974 + other = ["1", "2"] + + df1 = DataFrame({"x": a, "y": other}) + df2 = DataFrame({"x": b, "y": other}) + + expected = MultiIndex.from_arrays([a, other], names=["x", "y"]) + + res1 = MultiIndex.from_frame(df1).intersection( + MultiIndex.from_frame(df2.sort_values(["x", "y"])) + ) + res2 = MultiIndex.from_frame(df1).intersection(MultiIndex.from_frame(df2)) + res3 = MultiIndex.from_frame(df1.sort_values(["x", "y"])).intersection( + MultiIndex.from_frame(df2) + ) + res4 = MultiIndex.from_frame(df1.sort_values(["x", "y"])).intersection( + MultiIndex.from_frame(df2.sort_values(["x", "y"])) + ) + + tm.assert_index_equal(res1, expected) + tm.assert_index_equal(res2, expected) + tm.assert_index_equal(res3, expected) + tm.assert_index_equal(res4, expected) + + +@pytest.mark.parametrize("val", [pd.NA, 100]) +def test_intersection_keep_ea_dtypes(val, any_numeric_ea_dtype): + # GH#48604 + midx = MultiIndex.from_arrays( + [Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None] + ) + midx2 = MultiIndex.from_arrays( + [Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]] + ) + result = midx.intersection(midx2) + expected = MultiIndex.from_arrays([Series([2], dtype=any_numeric_ea_dtype), [1]]) + tm.assert_index_equal(result, expected) + + +def test_union_with_na_when_constructing_dataframe(): + # GH43222 + series1 = Series( + (1,), + index=MultiIndex.from_arrays( + [Series([None], dtype="str"), Series([None], dtype="str")] + ), + ) + series2 = Series((10, 20), index=MultiIndex.from_tuples(((None, None), ("a", "b")))) + result = DataFrame([series1, series2]) + expected = DataFrame({(np.nan, np.nan): [1.0, 10.0], ("a", "b"): [np.nan, 20.0]}) + tm.assert_frame_equal(result, expected) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_sorting.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_sorting.py new file mode 100644 index 0000000000000000000000000000000000000000..b4dcef71dcf50724c90599b03d1c1c5aa99b7916 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_sorting.py @@ -0,0 +1,349 @@ +import numpy as np +import pytest + +from pandas.errors import ( + PerformanceWarning, + UnsortedIndexError, +) + +from pandas import ( + CategoricalIndex, + DataFrame, + Index, + MultiIndex, + RangeIndex, + Series, + Timestamp, +) +import pandas._testing as tm +from pandas.core.indexes.frozen import FrozenList + + +def test_sortlevel(idx): + tuples = list(idx) + np.random.default_rng(2).shuffle(tuples) + + index = MultiIndex.from_tuples(tuples) + + sorted_idx, _ = index.sortlevel(0) + expected = MultiIndex.from_tuples(sorted(tuples)) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(0, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + sorted_idx, _ = index.sortlevel(1) + by1 = sorted(tuples, key=lambda x: (x[1], x[0])) + expected = MultiIndex.from_tuples(by1) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(1, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + +def test_sortlevel_not_sort_remaining(): + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) + sorted_idx, _ = mi.sortlevel("A", sort_remaining=False) + assert sorted_idx.equals(mi) + + +def test_sortlevel_deterministic(): + tuples = [ + ("bar", "one"), + ("foo", "two"), + ("qux", "two"), + ("foo", "one"), + ("baz", "two"), + ("qux", "one"), + ] + + index = MultiIndex.from_tuples(tuples) + + sorted_idx, _ = index.sortlevel(0) + expected = MultiIndex.from_tuples(sorted(tuples)) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(0, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + sorted_idx, _ = index.sortlevel(1) + by1 = sorted(tuples, key=lambda x: (x[1], x[0])) + expected = MultiIndex.from_tuples(by1) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(1, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + +def test_sortlevel_na_position(): + # GH#51612 + midx = MultiIndex.from_tuples([(1, np.nan), (1, 1)]) + result = midx.sortlevel(level=[0, 1], na_position="last")[0] + expected = MultiIndex.from_tuples([(1, 1), (1, np.nan)]) + tm.assert_index_equal(result, expected) + + +def test_numpy_argsort(idx): + result = np.argsort(idx) + expected = idx.argsort() + tm.assert_numpy_array_equal(result, expected) + + # these are the only two types that perform + # pandas compatibility input validation - the + # rest already perform separate (or no) such + # validation via their 'values' attribute as + # defined in pandas.core.indexes/base.py - they + # cannot be changed at the moment due to + # backwards compatibility concerns + if isinstance(type(idx), (CategoricalIndex, RangeIndex)): + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(idx, axis=1) + + msg = "the 'kind' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(idx, kind="mergesort") + + msg = "the 'order' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(idx, order=("a", "b")) + + +def test_unsortedindex(): + # GH 11897 + mi = MultiIndex.from_tuples( + [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")], + names=["one", "two"], + ) + df = DataFrame([[i, 10 * i] for i in range(6)], index=mi, columns=["one", "two"]) + + # GH 16734: not sorted, but no real slicing + result = df.loc(axis=0)["z", "a"] + expected = df.iloc[0] + tm.assert_series_equal(result, expected) + + msg = ( + "MultiIndex slicing requires the index to be lexsorted: " + r"slicing on levels \[1\], lexsort depth 0" + ) + with pytest.raises(UnsortedIndexError, match=msg): + df.loc(axis=0)["z", slice("a")] + df.sort_index(inplace=True) + assert len(df.loc(axis=0)["z", :]) == 2 + + with pytest.raises(KeyError, match="'q'"): + df.loc(axis=0)["q", :] + + +def test_unsortedindex_doc_examples(): + # https://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex + dfm = DataFrame( + { + "jim": [0, 0, 1, 1], + "joe": ["x", "x", "z", "y"], + "jolie": np.random.default_rng(2).random(4), + } + ) + + dfm = dfm.set_index(["jim", "joe"]) + with tm.assert_produces_warning(PerformanceWarning): + dfm.loc[(1, "z")] + + msg = r"Key length \(2\) was greater than MultiIndex lexsort depth \(1\)" + with pytest.raises(UnsortedIndexError, match=msg): + dfm.loc[(0, "y"):(1, "z")] + + assert not dfm.index._is_lexsorted() + assert dfm.index._lexsort_depth == 1 + + # sort it + dfm = dfm.sort_index() + dfm.loc[(1, "z")] + dfm.loc[(0, "y"):(1, "z")] + + assert dfm.index._is_lexsorted() + assert dfm.index._lexsort_depth == 2 + + +def test_reconstruct_sort(): + # starts off lexsorted & monotonic + mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]]) + assert mi.is_monotonic_increasing + recons = mi._sort_levels_monotonic() + assert recons.is_monotonic_increasing + assert mi is recons + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = MultiIndex.from_tuples( + [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")], + names=["one", "two"], + ) + assert not mi.is_monotonic_increasing + recons = mi._sort_levels_monotonic() + assert not recons.is_monotonic_increasing + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = MultiIndex( + levels=[["b", "d", "a"], [1, 2, 3]], + codes=[[0, 1, 0, 2], [2, 0, 0, 1]], + names=["col1", "col2"], + ) + assert not mi.is_monotonic_increasing + recons = mi._sort_levels_monotonic() + assert not recons.is_monotonic_increasing + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + +def test_reconstruct_remove_unused(): + # xref to GH 2770 + df = DataFrame( + [["deleteMe", 1, 9], ["keepMe", 2, 9], ["keepMeToo", 3, 9]], + columns=["first", "second", "third"], + ) + df2 = df.set_index(["first", "second"], drop=False) + df2 = df2[df2["first"] != "deleteMe"] + + # removed levels are there + expected = MultiIndex( + levels=[["deleteMe", "keepMe", "keepMeToo"], [1, 2, 3]], + codes=[[1, 2], [1, 2]], + names=["first", "second"], + ) + result = df2.index + tm.assert_index_equal(result, expected) + + expected = MultiIndex( + levels=[["keepMe", "keepMeToo"], [2, 3]], + codes=[[0, 1], [0, 1]], + names=["first", "second"], + ) + result = df2.index.remove_unused_levels() + tm.assert_index_equal(result, expected) + + # idempotent + result2 = result.remove_unused_levels() + tm.assert_index_equal(result2, expected) + assert result2.is_(result) + + +@pytest.mark.parametrize( + "first_type,second_type", [("int64", "int64"), ("datetime64[D]", "str")] +) +def test_remove_unused_levels_large(first_type, second_type): + # GH16556 + + # because tests should be deterministic (and this test in particular + # checks that levels are removed, which is not the case for every + # random input): + rng = np.random.default_rng(10) # seed is arbitrary value that works + + size = 1 << 16 + df = DataFrame( + { + "first": rng.integers(0, 1 << 13, size).astype(first_type), + "second": rng.integers(0, 1 << 10, size).astype(second_type), + "third": rng.random(size), + } + ) + df = df.groupby(["first", "second"]).sum() + df = df[df.third < 0.1] + + result = df.index.remove_unused_levels() + assert len(result.levels[0]) < len(df.index.levels[0]) + assert len(result.levels[1]) < len(df.index.levels[1]) + assert result.equals(df.index) + + expected = df.reset_index().set_index(["first", "second"]).index + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("level0", [["a", "d", "b"], ["a", "d", "b", "unused"]]) +@pytest.mark.parametrize( + "level1", [["w", "x", "y", "z"], ["w", "x", "y", "z", "unused"]] +) +def test_remove_unused_nan(level0, level1): + # GH 18417 + mi = MultiIndex(levels=[level0, level1], codes=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]) + + result = mi.remove_unused_levels() + tm.assert_index_equal(result, mi) + for level in 0, 1: + assert "unused" not in result.levels[level] + + +def test_argsort(idx): + result = idx.argsort() + expected = idx.values.argsort() + tm.assert_numpy_array_equal(result, expected) + + +def test_remove_unused_levels_with_nan(): + # GH 37510 + idx = Index([(1, np.nan), (3, 4)]).rename(["id1", "id2"]) + idx = idx.set_levels(["a", np.nan], level="id1") + idx = idx.remove_unused_levels() + result = idx.levels + expected = FrozenList([["a", np.nan], [4]]) + assert str(result) == str(expected) + + +def test_sort_values_nan(): + # GH48495, GH48626 + midx = MultiIndex(levels=[["A", "B", "C"], ["D"]], codes=[[1, 0, 2], [-1, -1, 0]]) + result = midx.sort_values() + expected = MultiIndex( + levels=[["A", "B", "C"], ["D"]], codes=[[0, 1, 2], [-1, -1, 0]] + ) + tm.assert_index_equal(result, expected) + + +def test_sort_values_incomparable(): + # GH48495 + mi = MultiIndex.from_arrays( + [ + [1, Timestamp("2000-01-01")], + [3, 4], + ] + ) + match = "'<' not supported between instances of 'Timestamp' and 'int'" + with pytest.raises(TypeError, match=match): + mi.sort_values() + + +@pytest.mark.parametrize("na_position", ["first", "last"]) +@pytest.mark.parametrize("dtype", ["float64", "Int64", "Float64"]) +def test_sort_values_with_na_na_position(dtype, na_position): + # 51612 + arrays = [ + Series([1, 1, 2], dtype=dtype), + Series([1, None, 3], dtype=dtype), + ] + index = MultiIndex.from_arrays(arrays) + result = index.sort_values(na_position=na_position) + if na_position == "first": + arrays = [ + Series([1, 1, 2], dtype=dtype), + Series([None, 1, 3], dtype=dtype), + ] + else: + arrays = [ + Series([1, 1, 2], dtype=dtype), + Series([1, None, 3], dtype=dtype), + ] + expected = MultiIndex.from_arrays(arrays) + tm.assert_index_equal(result, expected) + + +def test_sort_unnecessary_warning(): + # GH#55386 + midx = MultiIndex.from_tuples([(1.5, 2), (3.5, 3), (0, 1)]) + midx = midx.set_levels([2.5, np.nan, 1], level=0) + result = midx.sort_values() + expected = MultiIndex.from_tuples([(1, 3), (2.5, 1), (np.nan, 2)]) + tm.assert_index_equal(result, expected) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_take.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_take.py new file mode 100644 index 0000000000000000000000000000000000000000..543cba25c373b71b8c79c7fe0ea5ae2fb7f40b18 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/multi/test_take.py @@ -0,0 +1,78 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +def test_take(idx): + indexer = [4, 3, 0, 2] + result = idx.take(indexer) + expected = idx[indexer] + assert result.equals(expected) + + # GH 10791 + msg = "'MultiIndex' object has no attribute 'freq'" + with pytest.raises(AttributeError, match=msg): + idx.freq + + +def test_take_invalid_kwargs(idx): + indices = [1, 2] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + +def test_take_fill_value(): + # GH 12631 + vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]] + idx = pd.MultiIndex.from_product(vals, names=["str", "dt"]) + + result = idx.take(np.array([1, 0, -1])) + exp_vals = [ + ("A", pd.Timestamp("2011-01-02")), + ("A", pd.Timestamp("2011-01-01")), + ("B", pd.Timestamp("2011-01-02")), + ] + expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"]) + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + exp_vals = [ + ("A", pd.Timestamp("2011-01-02")), + ("A", pd.Timestamp("2011-01-01")), + (np.nan, pd.NaT), + ] + expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"]) + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + exp_vals = [ + ("A", pd.Timestamp("2011-01-02")), + ("A", pd.Timestamp("2011-01-01")), + ("B", pd.Timestamp("2011-01-02")), + ] + expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"]) + tm.assert_index_equal(result, expected) + + msg = "When allow_fill=True and fill_value is not None, all indices must be >= -1" + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for( axis 0 with)? size 4" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/__init__.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_astype.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_astype.py new file mode 100644 index 0000000000000000000000000000000000000000..1c2df6008de5d85789b026e947ac27a8036a9be7 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_astype.py @@ -0,0 +1,95 @@ +import numpy as np +import pytest + +from pandas import ( + Index, + to_datetime, + to_timedelta, +) +import pandas._testing as tm + + +class TestAstype: + def test_astype_float64_to_uint64(self): + # GH#45309 used to incorrectly return Index with int64 dtype + idx = Index([0.0, 5.0, 10.0, 15.0, 20.0], dtype=np.float64) + result = idx.astype("u8") + expected = Index([0, 5, 10, 15, 20], dtype=np.uint64) + tm.assert_index_equal(result, expected, exact=True) + + idx_with_negatives = idx - 10 + with pytest.raises(ValueError, match="losslessly"): + idx_with_negatives.astype(np.uint64) + + def test_astype_float64_to_object(self): + float_index = Index([0.0, 2.5, 5.0, 7.5, 10.0], dtype=np.float64) + result = float_index.astype(object) + assert result.equals(float_index) + assert float_index.equals(result) + assert isinstance(result, Index) and result.dtype == object + + def test_astype_float64_mixed_to_object(self): + # mixed int-float + idx = Index([1.5, 2, 3, 4, 5], dtype=np.float64) + idx.name = "foo" + result = idx.astype(object) + assert result.equals(idx) + assert idx.equals(result) + assert isinstance(result, Index) and result.dtype == object + + @pytest.mark.parametrize("dtype", ["int16", "int32", "int64"]) + def test_astype_float64_to_int_dtype(self, dtype): + # GH#12881 + # a float astype int + idx = Index([0, 1, 2], dtype=np.float64) + result = idx.astype(dtype) + expected = Index([0, 1, 2], dtype=dtype) + tm.assert_index_equal(result, expected, exact=True) + + idx = Index([0, 1.1, 2], dtype=np.float64) + result = idx.astype(dtype) + expected = Index([0, 1, 2], dtype=dtype) + tm.assert_index_equal(result, expected, exact=True) + + @pytest.mark.parametrize("dtype", ["float32", "float64"]) + def test_astype_float64_to_float_dtype(self, dtype): + # GH#12881 + # a float astype int + idx = Index([0, 1, 2], dtype=np.float64) + result = idx.astype(dtype) + assert isinstance(result, Index) and result.dtype == dtype + + @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) + def test_astype_float_to_datetimelike(self, dtype): + # GH#49660 pre-2.0 Index.astype from floating to M8/m8/Period raised, + # inconsistent with Series.astype + idx = Index([0, 1.1, 2], dtype=np.float64) + + result = idx.astype(dtype) + if dtype[0] == "M": + expected = to_datetime(idx.values) + else: + expected = to_timedelta(idx.values) + tm.assert_index_equal(result, expected) + + # check that we match Series behavior + result = idx.to_series().set_axis(range(3)).astype(dtype) + expected = expected.to_series().set_axis(range(3)) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", [int, "int16", "int32", "int64"]) + @pytest.mark.parametrize("non_finite", [np.inf, np.nan]) + def test_cannot_cast_inf_to_int(self, non_finite, dtype): + # GH#13149 + idx = Index([1, 2, non_finite], dtype=np.float64) + + msg = r"Cannot convert non-finite values \(NA or inf\) to integer" + with pytest.raises(ValueError, match=msg): + idx.astype(dtype) + + def test_astype_from_object(self): + index = Index([1.0, np.nan, 0.2], dtype="object") + result = index.astype(float) + expected = Index([1.0, np.nan, 0.2], dtype=np.float64) + assert result.dtype == expected.dtype + tm.assert_index_equal(result, expected) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_indexing.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_indexing.py new file mode 100644 index 0000000000000000000000000000000000000000..cd28d519313ed36228040361dfbb2a8dccf77be5 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_indexing.py @@ -0,0 +1,611 @@ +import numpy as np +import pytest + +from pandas.errors import InvalidIndexError + +from pandas import ( + NA, + Index, + RangeIndex, + Series, + Timestamp, +) +import pandas._testing as tm +from pandas.core.arrays import ( + ArrowExtensionArray, + FloatingArray, +) + + +@pytest.fixture +def index_large(): + # large values used in Index[uint64] tests where no compat needed with Int64/Float64 + large = [2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 + 25] + return Index(large, dtype=np.uint64) + + +class TestGetLoc: + def test_get_loc(self): + index = Index([0, 1, 2]) + assert index.get_loc(1) == 1 + + def test_get_loc_raises_bad_label(self): + index = Index([0, 1, 2]) + with pytest.raises(InvalidIndexError, match=r"\[1, 2\]"): + index.get_loc([1, 2]) + + def test_get_loc_float64(self): + idx = Index([0.0, 1.0, 2.0], dtype=np.float64) + + with pytest.raises(KeyError, match="^'foo'$"): + idx.get_loc("foo") + with pytest.raises(KeyError, match=r"^1\.5$"): + idx.get_loc(1.5) + with pytest.raises(KeyError, match="^True$"): + idx.get_loc(True) + with pytest.raises(KeyError, match="^False$"): + idx.get_loc(False) + + def test_get_loc_na(self): + idx = Index([np.nan, 1, 2], dtype=np.float64) + assert idx.get_loc(1) == 1 + assert idx.get_loc(np.nan) == 0 + + idx = Index([np.nan, 1, np.nan], dtype=np.float64) + assert idx.get_loc(1) == 1 + + # representable by slice [0:2:2] + msg = "'Cannot get left slice bound for non-unique label: nan'" + with pytest.raises(KeyError, match=msg): + idx.slice_locs(np.nan) + # not representable by slice + idx = Index([np.nan, 1, np.nan, np.nan], dtype=np.float64) + assert idx.get_loc(1) == 1 + msg = "'Cannot get left slice bound for non-unique label: nan" + with pytest.raises(KeyError, match=msg): + idx.slice_locs(np.nan) + + def test_get_loc_missing_nan(self): + # GH#8569 + idx = Index([1, 2], dtype=np.float64) + assert idx.get_loc(1) == 0 + with pytest.raises(KeyError, match=r"^3$"): + idx.get_loc(3) + with pytest.raises(KeyError, match="^nan$"): + idx.get_loc(np.nan) + with pytest.raises(InvalidIndexError, match=r"\[nan\]"): + # listlike/non-hashable raises TypeError + idx.get_loc([np.nan]) + + @pytest.mark.parametrize("vals", [[1], [1.0], [Timestamp("2019-12-31")], ["test"]]) + def test_get_loc_float_index_nan_with_method(self, vals): + # GH#39382 + idx = Index(vals) + with pytest.raises(KeyError, match="nan"): + idx.get_loc(np.nan) + + @pytest.mark.parametrize("dtype", ["f8", "i8", "u8"]) + def test_get_loc_numericindex_none_raises(self, dtype): + # case that goes through searchsorted and key is non-comparable to values + arr = np.arange(10**7, dtype=dtype) + idx = Index(arr) + with pytest.raises(KeyError, match="None"): + idx.get_loc(None) + + def test_get_loc_overflows(self): + # unique but non-monotonic goes through IndexEngine.mapping.get_item + idx = Index([0, 2, 1]) + + val = np.iinfo(np.int64).max + 1 + + with pytest.raises(KeyError, match=str(val)): + idx.get_loc(val) + with pytest.raises(KeyError, match=str(val)): + idx._engine.get_loc(val) + + +class TestGetIndexer: + def test_get_indexer(self): + index1 = Index([1, 2, 3, 4, 5]) + index2 = Index([2, 4, 6]) + + r1 = index1.get_indexer(index2) + e1 = np.array([1, 3, -1], dtype=np.intp) + tm.assert_almost_equal(r1, e1) + + @pytest.mark.parametrize("reverse", [True, False]) + @pytest.mark.parametrize( + "expected,method", + [ + (np.array([-1, 0, 0, 1, 1], dtype=np.intp), "pad"), + (np.array([-1, 0, 0, 1, 1], dtype=np.intp), "ffill"), + (np.array([0, 0, 1, 1, 2], dtype=np.intp), "backfill"), + (np.array([0, 0, 1, 1, 2], dtype=np.intp), "bfill"), + ], + ) + def test_get_indexer_methods(self, reverse, expected, method): + index1 = Index([1, 2, 3, 4, 5]) + index2 = Index([2, 4, 6]) + + if reverse: + index1 = index1[::-1] + expected = expected[::-1] + + result = index2.get_indexer(index1, method=method) + tm.assert_almost_equal(result, expected) + + def test_get_indexer_invalid(self): + # GH10411 + index = Index(np.arange(10)) + + with pytest.raises(ValueError, match="tolerance argument"): + index.get_indexer([1, 0], tolerance=1) + + with pytest.raises(ValueError, match="limit argument"): + index.get_indexer([1, 0], limit=1) + + @pytest.mark.parametrize( + "method, tolerance, indexer, expected", + [ + ("pad", None, [0, 5, 9], [0, 5, 9]), + ("backfill", None, [0, 5, 9], [0, 5, 9]), + ("nearest", None, [0, 5, 9], [0, 5, 9]), + ("pad", 0, [0, 5, 9], [0, 5, 9]), + ("backfill", 0, [0, 5, 9], [0, 5, 9]), + ("nearest", 0, [0, 5, 9], [0, 5, 9]), + ("pad", None, [0.2, 1.8, 8.5], [0, 1, 8]), + ("backfill", None, [0.2, 1.8, 8.5], [1, 2, 9]), + ("nearest", None, [0.2, 1.8, 8.5], [0, 2, 9]), + ("pad", 1, [0.2, 1.8, 8.5], [0, 1, 8]), + ("backfill", 1, [0.2, 1.8, 8.5], [1, 2, 9]), + ("nearest", 1, [0.2, 1.8, 8.5], [0, 2, 9]), + ("pad", 0.2, [0.2, 1.8, 8.5], [0, -1, -1]), + ("backfill", 0.2, [0.2, 1.8, 8.5], [-1, 2, -1]), + ("nearest", 0.2, [0.2, 1.8, 8.5], [0, 2, -1]), + ], + ) + def test_get_indexer_nearest(self, method, tolerance, indexer, expected): + index = Index(np.arange(10)) + + actual = index.get_indexer(indexer, method=method, tolerance=tolerance) + tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) + + @pytest.mark.parametrize("listtype", [list, tuple, Series, np.array]) + @pytest.mark.parametrize( + "tolerance, expected", + list( + zip( + [[0.3, 0.3, 0.1], [0.2, 0.1, 0.1], [0.1, 0.5, 0.5]], + [[0, 2, -1], [0, -1, -1], [-1, 2, 9]], + ) + ), + ) + def test_get_indexer_nearest_listlike_tolerance( + self, tolerance, expected, listtype + ): + index = Index(np.arange(10)) + + actual = index.get_indexer( + [0.2, 1.8, 8.5], method="nearest", tolerance=listtype(tolerance) + ) + tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) + + def test_get_indexer_nearest_error(self): + index = Index(np.arange(10)) + with pytest.raises(ValueError, match="limit argument"): + index.get_indexer([1, 0], method="nearest", limit=1) + + with pytest.raises(ValueError, match="tolerance size must match"): + index.get_indexer([1, 0], method="nearest", tolerance=[1, 2, 3]) + + @pytest.mark.parametrize( + "method,expected", + [("pad", [8, 7, 0]), ("backfill", [9, 8, 1]), ("nearest", [9, 7, 0])], + ) + def test_get_indexer_nearest_decreasing(self, method, expected): + index = Index(np.arange(10))[::-1] + + actual = index.get_indexer([0, 5, 9], method=method) + tm.assert_numpy_array_equal(actual, np.array([9, 4, 0], dtype=np.intp)) + + actual = index.get_indexer([0.2, 1.8, 8.5], method=method) + tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) + + @pytest.mark.parametrize("idx_dtype", ["int64", "float64", "uint64", "range"]) + @pytest.mark.parametrize("method", ["get_indexer", "get_indexer_non_unique"]) + def test_get_indexer_numeric_index_boolean_target(self, method, idx_dtype): + # GH 16877 + + if idx_dtype == "range": + numeric_index = RangeIndex(4) + else: + numeric_index = Index(np.arange(4, dtype=idx_dtype)) + + other = Index([True, False, True]) + + result = getattr(numeric_index, method)(other) + expected = np.array([-1, -1, -1], dtype=np.intp) + if method == "get_indexer": + tm.assert_numpy_array_equal(result, expected) + else: + missing = np.arange(3, dtype=np.intp) + tm.assert_numpy_array_equal(result[0], expected) + tm.assert_numpy_array_equal(result[1], missing) + + @pytest.mark.parametrize("method", ["pad", "backfill", "nearest"]) + def test_get_indexer_with_method_numeric_vs_bool(self, method): + left = Index([1, 2, 3]) + right = Index([True, False]) + + with pytest.raises(TypeError, match="Cannot compare"): + left.get_indexer(right, method=method) + + with pytest.raises(TypeError, match="Cannot compare"): + right.get_indexer(left, method=method) + + def test_get_indexer_numeric_vs_bool(self): + left = Index([1, 2, 3]) + right = Index([True, False]) + + res = left.get_indexer(right) + expected = -1 * np.ones(len(right), dtype=np.intp) + tm.assert_numpy_array_equal(res, expected) + + res = right.get_indexer(left) + expected = -1 * np.ones(len(left), dtype=np.intp) + tm.assert_numpy_array_equal(res, expected) + + res = left.get_indexer_non_unique(right)[0] + expected = -1 * np.ones(len(right), dtype=np.intp) + tm.assert_numpy_array_equal(res, expected) + + res = right.get_indexer_non_unique(left)[0] + expected = -1 * np.ones(len(left), dtype=np.intp) + tm.assert_numpy_array_equal(res, expected) + + def test_get_indexer_float64(self): + idx = Index([0.0, 1.0, 2.0], dtype=np.float64) + tm.assert_numpy_array_equal( + idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp) + ) + + target = [-0.1, 0.5, 1.1] + tm.assert_numpy_array_equal( + idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp) + ) + + def test_get_indexer_nan(self): + # GH#7820 + result = Index([1, 2, np.nan], dtype=np.float64).get_indexer([np.nan]) + expected = np.array([2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_int64(self): + index = Index(range(0, 20, 2), dtype=np.int64) + target = Index(np.arange(10), dtype=np.int64) + indexer = index.get_indexer(target) + expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = Index(np.arange(10), dtype=np.int64) + indexer = index.get_indexer(target, method="pad") + expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = Index(np.arange(10), dtype=np.int64) + indexer = index.get_indexer(target, method="backfill") + expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_uint64(self, index_large): + target = Index(np.arange(10).astype("uint64") * 5 + 2**63) + indexer = index_large.get_indexer(target) + expected = np.array([0, -1, 1, 2, 3, 4, -1, -1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = Index(np.arange(10).astype("uint64") * 5 + 2**63) + indexer = index_large.get_indexer(target, method="pad") + expected = np.array([0, 0, 1, 2, 3, 4, 4, 4, 4, 4], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = Index(np.arange(10).astype("uint64") * 5 + 2**63) + indexer = index_large.get_indexer(target, method="backfill") + expected = np.array([0, 1, 1, 2, 3, 4, -1, -1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + @pytest.mark.parametrize("val, val2", [(4, 5), (4, 4), (4, NA), (NA, NA)]) + def test_get_loc_masked(self, val, val2, any_numeric_ea_and_arrow_dtype): + # GH#39133 + idx = Index([1, 2, 3, val, val2], dtype=any_numeric_ea_and_arrow_dtype) + result = idx.get_loc(2) + assert result == 1 + + with pytest.raises(KeyError, match="9"): + idx.get_loc(9) + + def test_get_loc_masked_na(self, any_numeric_ea_and_arrow_dtype): + # GH#39133 + idx = Index([1, 2, NA], dtype=any_numeric_ea_and_arrow_dtype) + result = idx.get_loc(NA) + assert result == 2 + + idx = Index([1, 2, NA, NA], dtype=any_numeric_ea_and_arrow_dtype) + result = idx.get_loc(NA) + tm.assert_numpy_array_equal(result, np.array([False, False, True, True])) + + idx = Index([1, 2, 3], dtype=any_numeric_ea_and_arrow_dtype) + with pytest.raises(KeyError, match="NA"): + idx.get_loc(NA) + + def test_get_loc_masked_na_and_nan(self): + # GH#39133 + idx = Index( + FloatingArray( + np.array([1, 2, 1, np.nan]), mask=np.array([False, False, True, False]) + ) + ) + result = idx.get_loc(NA) + assert result == 2 + result = idx.get_loc(np.nan) + assert result == 3 + + idx = Index( + FloatingArray(np.array([1, 2, 1.0]), mask=np.array([False, False, True])) + ) + result = idx.get_loc(NA) + assert result == 2 + with pytest.raises(KeyError, match="nan"): + idx.get_loc(np.nan) + + idx = Index( + FloatingArray( + np.array([1, 2, np.nan]), mask=np.array([False, False, False]) + ) + ) + result = idx.get_loc(np.nan) + assert result == 2 + with pytest.raises(KeyError, match="NA"): + idx.get_loc(NA) + + @pytest.mark.parametrize("val", [4, 2]) + def test_get_indexer_masked_na(self, any_numeric_ea_and_arrow_dtype, val): + # GH#39133 + idx = Index([1, 2, NA, 3, val], dtype=any_numeric_ea_and_arrow_dtype) + result = idx.get_indexer_for([1, NA, 5]) + expected = np.array([0, 2, -1]) + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + @pytest.mark.parametrize("dtype", ["boolean", "bool[pyarrow]"]) + def test_get_indexer_masked_na_boolean(self, dtype): + # GH#39133 + if dtype == "bool[pyarrow]": + pytest.importorskip("pyarrow") + idx = Index([True, False, NA], dtype=dtype) + result = idx.get_loc(False) + assert result == 1 + result = idx.get_loc(NA) + assert result == 2 + + def test_get_indexer_arrow_dictionary_target(self): + pa = pytest.importorskip("pyarrow") + target = Index( + ArrowExtensionArray( + pa.array([1, 2], type=pa.dictionary(pa.int8(), pa.int8())) + ) + ) + idx = Index([1]) + + result = idx.get_indexer(target) + expected = np.array([0, -1], dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + result_1, result_2 = idx.get_indexer_non_unique(target) + expected_1, expected_2 = np.array([0, -1], dtype=np.int64), np.array( + [1], dtype=np.int64 + ) + tm.assert_numpy_array_equal(result_1, expected_1) + tm.assert_numpy_array_equal(result_2, expected_2) + + +class TestWhere: + @pytest.mark.parametrize( + "index", + [ + Index(np.arange(5, dtype="float64")), + Index(range(0, 20, 2), dtype=np.int64), + Index(np.arange(5, dtype="uint64")), + ], + ) + def test_where(self, listlike_box, index): + cond = [True] * len(index) + expected = index + result = index.where(listlike_box(cond)) + + cond = [False] + [True] * (len(index) - 1) + expected = Index([index._na_value] + index[1:].tolist(), dtype=np.float64) + result = index.where(listlike_box(cond)) + tm.assert_index_equal(result, expected) + + def test_where_uint64(self): + idx = Index([0, 6, 2], dtype=np.uint64) + mask = np.array([False, True, False]) + other = np.array([1], dtype=np.int64) + + expected = Index([1, 6, 1], dtype=np.uint64) + + result = idx.where(mask, other) + tm.assert_index_equal(result, expected) + + result = idx.putmask(~mask, other) + tm.assert_index_equal(result, expected) + + def test_where_infers_type_instead_of_trying_to_convert_string_to_float(self): + # GH 32413 + index = Index([1, np.nan]) + cond = index.notna() + other = Index(["a", "b"], dtype="string") + + expected = Index([1.0, "b"]) + result = index.where(cond, other) + + tm.assert_index_equal(result, expected) + + +class TestTake: + @pytest.mark.parametrize("idx_dtype", [np.float64, np.int64, np.uint64]) + def test_take_preserve_name(self, idx_dtype): + index = Index([1, 2, 3, 4], dtype=idx_dtype, name="foo") + taken = index.take([3, 0, 1]) + assert index.name == taken.name + + def test_take_fill_value_float64(self): + # GH 12631 + idx = Index([1.0, 2.0, 3.0], name="xxx", dtype=np.float64) + result = idx.take(np.array([1, 0, -1])) + expected = Index([2.0, 1.0, 3.0], dtype=np.float64, name="xxx") + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = Index([2.0, 1.0, np.nan], dtype=np.float64, name="xxx") + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = Index([2.0, 1.0, 3.0], dtype=np.float64, name="xxx") + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for (axis 0 with )?size 3" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + @pytest.mark.parametrize("dtype", [np.int64, np.uint64]) + def test_take_fill_value_ints(self, dtype): + # see gh-12631 + idx = Index([1, 2, 3], dtype=dtype, name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = Index([2, 1, 3], dtype=dtype, name="xxx") + tm.assert_index_equal(result, expected) + + name = type(idx).__name__ + msg = f"Unable to fill values because {name} cannot contain NA" + + # fill_value=True + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -1]), fill_value=True) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = Index([2, 1, 3], dtype=dtype, name="xxx") + tm.assert_index_equal(result, expected) + + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for (axis 0 with )?size 3" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + +class TestContains: + @pytest.mark.parametrize("dtype", [np.float64, np.int64, np.uint64]) + def test_contains_none(self, dtype): + # GH#35788 should return False, not raise TypeError + index = Index([0, 1, 2, 3, 4], dtype=dtype) + assert None not in index + + def test_contains_float64_nans(self): + index = Index([1.0, 2.0, np.nan], dtype=np.float64) + assert np.nan in index + + def test_contains_float64_not_nans(self): + index = Index([1.0, 2.0, np.nan], dtype=np.float64) + assert 1.0 in index + + +class TestSliceLocs: + @pytest.mark.parametrize("dtype", [int, float]) + def test_slice_locs(self, dtype): + index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype)) + n = len(index) + + assert index.slice_locs(start=2) == (2, n) + assert index.slice_locs(start=3) == (3, n) + assert index.slice_locs(3, 8) == (3, 6) + assert index.slice_locs(5, 10) == (3, n) + assert index.slice_locs(end=8) == (0, 6) + assert index.slice_locs(end=9) == (0, 7) + + # reversed + index2 = index[::-1] + assert index2.slice_locs(8, 2) == (2, 6) + assert index2.slice_locs(7, 3) == (2, 5) + + @pytest.mark.parametrize("dtype", [int, float]) + def test_slice_locs_float_locs(self, dtype): + index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype)) + n = len(index) + assert index.slice_locs(5.0, 10.0) == (3, n) + assert index.slice_locs(4.5, 10.5) == (3, 8) + + index2 = index[::-1] + assert index2.slice_locs(8.5, 1.5) == (2, 6) + assert index2.slice_locs(10.5, -1) == (0, n) + + @pytest.mark.parametrize("dtype", [int, float]) + def test_slice_locs_dup_numeric(self, dtype): + index = Index(np.array([10, 12, 12, 14], dtype=dtype)) + assert index.slice_locs(12, 12) == (1, 3) + assert index.slice_locs(11, 13) == (1, 3) + + index2 = index[::-1] + assert index2.slice_locs(12, 12) == (1, 3) + assert index2.slice_locs(13, 11) == (1, 3) + + def test_slice_locs_na(self): + index = Index([np.nan, 1, 2]) + assert index.slice_locs(1) == (1, 3) + assert index.slice_locs(np.nan) == (0, 3) + + index = Index([0, np.nan, np.nan, 1, 2]) + assert index.slice_locs(np.nan) == (1, 5) + + def test_slice_locs_na_raises(self): + index = Index([np.nan, 1, 2]) + with pytest.raises(KeyError, match=""): + index.slice_locs(start=1.5) + + with pytest.raises(KeyError, match=""): + index.slice_locs(end=1.5) + + +class TestGetSliceBounds: + @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)]) + def test_get_slice_bounds_within(self, side, expected): + index = Index(range(6)) + result = index.get_slice_bound(4, side=side) + assert result == expected + + @pytest.mark.parametrize("side", ["left", "right"]) + @pytest.mark.parametrize("bound, expected", [(-1, 0), (10, 6)]) + def test_get_slice_bounds_outside(self, side, expected, bound): + index = Index(range(6)) + result = index.get_slice_bound(bound, side=side) + assert result == expected diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_join.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_join.py new file mode 100644 index 0000000000000000000000000000000000000000..918d5052167356b1d51018434c03e6682f828872 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_join.py @@ -0,0 +1,380 @@ +import numpy as np +import pytest + +import pandas._testing as tm +from pandas.core.indexes.api import Index + + +class TestJoinInt64Index: + def test_join_non_unique(self): + left = Index([4, 4, 3, 3]) + + joined, lidx, ridx = left.join(left, return_indexers=True) + + exp_joined = Index([4, 4, 4, 4, 3, 3, 3, 3]) + tm.assert_index_equal(joined, exp_joined) + + exp_lidx = np.array([0, 0, 1, 1, 2, 2, 3, 3], dtype=np.intp) + tm.assert_numpy_array_equal(lidx, exp_lidx) + + exp_ridx = np.array([0, 1, 0, 1, 2, 3, 2, 3], dtype=np.intp) + tm.assert_numpy_array_equal(ridx, exp_ridx) + + def test_join_inner(self): + index = Index(range(0, 20, 2), dtype=np.int64) + other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64) + other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64) + + # not monotonic + res, lidx, ridx = index.join(other, how="inner", return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = Index([2, 12], dtype=np.int64) + elidx = np.array([1, 6], dtype=np.intp) + eridx = np.array([4, 1], dtype=np.intp) + + assert isinstance(res, Index) and res.dtype == np.int64 + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index.join(other_mono, how="inner", return_indexers=True) + + res2 = index.intersection(other_mono) + tm.assert_index_equal(res, res2) + + elidx = np.array([1, 6], dtype=np.intp) + eridx = np.array([1, 4], dtype=np.intp) + assert isinstance(res, Index) and res.dtype == np.int64 + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_left(self): + index = Index(range(0, 20, 2), dtype=np.int64) + other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64) + other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64) + + # not monotonic + res, lidx, ridx = index.join(other, how="left", return_indexers=True) + eres = index + eridx = np.array([-1, 4, -1, -1, -1, -1, 1, -1, -1, -1], dtype=np.intp) + + assert isinstance(res, Index) and res.dtype == np.int64 + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index.join(other_mono, how="left", return_indexers=True) + eridx = np.array([-1, 1, -1, -1, -1, -1, 4, -1, -1, -1], dtype=np.intp) + assert isinstance(res, Index) and res.dtype == np.int64 + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # non-unique + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + res, lidx, ridx = idx2.join(idx, how="left", return_indexers=True) + eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 + eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self): + index = Index(range(0, 20, 2), dtype=np.int64) + other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64) + other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64) + + # not monotonic + res, lidx, ridx = index.join(other, how="right", return_indexers=True) + eres = other + elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.intp) + + assert isinstance(other, Index) and other.dtype == np.int64 + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None + + # monotonic + res, lidx, ridx = index.join(other_mono, how="right", return_indexers=True) + eres = other_mono + elidx = np.array([-1, 1, -1, -1, 6, -1], dtype=np.intp) + assert isinstance(other, Index) and other.dtype == np.int64 + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None + + # non-unique + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + res, lidx, ridx = idx.join(idx2, how="right", return_indexers=True) + eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 + elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_non_int_index(self): + index = Index(range(0, 20, 2), dtype=np.int64) + other = Index([3, 6, 7, 8, 10], dtype=object) + + outer = index.join(other, how="outer") + outer2 = other.join(index, how="outer") + expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18]) + tm.assert_index_equal(outer, outer2) + tm.assert_index_equal(outer, expected) + + inner = index.join(other, how="inner") + inner2 = other.join(index, how="inner") + expected = Index([6, 8, 10]) + tm.assert_index_equal(inner, inner2) + tm.assert_index_equal(inner, expected) + + left = index.join(other, how="left") + tm.assert_index_equal(left, index.astype(object)) + + left2 = other.join(index, how="left") + tm.assert_index_equal(left2, other) + + right = index.join(other, how="right") + tm.assert_index_equal(right, other) + + right2 = other.join(index, how="right") + tm.assert_index_equal(right2, index.astype(object)) + + def test_join_outer(self): + index = Index(range(0, 20, 2), dtype=np.int64) + other = Index([7, 12, 25, 1, 2, 5], dtype=np.int64) + other_mono = Index([1, 2, 5, 7, 12, 25], dtype=np.int64) + + # not monotonic + # guarantee of sortedness + res, lidx, ridx = index.join(other, how="outer", return_indexers=True) + noidx_res = index.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + eres = Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25], dtype=np.int64) + elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp) + eridx = np.array( + [-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], dtype=np.intp + ) + + assert isinstance(res, Index) and res.dtype == np.int64 + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index.join(other_mono, how="outer", return_indexers=True) + noidx_res = index.join(other_mono, how="outer") + tm.assert_index_equal(res, noidx_res) + + elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp) + eridx = np.array( + [-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], dtype=np.intp + ) + assert isinstance(res, Index) and res.dtype == np.int64 + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + +class TestJoinUInt64Index: + @pytest.fixture + def index_large(self): + # large values used in TestUInt64Index where no compat needed with int64/float64 + large = [2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 + 25] + return Index(large, dtype=np.uint64) + + def test_join_inner(self, index_large): + other = Index(2**63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = Index(2**63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64")) + + # not monotonic + res, lidx, ridx = index_large.join(other, how="inner", return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = Index(2**63 + np.array([10, 25], dtype="uint64")) + elidx = np.array([1, 4], dtype=np.intp) + eridx = np.array([5, 2], dtype=np.intp) + + assert isinstance(res, Index) and res.dtype == np.uint64 + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index_large.join( + other_mono, how="inner", return_indexers=True + ) + + res2 = index_large.intersection(other_mono) + tm.assert_index_equal(res, res2) + + elidx = np.array([1, 4], dtype=np.intp) + eridx = np.array([3, 5], dtype=np.intp) + + assert isinstance(res, Index) and res.dtype == np.uint64 + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_left(self, index_large): + other = Index(2**63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = Index(2**63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64")) + + # not monotonic + res, lidx, ridx = index_large.join(other, how="left", return_indexers=True) + eres = index_large + eridx = np.array([-1, 5, -1, -1, 2], dtype=np.intp) + + assert isinstance(res, Index) and res.dtype == np.uint64 + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index_large.join(other_mono, how="left", return_indexers=True) + eridx = np.array([-1, 3, -1, -1, 5], dtype=np.intp) + + assert isinstance(res, Index) and res.dtype == np.uint64 + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # non-unique + idx = Index(2**63 + np.array([1, 1, 2, 5], dtype="uint64")) + idx2 = Index(2**63 + np.array([1, 2, 5, 7, 9], dtype="uint64")) + res, lidx, ridx = idx2.join(idx, how="left", return_indexers=True) + + # 1 is in idx2, so it should be x2 + eres = Index(2**63 + np.array([1, 1, 2, 5, 7, 9], dtype="uint64")) + eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self, index_large): + other = Index(2**63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = Index(2**63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64")) + + # not monotonic + res, lidx, ridx = index_large.join(other, how="right", return_indexers=True) + eres = other + elidx = np.array([-1, -1, 4, -1, -1, 1], dtype=np.intp) + + tm.assert_numpy_array_equal(lidx, elidx) + assert isinstance(other, Index) and other.dtype == np.uint64 + tm.assert_index_equal(res, eres) + assert ridx is None + + # monotonic + res, lidx, ridx = index_large.join( + other_mono, how="right", return_indexers=True + ) + eres = other_mono + elidx = np.array([-1, -1, -1, 1, -1, 4], dtype=np.intp) + + assert isinstance(other, Index) and other.dtype == np.uint64 + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_index_equal(res, eres) + assert ridx is None + + # non-unique + idx = Index(2**63 + np.array([1, 1, 2, 5], dtype="uint64")) + idx2 = Index(2**63 + np.array([1, 2, 5, 7, 9], dtype="uint64")) + res, lidx, ridx = idx.join(idx2, how="right", return_indexers=True) + + # 1 is in idx2, so it should be x2 + eres = Index(2**63 + np.array([1, 1, 2, 5, 7, 9], dtype="uint64")) + elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_non_int_index(self, index_large): + other = Index( + 2**63 + np.array([1, 5, 7, 10, 20], dtype="uint64"), dtype=object + ) + + outer = index_large.join(other, how="outer") + outer2 = other.join(index_large, how="outer") + expected = Index( + 2**63 + np.array([0, 1, 5, 7, 10, 15, 20, 25], dtype="uint64") + ) + tm.assert_index_equal(outer, outer2) + tm.assert_index_equal(outer, expected) + + inner = index_large.join(other, how="inner") + inner2 = other.join(index_large, how="inner") + expected = Index(2**63 + np.array([10, 20], dtype="uint64")) + tm.assert_index_equal(inner, inner2) + tm.assert_index_equal(inner, expected) + + left = index_large.join(other, how="left") + tm.assert_index_equal(left, index_large.astype(object)) + + left2 = other.join(index_large, how="left") + tm.assert_index_equal(left2, other) + + right = index_large.join(other, how="right") + tm.assert_index_equal(right, other) + + right2 = other.join(index_large, how="right") + tm.assert_index_equal(right2, index_large.astype(object)) + + def test_join_outer(self, index_large): + other = Index(2**63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = Index(2**63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64")) + + # not monotonic + # guarantee of sortedness + res, lidx, ridx = index_large.join(other, how="outer", return_indexers=True) + noidx_res = index_large.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + eres = Index( + 2**63 + np.array([0, 1, 2, 7, 10, 12, 15, 20, 25], dtype="uint64") + ) + elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp) + eridx = np.array([-1, 3, 4, 0, 5, 1, -1, -1, 2], dtype=np.intp) + + assert isinstance(res, Index) and res.dtype == np.uint64 + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index_large.join( + other_mono, how="outer", return_indexers=True + ) + noidx_res = index_large.join(other_mono, how="outer") + tm.assert_index_equal(res, noidx_res) + + elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp) + eridx = np.array([-1, 0, 1, 2, 3, 4, -1, -1, 5], dtype=np.intp) + + assert isinstance(res, Index) and res.dtype == np.uint64 + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_numeric.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_numeric.py new file mode 100644 index 0000000000000000000000000000000000000000..4fd807e1827ddc4faf900f15dcefa18c08d4cd0b --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_numeric.py @@ -0,0 +1,553 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Index, + Series, +) +import pandas._testing as tm + + +class TestFloatNumericIndex: + @pytest.fixture(params=[np.float64, np.float32]) + def dtype(self, request): + return request.param + + @pytest.fixture + def simple_index(self, dtype): + values = np.arange(5, dtype=dtype) + return Index(values) + + @pytest.fixture( + params=[ + [1.5, 2, 3, 4, 5], + [0.0, 2.5, 5.0, 7.5, 10.0], + [5, 4, 3, 2, 1.5], + [10.0, 7.5, 5.0, 2.5, 0.0], + ], + ids=["mixed", "float", "mixed_dec", "float_dec"], + ) + def index(self, request, dtype): + return Index(request.param, dtype=dtype) + + @pytest.fixture + def mixed_index(self, dtype): + return Index([1.5, 2, 3, 4, 5], dtype=dtype) + + @pytest.fixture + def float_index(self, dtype): + return Index([0.0, 2.5, 5.0, 7.5, 10.0], dtype=dtype) + + def test_repr_roundtrip(self, index): + tm.assert_index_equal(eval(repr(index)), index, exact=True) + + def check_coerce(self, a, b, is_float_index=True): + assert a.equals(b) + tm.assert_index_equal(a, b, exact=False) + if is_float_index: + assert isinstance(b, Index) + else: + assert type(b) is Index + + def test_constructor_from_list_no_dtype(self): + index = Index([1.5, 2.5, 3.5]) + assert index.dtype == np.float64 + + def test_constructor(self, dtype): + index_cls = Index + + # explicit construction + index = index_cls([1, 2, 3, 4, 5], dtype=dtype) + + assert isinstance(index, index_cls) + assert index.dtype == dtype + + expected = np.array([1, 2, 3, 4, 5], dtype=dtype) + tm.assert_numpy_array_equal(index.values, expected) + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=dtype) + assert isinstance(index, index_cls) + assert index.dtype == dtype + + index = index_cls([1.0, 2, 3, 4, 5], dtype=dtype) + assert isinstance(index, index_cls) + assert index.dtype == dtype + + index = index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=dtype) + assert isinstance(index, index_cls) + assert index.dtype == dtype + + index = index_cls([1.0, 2, 3, 4, 5], dtype=dtype) + assert isinstance(index, index_cls) + assert index.dtype == dtype + + index = index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=dtype) + assert isinstance(index, index_cls) + assert index.dtype == dtype + + # nan handling + result = index_cls([np.nan, np.nan], dtype=dtype) + assert pd.isna(result.values).all() + + result = index_cls(np.array([np.nan]), dtype=dtype) + assert pd.isna(result.values).all() + + def test_constructor_invalid(self): + index_cls = Index + cls_name = index_cls.__name__ + # invalid + msg = ( + rf"{cls_name}\(\.\.\.\) must be called with a collection of " + r"some kind, 0\.0 was passed" + ) + with pytest.raises(TypeError, match=msg): + index_cls(0.0) + + def test_constructor_coerce(self, mixed_index, float_index): + self.check_coerce(mixed_index, Index([1.5, 2, 3, 4, 5])) + self.check_coerce(float_index, Index(np.arange(5) * 2.5)) + + result = Index(np.array(np.arange(5) * 2.5, dtype=object)) + assert result.dtype == object # as of 2.0 to match Series + self.check_coerce(float_index, result.astype("float64")) + + def test_constructor_explicit(self, mixed_index, float_index): + # these don't auto convert + self.check_coerce( + float_index, Index((np.arange(5) * 2.5), dtype=object), is_float_index=False + ) + self.check_coerce( + mixed_index, Index([1.5, 2, 3, 4, 5], dtype=object), is_float_index=False + ) + + def test_type_coercion_fail(self, any_int_numpy_dtype): + # see gh-15832 + msg = "Trying to coerce float values to integers" + with pytest.raises(ValueError, match=msg): + Index([1, 2, 3.5], dtype=any_int_numpy_dtype) + + def test_equals_numeric(self): + index_cls = Index + + idx = index_cls([1.0, 2.0]) + assert idx.equals(idx) + assert idx.identical(idx) + + idx2 = index_cls([1.0, 2.0]) + assert idx.equals(idx2) + + idx = index_cls([1.0, np.nan]) + assert idx.equals(idx) + assert idx.identical(idx) + + idx2 = index_cls([1.0, np.nan]) + assert idx.equals(idx2) + + @pytest.mark.parametrize( + "other", + ( + Index([1, 2], dtype=np.int64), + Index([1.0, 2.0], dtype=object), + Index([1, 2], dtype=object), + ), + ) + def test_equals_numeric_other_index_type(self, other): + idx = Index([1.0, 2.0]) + assert idx.equals(other) + assert other.equals(idx) + + @pytest.mark.parametrize( + "vals", + [ + pd.date_range("2016-01-01", periods=3), + pd.timedelta_range("1 Day", periods=3), + ], + ) + def test_lookups_datetimelike_values(self, vals, dtype): + # If we have datetime64 or timedelta64 values, make sure they are + # wrapped correctly GH#31163 + ser = Series(vals, index=range(3, 6)) + ser.index = ser.index.astype(dtype) + + expected = vals[1] + + result = ser[4.0] + assert isinstance(result, type(expected)) and result == expected + result = ser[4] + assert isinstance(result, type(expected)) and result == expected + + result = ser.loc[4.0] + assert isinstance(result, type(expected)) and result == expected + result = ser.loc[4] + assert isinstance(result, type(expected)) and result == expected + + result = ser.at[4.0] + assert isinstance(result, type(expected)) and result == expected + # GH#31329 .at[4] should cast to 4.0, matching .loc behavior + result = ser.at[4] + assert isinstance(result, type(expected)) and result == expected + + result = ser.iloc[1] + assert isinstance(result, type(expected)) and result == expected + + result = ser.iat[1] + assert isinstance(result, type(expected)) and result == expected + + def test_doesnt_contain_all_the_things(self): + idx = Index([np.nan]) + assert not idx.isin([0]).item() + assert not idx.isin([1]).item() + assert idx.isin([np.nan]).item() + + def test_nan_multiple_containment(self): + index_cls = Index + + idx = index_cls([1.0, np.nan]) + tm.assert_numpy_array_equal(idx.isin([1.0]), np.array([True, False])) + tm.assert_numpy_array_equal(idx.isin([2.0, np.pi]), np.array([False, False])) + tm.assert_numpy_array_equal(idx.isin([np.nan]), np.array([False, True])) + tm.assert_numpy_array_equal(idx.isin([1.0, np.nan]), np.array([True, True])) + idx = index_cls([1.0, 2.0]) + tm.assert_numpy_array_equal(idx.isin([np.nan]), np.array([False, False])) + + def test_fillna_float64(self): + index_cls = Index + # GH 11343 + idx = Index([1.0, np.nan, 3.0], dtype=float, name="x") + # can't downcast + exp = Index([1.0, 0.1, 3.0], name="x") + tm.assert_index_equal(idx.fillna(0.1), exp, exact=True) + + # downcast + exp = index_cls([1.0, 2.0, 3.0], name="x") + tm.assert_index_equal(idx.fillna(2), exp) + + # object + exp = Index([1.0, "obj", 3.0], name="x") + tm.assert_index_equal(idx.fillna("obj"), exp, exact=True) + + def test_logical_compat(self, simple_index): + idx = simple_index + assert idx.all() == idx.values.all() + assert idx.any() == idx.values.any() + + assert idx.all() == idx.to_series().all() + assert idx.any() == idx.to_series().any() + + +class TestNumericInt: + @pytest.fixture(params=[np.int64, np.int32, np.int16, np.int8, np.uint64]) + def dtype(self, request): + return request.param + + @pytest.fixture + def simple_index(self, dtype): + return Index(range(0, 20, 2), dtype=dtype) + + def test_is_monotonic(self): + index_cls = Index + + index = index_cls([1, 2, 3, 4]) + assert index.is_monotonic_increasing is True + assert index.is_monotonic_increasing is True + assert index._is_strictly_monotonic_increasing is True + assert index.is_monotonic_decreasing is False + assert index._is_strictly_monotonic_decreasing is False + + index = index_cls([4, 3, 2, 1]) + assert index.is_monotonic_increasing is False + assert index._is_strictly_monotonic_increasing is False + assert index._is_strictly_monotonic_decreasing is True + + index = index_cls([1]) + assert index.is_monotonic_increasing is True + assert index.is_monotonic_increasing is True + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_increasing is True + assert index._is_strictly_monotonic_decreasing is True + + def test_is_strictly_monotonic(self): + index_cls = Index + + index = index_cls([1, 1, 2, 3]) + assert index.is_monotonic_increasing is True + assert index._is_strictly_monotonic_increasing is False + + index = index_cls([3, 2, 1, 1]) + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_decreasing is False + + index = index_cls([1, 1]) + assert index.is_monotonic_increasing + assert index.is_monotonic_decreasing + assert not index._is_strictly_monotonic_increasing + assert not index._is_strictly_monotonic_decreasing + + def test_logical_compat(self, simple_index): + idx = simple_index + assert idx.all() == idx.values.all() + assert idx.any() == idx.values.any() + + def test_identical(self, simple_index, dtype): + index = simple_index + + idx = Index(index.copy()) + assert idx.identical(index) + + same_values_different_type = Index(idx, dtype=object) + assert not idx.identical(same_values_different_type) + + idx = index.astype(dtype=object) + idx = idx.rename("foo") + same_values = Index(idx, dtype=object) + assert same_values.identical(idx) + + assert not idx.identical(index) + assert Index(same_values, name="foo", dtype=object).identical(idx) + + assert not index.astype(dtype=object).identical(index.astype(dtype=dtype)) + + def test_cant_or_shouldnt_cast(self, dtype): + msg = r"invalid literal for int\(\) with base 10: 'foo'" + + # can't + data = ["foo", "bar", "baz"] + with pytest.raises(ValueError, match=msg): + Index(data, dtype=dtype) + + def test_view_index(self, simple_index): + index = simple_index + msg = "Passing a type in .*Index.view is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + index.view(Index) + + def test_prevent_casting(self, simple_index): + index = simple_index + result = index.astype("O") + assert result.dtype == np.object_ + + +class TestIntNumericIndex: + @pytest.fixture(params=[np.int64, np.int32, np.int16, np.int8]) + def dtype(self, request): + return request.param + + def test_constructor_from_list_no_dtype(self): + index = Index([1, 2, 3]) + assert index.dtype == np.int64 + + def test_constructor(self, dtype): + index_cls = Index + + # scalar raise Exception + msg = ( + rf"{index_cls.__name__}\(\.\.\.\) must be called with a collection of some " + "kind, 5 was passed" + ) + with pytest.raises(TypeError, match=msg): + index_cls(5) + + # copy + # pass list, coerce fine + index = index_cls([-5, 0, 1, 2], dtype=dtype) + arr = index.values.copy() + new_index = index_cls(arr, copy=True) + tm.assert_index_equal(new_index, index, exact=True) + val = int(arr[0]) + 3000 + + # this should not change index + if dtype != np.int8: + # NEP 50 won't allow assignment that would overflow + arr[0] = val + assert new_index[0] != val + + if dtype == np.int64: + # pass list, coerce fine + index = index_cls([-5, 0, 1, 2], dtype=dtype) + expected = Index([-5, 0, 1, 2], dtype=dtype) + tm.assert_index_equal(index, expected) + + # from iterable + index = index_cls(iter([-5, 0, 1, 2]), dtype=dtype) + expected = index_cls([-5, 0, 1, 2], dtype=dtype) + tm.assert_index_equal(index, expected, exact=True) + + # interpret list-like + expected = index_cls([5, 0], dtype=dtype) + for cls in [Index, index_cls]: + for idx in [ + cls([5, 0], dtype=dtype), + cls(np.array([5, 0]), dtype=dtype), + cls(Series([5, 0]), dtype=dtype), + ]: + tm.assert_index_equal(idx, expected) + + def test_constructor_corner(self, dtype): + index_cls = Index + + arr = np.array([1, 2, 3, 4], dtype=object) + + index = index_cls(arr, dtype=dtype) + assert index.values.dtype == index.dtype + if dtype == np.int64: + without_dtype = Index(arr) + # as of 2.0 we do not infer a dtype when we get an object-dtype + # ndarray of numbers, matching Series behavior + assert without_dtype.dtype == object + + tm.assert_index_equal(index, without_dtype.astype(np.int64)) + + # preventing casting + arr = np.array([1, "2", 3, "4"], dtype=object) + msg = "Trying to coerce float values to integers" + with pytest.raises(ValueError, match=msg): + index_cls(arr, dtype=dtype) + + def test_constructor_coercion_signed_to_unsigned( + self, + any_unsigned_int_numpy_dtype, + ): + # see gh-15832 + msg = "|".join( + [ + "Trying to coerce negative values to unsigned integers", + "The elements provided in the data cannot all be casted", + ] + ) + with pytest.raises(OverflowError, match=msg): + Index([-1], dtype=any_unsigned_int_numpy_dtype) + + def test_constructor_np_signed(self, any_signed_int_numpy_dtype): + # GH#47475 + scalar = np.dtype(any_signed_int_numpy_dtype).type(1) + result = Index([scalar]) + expected = Index([1], dtype=any_signed_int_numpy_dtype) + tm.assert_index_equal(result, expected, exact=True) + + def test_constructor_np_unsigned(self, any_unsigned_int_numpy_dtype): + # GH#47475 + scalar = np.dtype(any_unsigned_int_numpy_dtype).type(1) + result = Index([scalar]) + expected = Index([1], dtype=any_unsigned_int_numpy_dtype) + tm.assert_index_equal(result, expected, exact=True) + + def test_coerce_list(self): + # coerce things + arr = Index([1, 2, 3, 4]) + assert isinstance(arr, Index) + + # but not if explicit dtype passed + arr = Index([1, 2, 3, 4], dtype=object) + assert type(arr) is Index + + +class TestFloat16Index: + # float 16 indexes not supported + # GH 49535 + def test_constructor(self): + index_cls = Index + dtype = np.float16 + + msg = "float16 indexes are not supported" + + # explicit construction + with pytest.raises(NotImplementedError, match=msg): + index_cls([1, 2, 3, 4, 5], dtype=dtype) + + with pytest.raises(NotImplementedError, match=msg): + index_cls(np.array([1, 2, 3, 4, 5]), dtype=dtype) + + with pytest.raises(NotImplementedError, match=msg): + index_cls([1.0, 2, 3, 4, 5], dtype=dtype) + + with pytest.raises(NotImplementedError, match=msg): + index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=dtype) + + with pytest.raises(NotImplementedError, match=msg): + index_cls([1.0, 2, 3, 4, 5], dtype=dtype) + + with pytest.raises(NotImplementedError, match=msg): + index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=dtype) + + # nan handling + with pytest.raises(NotImplementedError, match=msg): + index_cls([np.nan, np.nan], dtype=dtype) + + with pytest.raises(NotImplementedError, match=msg): + index_cls(np.array([np.nan]), dtype=dtype) + + +@pytest.mark.parametrize( + "box", + [list, lambda x: np.array(x, dtype=object), lambda x: Index(x, dtype=object)], +) +def test_uint_index_does_not_convert_to_float64(box): + # https://github.com/pandas-dev/pandas/issues/28279 + # https://github.com/pandas-dev/pandas/issues/28023 + series = Series( + [0, 1, 2, 3, 4, 5], + index=[ + 7606741985629028552, + 17876870360202815256, + 17876870360202815256, + 13106359306506049338, + 8991270399732411471, + 8991270399732411472, + ], + ) + + result = series.loc[box([7606741985629028552, 17876870360202815256])] + + expected = Index( + [7606741985629028552, 17876870360202815256, 17876870360202815256], + dtype="uint64", + ) + tm.assert_index_equal(result.index, expected) + + tm.assert_equal(result, series.iloc[:3]) + + +def test_float64_index_equals(): + # https://github.com/pandas-dev/pandas/issues/35217 + float_index = Index([1.0, 2, 3]) + string_index = Index(["1", "2", "3"]) + + result = float_index.equals(string_index) + assert result is False + + result = string_index.equals(float_index) + assert result is False + + +def test_map_dtype_inference_unsigned_to_signed(): + # GH#44609 cases where we don't retain dtype + idx = Index([1, 2, 3], dtype=np.uint64) + result = idx.map(lambda x: -x) + expected = Index([-1, -2, -3], dtype=np.int64) + tm.assert_index_equal(result, expected) + + +def test_map_dtype_inference_overflows(): + # GH#44609 case where we have to upcast + idx = Index(np.array([1, 2, 3], dtype=np.int8)) + result = idx.map(lambda x: x * 1000) + # TODO: we could plausibly try to infer down to int16 here + expected = Index([1000, 2000, 3000], dtype=np.int64) + tm.assert_index_equal(result, expected) + + +def test_view_to_datetimelike(): + # GH#55710 + idx = Index([1, 2, 3]) + res = idx.view("m8[s]") + expected = pd.TimedeltaIndex(idx.values.view("m8[s]")) + tm.assert_index_equal(res, expected) + + res2 = idx.view("m8[D]") + expected2 = idx.values.view("m8[D]") + tm.assert_numpy_array_equal(res2, expected2) + + res3 = idx.view("M8[h]") + expected3 = idx.values.view("M8[h]") + tm.assert_numpy_array_equal(res3, expected3) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_setops.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_setops.py new file mode 100644 index 0000000000000000000000000000000000000000..376b51dd98bb1b1c7c6c8a67914bc72f6c6c588d --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/numeric/test_setops.py @@ -0,0 +1,168 @@ +from datetime import ( + datetime, + timedelta, +) + +import numpy as np +import pytest + +import pandas._testing as tm +from pandas.core.indexes.api import ( + Index, + RangeIndex, +) + + +@pytest.fixture +def index_large(): + # large values used in TestUInt64Index where no compat needed with int64/float64 + large = [2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 + 25] + return Index(large, dtype=np.uint64) + + +class TestSetOps: + @pytest.mark.parametrize("dtype", ["f8", "u8", "i8"]) + def test_union_non_numeric(self, dtype): + # corner case, non-numeric + index = Index(np.arange(5, dtype=dtype), dtype=dtype) + assert index.dtype == dtype + + other = Index([datetime.now() + timedelta(i) for i in range(4)], dtype=object) + result = index.union(other) + expected = Index(np.concatenate((index, other))) + tm.assert_index_equal(result, expected) + + result = other.union(index) + expected = Index(np.concatenate((other, index))) + tm.assert_index_equal(result, expected) + + def test_intersection(self): + index = Index(range(5), dtype=np.int64) + + other = Index([1, 2, 3, 4, 5]) + result = index.intersection(other) + expected = Index(np.sort(np.intersect1d(index.values, other.values))) + tm.assert_index_equal(result, expected) + + result = other.intersection(index) + expected = Index( + np.sort(np.asarray(np.intersect1d(index.values, other.values))) + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["int64", "uint64"]) + def test_int_float_union_dtype(self, dtype): + # https://github.com/pandas-dev/pandas/issues/26778 + # [u]int | float -> float + index = Index([0, 2, 3], dtype=dtype) + other = Index([0.5, 1.5], dtype=np.float64) + expected = Index([0.0, 0.5, 1.5, 2.0, 3.0], dtype=np.float64) + result = index.union(other) + tm.assert_index_equal(result, expected) + + result = other.union(index) + tm.assert_index_equal(result, expected) + + def test_range_float_union_dtype(self): + # https://github.com/pandas-dev/pandas/issues/26778 + index = RangeIndex(start=0, stop=3) + other = Index([0.5, 1.5], dtype=np.float64) + result = index.union(other) + expected = Index([0.0, 0.5, 1, 1.5, 2.0], dtype=np.float64) + tm.assert_index_equal(result, expected) + + result = other.union(index) + tm.assert_index_equal(result, expected) + + def test_range_uint64_union_dtype(self): + # https://github.com/pandas-dev/pandas/issues/26778 + index = RangeIndex(start=0, stop=3) + other = Index([0, 10], dtype=np.uint64) + result = index.union(other) + expected = Index([0, 1, 2, 10], dtype=object) + tm.assert_index_equal(result, expected) + + result = other.union(index) + tm.assert_index_equal(result, expected) + + def test_float64_index_difference(self): + # https://github.com/pandas-dev/pandas/issues/35217 + float_index = Index([1.0, 2, 3]) + string_index = Index(["1", "2", "3"]) + + result = float_index.difference(string_index) + tm.assert_index_equal(result, float_index) + + result = string_index.difference(float_index) + tm.assert_index_equal(result, string_index) + + def test_intersection_uint64_outside_int64_range(self, index_large): + other = Index([2**63, 2**63 + 5, 2**63 + 10, 2**63 + 15, 2**63 + 20]) + result = index_large.intersection(other) + expected = Index(np.sort(np.intersect1d(index_large.values, other.values))) + tm.assert_index_equal(result, expected) + + result = other.intersection(index_large) + expected = Index( + np.sort(np.asarray(np.intersect1d(index_large.values, other.values))) + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "index2,keeps_name", + [ + (Index([4, 7, 6, 5, 3], name="index"), True), + (Index([4, 7, 6, 5, 3], name="other"), False), + ], + ) + def test_intersection_monotonic(self, index2, keeps_name, sort): + index1 = Index([5, 3, 2, 4, 1], name="index") + expected = Index([5, 3, 4]) + + if keeps_name: + expected.name = "index" + + result = index1.intersection(index2, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + def test_symmetric_difference(self, sort): + # smoke + index1 = Index([5, 2, 3, 4], name="index1") + index2 = Index([2, 3, 4, 1]) + result = index1.symmetric_difference(index2, sort=sort) + expected = Index([5, 1]) + if sort is not None: + tm.assert_index_equal(result, expected) + else: + tm.assert_index_equal(result, expected.sort_values()) + assert result.name is None + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + +class TestSetOpsSort: + @pytest.mark.parametrize("slice_", [slice(None), slice(0)]) + def test_union_sort_other_special(self, slice_): + # https://github.com/pandas-dev/pandas/issues/24959 + + idx = Index([1, 0, 2]) + # default, sort=None + other = idx[slice_] + tm.assert_index_equal(idx.union(other), idx) + tm.assert_index_equal(other.union(idx), idx) + + # sort=False + tm.assert_index_equal(idx.union(other, sort=False), idx) + + @pytest.mark.parametrize("slice_", [slice(None), slice(0)]) + def test_union_sort_special_true(self, slice_): + idx = Index([1, 0, 2]) + # default, sort=None + other = idx[slice_] + + result = idx.union(other, sort=True) + expected = Index([0, 1, 2]) + tm.assert_index_equal(result, expected) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/object/__init__.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/object/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/object/test_astype.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/object/test_astype.py new file mode 100644 index 0000000000000000000000000000000000000000..7e0de138aacfbf89ef6800669383e39f466104b3 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/object/test_astype.py @@ -0,0 +1,15 @@ +import pytest + +from pandas import ( + Index, + NaT, +) + + +def test_astype_invalid_nas_to_tdt64_raises(): + # GH#45722 don't cast np.datetime64 NaTs to timedelta64 NaT + idx = Index([NaT.asm8] * 2, dtype=object) + + msg = r"Invalid type for timedelta scalar: " + with pytest.raises(TypeError, match=msg): + idx.astype("m8[ns]") diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/object/test_indexing.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/object/test_indexing.py new file mode 100644 index 0000000000000000000000000000000000000000..42ef7e7a96f5e0418757fed6a5ae2115ea02229b --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/object/test_indexing.py @@ -0,0 +1,159 @@ +from decimal import Decimal + +import numpy as np +import pytest + +from pandas._libs.missing import is_matching_na + +from pandas import Index +import pandas._testing as tm + + +class TestGetIndexer: + @pytest.mark.parametrize( + "method,expected", + [ + ("pad", np.array([-1, 0, 1, 1], dtype=np.intp)), + ("backfill", np.array([0, 0, 1, -1], dtype=np.intp)), + ], + ) + def test_get_indexer_strings(self, method, expected): + expected = np.array(expected, dtype=np.intp) + index = Index(["b", "c"], dtype=object) + actual = index.get_indexer(["a", "b", "c", "d"], method=method) + + tm.assert_numpy_array_equal(actual, expected) + + def test_get_indexer_strings_raises(self): + index = Index(["b", "c"], dtype=object) + + msg = "|".join( + [ + "operation 'sub' not supported for dtype 'str'", + r"unsupported operand type\(s\) for -: 'str' and 'str'", + ] + ) + with pytest.raises(TypeError, match=msg): + index.get_indexer(["a", "b", "c", "d"], method="nearest") + + with pytest.raises(TypeError, match=msg): + index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2) + + with pytest.raises(TypeError, match=msg): + index.get_indexer( + ["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2] + ) + + def test_get_indexer_with_NA_values( + self, unique_nulls_fixture, unique_nulls_fixture2 + ): + # GH#22332 + # check pairwise, that no pair of na values + # is mangled + if unique_nulls_fixture is unique_nulls_fixture2: + return # skip it, values are not unique + arr = np.array([unique_nulls_fixture, unique_nulls_fixture2], dtype=object) + index = Index(arr, dtype=object) + result = index.get_indexer( + Index( + [unique_nulls_fixture, unique_nulls_fixture2, "Unknown"], dtype=object + ) + ) + expected = np.array([0, 1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_infer_string_missing_values(self): + # ensure the passed list is not cast to string but to object so that + # the None value is matched in the index + # https://github.com/pandas-dev/pandas/issues/55834 + idx = Index(["a", "b", None], dtype="object") + result = idx.get_indexer([None, "x"]) + expected = np.array([2, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + +class TestGetIndexerNonUnique: + def test_get_indexer_non_unique_nas(self, nulls_fixture): + # even though this isn't non-unique, this should still work + index = Index(["a", "b", nulls_fixture], dtype=object) + indexer, missing = index.get_indexer_non_unique([nulls_fixture]) + + expected_indexer = np.array([2], dtype=np.intp) + expected_missing = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + + # actually non-unique + index = Index(["a", nulls_fixture, "b", nulls_fixture], dtype=object) + indexer, missing = index.get_indexer_non_unique([nulls_fixture]) + + expected_indexer = np.array([1, 3], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + + # matching-but-not-identical nans + if is_matching_na(nulls_fixture, float("NaN")): + index = Index(["a", float("NaN"), "b", float("NaN")], dtype=object) + match_but_not_identical = True + elif is_matching_na(nulls_fixture, Decimal("NaN")): + index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")], dtype=object) + match_but_not_identical = True + else: + match_but_not_identical = False + + if match_but_not_identical: + indexer, missing = index.get_indexer_non_unique([nulls_fixture]) + + expected_indexer = np.array([1, 3], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + + @pytest.mark.filterwarnings("ignore:elementwise comp:DeprecationWarning") + def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2): + expected_missing = np.array([], dtype=np.intp) + # matching-but-not-identical nats + if is_matching_na(np_nat_fixture, np_nat_fixture2): + # ensure nats are different objects + index = Index( + np.array( + ["2021-10-02", np_nat_fixture.copy(), np_nat_fixture2.copy()], + dtype=object, + ), + dtype=object, + ) + # pass as index to prevent target from being casted to DatetimeIndex + indexer, missing = index.get_indexer_non_unique( + Index([np_nat_fixture], dtype=object) + ) + expected_indexer = np.array([1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + # dt64nat vs td64nat + else: + try: + np_nat_fixture == np_nat_fixture2 + except (TypeError, OverflowError): + # Numpy will raise on uncomparable types, like + # np.datetime64('NaT', 'Y') and np.datetime64('NaT', 'ps') + # https://github.com/numpy/numpy/issues/22762 + return + index = Index( + np.array( + [ + "2021-10-02", + np_nat_fixture, + np_nat_fixture2, + np_nat_fixture, + np_nat_fixture2, + ], + dtype=object, + ), + dtype=object, + ) + # pass as index to prevent target from being casted to DatetimeIndex + indexer, missing = index.get_indexer_non_unique( + Index([np_nat_fixture], dtype=object) + ) + expected_indexer = np.array([1, 3], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/string/__init__.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/string/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/string/test_astype.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/string/test_astype.py new file mode 100644 index 0000000000000000000000000000000000000000..0349d85f2316707d6ecba2c2289fde49930cbbac --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/string/test_astype.py @@ -0,0 +1,21 @@ +from pandas import ( + Index, + Series, +) +import pandas._testing as tm + + +def test_astype_str_from_bytes(): + # https://github.com/pandas-dev/pandas/issues/38607 + # GH#49658 pre-2.0 Index called .values.astype(str) here, which effectively + # did a .decode() on the bytes object. In 2.0 we go through + # ensure_string_array which does f"{val}" + idx = Index(["あ", b"a"], dtype="object") + result = idx.astype(str) + expected = Index(["あ", "a"], dtype="str") + tm.assert_index_equal(result, expected) + + # while we're here, check that Series.astype behaves the same + result = Series(idx).astype(str) + expected = Series(expected, dtype="str") + tm.assert_series_equal(result, expected) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/indexes/string/test_indexing.py b/py311/lib/python3.11/site-packages/pandas/tests/indexes/string/test_indexing.py new file mode 100644 index 0000000000000000000000000000000000000000..648ee47ddc34c1d4ae90bd986a283880743ac415 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/indexes/string/test_indexing.py @@ -0,0 +1,199 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Index +import pandas._testing as tm + + +def _isnan(val): + try: + return val is not pd.NA and np.isnan(val) + except TypeError: + return False + + +def _equivalent_na(dtype, null): + if dtype.na_value is pd.NA and null is pd.NA: + return True + elif _isnan(dtype.na_value) and _isnan(null): + return True + else: + return False + + +class TestGetLoc: + def test_get_loc(self, any_string_dtype): + index = Index(["a", "b", "c"], dtype=any_string_dtype) + assert index.get_loc("b") == 1 + + def test_get_loc_raises(self, any_string_dtype): + index = Index(["a", "b", "c"], dtype=any_string_dtype) + with pytest.raises(KeyError, match="d"): + index.get_loc("d") + + def test_get_loc_invalid_value(self, any_string_dtype): + index = Index(["a", "b", "c"], dtype=any_string_dtype) + with pytest.raises(KeyError, match="1"): + index.get_loc(1) + + def test_get_loc_non_unique(self, any_string_dtype): + index = Index(["a", "b", "a"], dtype=any_string_dtype) + result = index.get_loc("a") + expected = np.array([True, False, True]) + tm.assert_numpy_array_equal(result, expected) + + def test_get_loc_non_missing(self, any_string_dtype, nulls_fixture): + index = Index(["a", "b", "c"], dtype=any_string_dtype) + with pytest.raises(KeyError): + index.get_loc(nulls_fixture) + + def test_get_loc_missing(self, any_string_dtype, nulls_fixture): + index = Index(["a", "b", nulls_fixture], dtype=any_string_dtype) + assert index.get_loc(nulls_fixture) == 2 + + +class TestGetIndexer: + @pytest.mark.parametrize( + "method,expected", + [ + ("pad", [-1, 0, 1, 1]), + ("backfill", [0, 0, 1, -1]), + ], + ) + def test_get_indexer_strings(self, any_string_dtype, method, expected): + expected = np.array(expected, dtype=np.intp) + index = Index(["b", "c"], dtype=any_string_dtype) + actual = index.get_indexer(["a", "b", "c", "d"], method=method) + + tm.assert_numpy_array_equal(actual, expected) + + def test_get_indexer_strings_raises(self, any_string_dtype): + index = Index(["b", "c"], dtype=any_string_dtype) + + msg = "|".join( + [ + "operation 'sub' not supported for dtype 'str", + r"unsupported operand type\(s\) for -: 'str' and 'str'", + ] + ) + with pytest.raises(TypeError, match=msg): + index.get_indexer(["a", "b", "c", "d"], method="nearest") + + with pytest.raises(TypeError, match=msg): + index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2) + + with pytest.raises(TypeError, match=msg): + index.get_indexer( + ["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2] + ) + + @pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA]) + def test_get_indexer_missing(self, any_string_dtype, null, using_infer_string): + # NaT and Decimal("NaN") from null_fixture are not supported for string dtype + index = Index(["a", "b", null], dtype=any_string_dtype) + result = index.get_indexer(["a", null, "c"]) + if using_infer_string: + expected = np.array([0, 2, -1], dtype=np.intp) + elif any_string_dtype == "string" and not _equivalent_na( + any_string_dtype, null + ): + expected = np.array([0, -1, -1], dtype=np.intp) + else: + expected = np.array([0, 2, -1], dtype=np.intp) + + tm.assert_numpy_array_equal(result, expected) + + +class TestGetIndexerNonUnique: + @pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA]) + def test_get_indexer_non_unique_nas( + self, any_string_dtype, null, using_infer_string + ): + index = Index(["a", "b", null], dtype=any_string_dtype) + indexer, missing = index.get_indexer_non_unique(["a", null]) + + if using_infer_string: + expected_indexer = np.array([0, 2], dtype=np.intp) + expected_missing = np.array([], dtype=np.intp) + elif any_string_dtype == "string" and not _equivalent_na( + any_string_dtype, null + ): + expected_indexer = np.array([0, -1], dtype=np.intp) + expected_missing = np.array([1], dtype=np.intp) + else: + expected_indexer = np.array([0, 2], dtype=np.intp) + expected_missing = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + + # actually non-unique + index = Index(["a", null, "b", null], dtype=any_string_dtype) + indexer, missing = index.get_indexer_non_unique(["a", null]) + + if using_infer_string: + expected_indexer = np.array([0, 1, 3], dtype=np.intp) + elif any_string_dtype == "string" and not _equivalent_na( + any_string_dtype, null + ): + pass + else: + expected_indexer = np.array([0, 1, 3], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected_indexer) + tm.assert_numpy_array_equal(missing, expected_missing) + + +class TestSliceLocs: + @pytest.mark.parametrize( + "in_slice,expected", + [ + # error: Slice index must be an integer or None + (pd.IndexSlice[::-1], "yxdcb"), + (pd.IndexSlice["b":"y":-1], ""), # type: ignore[misc] + (pd.IndexSlice["b"::-1], "b"), # type: ignore[misc] + (pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore[misc] + (pd.IndexSlice[:"y":-1], "y"), # type: ignore[misc] + (pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore[misc] + (pd.IndexSlice["y"::-4], "yb"), # type: ignore[misc] + # absent labels + (pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore[misc] + (pd.IndexSlice[:"a":-2], "ydb"), # type: ignore[misc] + (pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore[misc] + (pd.IndexSlice["z"::-3], "yc"), # type: ignore[misc] + (pd.IndexSlice["m"::-1], "dcb"), # type: ignore[misc] + (pd.IndexSlice[:"m":-1], "yx"), # type: ignore[misc] + (pd.IndexSlice["a":"a":-1], ""), # type: ignore[misc] + (pd.IndexSlice["z":"z":-1], ""), # type: ignore[misc] + (pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc] + ], + ) + def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype): + index = Index(list("bcdxy"), dtype=any_string_dtype) + + s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step) + result = index[s_start : s_stop : in_slice.step] + expected = Index(list(expected), dtype=any_string_dtype) + tm.assert_index_equal(result, expected) + + def test_slice_locs_negative_step_oob(self, any_string_dtype): + index = Index(list("bcdxy"), dtype=any_string_dtype) + + result = index[-10:5:1] + tm.assert_index_equal(result, index) + + result = index[4:-10:-1] + expected = Index(list("yxdcb"), dtype=any_string_dtype) + tm.assert_index_equal(result, expected) + + def test_slice_locs_dup(self, any_string_dtype): + index = Index(["a", "a", "b", "c", "d", "d"], dtype=any_string_dtype) + assert index.slice_locs("a", "d") == (0, 6) + assert index.slice_locs(end="d") == (0, 6) + assert index.slice_locs("a", "c") == (0, 4) + assert index.slice_locs("b", "d") == (2, 6) + + index2 = index[::-1] + assert index2.slice_locs("d", "a") == (0, 6) + assert index2.slice_locs(end="a") == (0, 6) + assert index2.slice_locs("d", "b") == (0, 4) + assert index2.slice_locs("c", "a") == (2, 6) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/io/formats/__init__.py b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/__init__.py b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_bar.py b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_bar.py new file mode 100644 index 0000000000000000000000000000000000000000..d28c7c566d851f16f81cdc04f22e04ca8bde2c71 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_bar.py @@ -0,0 +1,359 @@ +import io + +import numpy as np +import pytest + +from pandas import ( + NA, + DataFrame, + read_csv, +) + +pytest.importorskip("jinja2") + + +def bar_grad(a=None, b=None, c=None, d=None): + """Used in multiple tests to simplify formatting of expected result""" + ret = [("width", "10em")] + if all(x is None for x in [a, b, c, d]): + return ret + return ret + [ + ( + "background", + f"linear-gradient(90deg,{','.join([x for x in [a, b, c, d] if x])})", + ) + ] + + +def no_bar(): + return bar_grad() + + +def bar_to(x, color="#d65f5f"): + return bar_grad(f" {color} {x:.1f}%", f" transparent {x:.1f}%") + + +def bar_from_to(x, y, color="#d65f5f"): + return bar_grad( + f" transparent {x:.1f}%", + f" {color} {x:.1f}%", + f" {color} {y:.1f}%", + f" transparent {y:.1f}%", + ) + + +@pytest.fixture +def df_pos(): + return DataFrame([[1], [2], [3]]) + + +@pytest.fixture +def df_neg(): + return DataFrame([[-1], [-2], [-3]]) + + +@pytest.fixture +def df_mix(): + return DataFrame([[-3], [1], [2]]) + + +@pytest.mark.parametrize( + "align, exp", + [ + ("left", [no_bar(), bar_to(50), bar_to(100)]), + ("right", [bar_to(100), bar_from_to(50, 100), no_bar()]), + ("mid", [bar_to(33.33), bar_to(66.66), bar_to(100)]), + ("zero", [bar_from_to(50, 66.7), bar_from_to(50, 83.3), bar_from_to(50, 100)]), + ("mean", [bar_to(50), no_bar(), bar_from_to(50, 100)]), + (2.0, [bar_to(50), no_bar(), bar_from_to(50, 100)]), + (np.median, [bar_to(50), no_bar(), bar_from_to(50, 100)]), + ], +) +def test_align_positive_cases(df_pos, align, exp): + # test different align cases for all positive values + result = df_pos.style.bar(align=align)._compute().ctx + expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]} + assert result == expected + + +@pytest.mark.parametrize( + "align, exp", + [ + ("left", [bar_to(100), bar_to(50), no_bar()]), + ("right", [no_bar(), bar_from_to(50, 100), bar_to(100)]), + ("mid", [bar_from_to(66.66, 100), bar_from_to(33.33, 100), bar_to(100)]), + ("zero", [bar_from_to(33.33, 50), bar_from_to(16.66, 50), bar_to(50)]), + ("mean", [bar_from_to(50, 100), no_bar(), bar_to(50)]), + (-2.0, [bar_from_to(50, 100), no_bar(), bar_to(50)]), + (np.median, [bar_from_to(50, 100), no_bar(), bar_to(50)]), + ], +) +def test_align_negative_cases(df_neg, align, exp): + # test different align cases for all negative values + result = df_neg.style.bar(align=align)._compute().ctx + expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]} + assert result == expected + + +@pytest.mark.parametrize( + "align, exp", + [ + ("left", [no_bar(), bar_to(80), bar_to(100)]), + ("right", [bar_to(100), bar_from_to(80, 100), no_bar()]), + ("mid", [bar_to(60), bar_from_to(60, 80), bar_from_to(60, 100)]), + ("zero", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]), + ("mean", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]), + (-0.0, [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]), + (np.nanmedian, [bar_to(50), no_bar(), bar_from_to(50, 62.5)]), + ], +) +@pytest.mark.parametrize("nans", [True, False]) +def test_align_mixed_cases(df_mix, align, exp, nans): + # test different align cases for mixed positive and negative values + # also test no impact of NaNs and no_bar + expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]} + if nans: + df_mix.loc[3, :] = np.nan + expected.update({(3, 0): no_bar()}) + result = df_mix.style.bar(align=align)._compute().ctx + assert result == expected + + +@pytest.mark.parametrize( + "align, exp", + [ + ( + "left", + { + "index": [[no_bar(), no_bar()], [bar_to(100), bar_to(100)]], + "columns": [[no_bar(), bar_to(100)], [no_bar(), bar_to(100)]], + "none": [[no_bar(), bar_to(33.33)], [bar_to(66.66), bar_to(100)]], + }, + ), + ( + "mid", + { + "index": [[bar_to(33.33), bar_to(50)], [bar_to(100), bar_to(100)]], + "columns": [[bar_to(50), bar_to(100)], [bar_to(75), bar_to(100)]], + "none": [[bar_to(25), bar_to(50)], [bar_to(75), bar_to(100)]], + }, + ), + ( + "zero", + { + "index": [ + [bar_from_to(50, 66.66), bar_from_to(50, 75)], + [bar_from_to(50, 100), bar_from_to(50, 100)], + ], + "columns": [ + [bar_from_to(50, 75), bar_from_to(50, 100)], + [bar_from_to(50, 87.5), bar_from_to(50, 100)], + ], + "none": [ + [bar_from_to(50, 62.5), bar_from_to(50, 75)], + [bar_from_to(50, 87.5), bar_from_to(50, 100)], + ], + }, + ), + ( + 2, + { + "index": [ + [bar_to(50), no_bar()], + [bar_from_to(50, 100), bar_from_to(50, 100)], + ], + "columns": [ + [bar_to(50), no_bar()], + [bar_from_to(50, 75), bar_from_to(50, 100)], + ], + "none": [ + [bar_from_to(25, 50), no_bar()], + [bar_from_to(50, 75), bar_from_to(50, 100)], + ], + }, + ), + ], +) +@pytest.mark.parametrize("axis", ["index", "columns", "none"]) +def test_align_axis(align, exp, axis): + # test all axis combinations with positive values and different aligns + data = DataFrame([[1, 2], [3, 4]]) + result = ( + data.style.bar(align=align, axis=None if axis == "none" else axis) + ._compute() + .ctx + ) + expected = { + (0, 0): exp[axis][0][0], + (0, 1): exp[axis][0][1], + (1, 0): exp[axis][1][0], + (1, 1): exp[axis][1][1], + } + assert result == expected + + +@pytest.mark.parametrize( + "values, vmin, vmax", + [ + ("positive", 1.5, 2.5), + ("negative", -2.5, -1.5), + ("mixed", -2.5, 1.5), + ], +) +@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"]) # test min/max separately +@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"]) +def test_vmin_vmax_clipping(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align): + # test that clipping occurs if any vmin > data_values or vmax < data_values + if align == "mid": # mid acts as left or right in each case + if values == "positive": + align = "left" + elif values == "negative": + align = "right" + df = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}[values] + vmin = None if nullify == "vmin" else vmin + vmax = None if nullify == "vmax" else vmax + + clip_df = df.where(df <= (vmax if vmax else 999), other=vmax) + clip_df = clip_df.where(clip_df >= (vmin if vmin else -999), other=vmin) + + result = ( + df.style.bar(align=align, vmin=vmin, vmax=vmax, color=["red", "green"]) + ._compute() + .ctx + ) + expected = clip_df.style.bar(align=align, color=["red", "green"])._compute().ctx + assert result == expected + + +@pytest.mark.parametrize( + "values, vmin, vmax", + [ + ("positive", 0.5, 4.5), + ("negative", -4.5, -0.5), + ("mixed", -4.5, 4.5), + ], +) +@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"]) # test min/max separately +@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"]) +def test_vmin_vmax_widening(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align): + # test that widening occurs if any vmax > data_values or vmin < data_values + if align == "mid": # mid acts as left or right in each case + if values == "positive": + align = "left" + elif values == "negative": + align = "right" + df = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}[values] + vmin = None if nullify == "vmin" else vmin + vmax = None if nullify == "vmax" else vmax + + expand_df = df.copy() + expand_df.loc[3, :], expand_df.loc[4, :] = vmin, vmax + + result = ( + df.style.bar(align=align, vmin=vmin, vmax=vmax, color=["red", "green"]) + ._compute() + .ctx + ) + expected = expand_df.style.bar(align=align, color=["red", "green"])._compute().ctx + assert result.items() <= expected.items() + + +def test_numerics(): + # test data is pre-selected for numeric values + data = DataFrame([[1, "a"], [2, "b"]]) + result = data.style.bar()._compute().ctx + assert (0, 1) not in result + assert (1, 1) not in result + + +@pytest.mark.parametrize( + "align, exp", + [ + ("left", [no_bar(), bar_to(100, "green")]), + ("right", [bar_to(100, "red"), no_bar()]), + ("mid", [bar_to(25, "red"), bar_from_to(25, 100, "green")]), + ("zero", [bar_from_to(33.33, 50, "red"), bar_from_to(50, 100, "green")]), + ], +) +def test_colors_mixed(align, exp): + data = DataFrame([[-1], [3]]) + result = data.style.bar(align=align, color=["red", "green"])._compute().ctx + assert result == {(0, 0): exp[0], (1, 0): exp[1]} + + +def test_bar_align_height(): + # test when keyword height is used 'no-repeat center' and 'background-size' present + data = DataFrame([[1], [2]]) + result = data.style.bar(align="left", height=50)._compute().ctx + bg_s = "linear-gradient(90deg, #d65f5f 100.0%, transparent 100.0%) no-repeat center" + expected = { + (0, 0): [("width", "10em")], + (1, 0): [ + ("width", "10em"), + ("background", bg_s), + ("background-size", "100% 50.0%"), + ], + } + assert result == expected + + +def test_bar_value_error_raises(): + df = DataFrame({"A": [-100, -60, -30, -20]}) + + msg = "`align` should be in {'left', 'right', 'mid', 'mean', 'zero'} or" + with pytest.raises(ValueError, match=msg): + df.style.bar(align="poorly", color=["#d65f5f", "#5fba7d"]).to_html() + + msg = r"`width` must be a value in \[0, 100\]" + with pytest.raises(ValueError, match=msg): + df.style.bar(width=200).to_html() + + msg = r"`height` must be a value in \[0, 100\]" + with pytest.raises(ValueError, match=msg): + df.style.bar(height=200).to_html() + + +def test_bar_color_and_cmap_error_raises(): + df = DataFrame({"A": [1, 2, 3, 4]}) + msg = "`color` and `cmap` cannot both be given" + # Test that providing both color and cmap raises a ValueError + with pytest.raises(ValueError, match=msg): + df.style.bar(color="#d65f5f", cmap="viridis").to_html() + + +def test_bar_invalid_color_type_error_raises(): + df = DataFrame({"A": [1, 2, 3, 4]}) + msg = ( + r"`color` must be string or list or tuple of 2 strings," + r"\(eg: color=\['#d65f5f', '#5fba7d'\]\)" + ) + # Test that providing an invalid color type raises a ValueError + with pytest.raises(ValueError, match=msg): + df.style.bar(color=123).to_html() + + # Test that providing a color list with more than two elements raises a ValueError + with pytest.raises(ValueError, match=msg): + df.style.bar(color=["#d65f5f", "#5fba7d", "#abcdef"]).to_html() + + +def test_styler_bar_with_NA_values(): + df1 = DataFrame({"A": [1, 2, NA, 4]}) + df2 = DataFrame([[NA, NA], [NA, NA]]) + expected_substring = "style type=" + html_output1 = df1.style.bar(subset="A").to_html() + html_output2 = df2.style.bar(align="left", axis=None).to_html() + assert expected_substring in html_output1 + assert expected_substring in html_output2 + + +def test_style_bar_with_pyarrow_NA_values(): + pytest.importorskip("pyarrow") + data = """name,age,test1,test2,teacher + Adam,15,95.0,80,Ashby + Bob,16,81.0,82,Ashby + Dave,16,89.0,84,Jones + Fred,15,,88,Jones""" + df = read_csv(io.StringIO(data), dtype_backend="pyarrow") + expected_substring = "style type=" + html_output = df.style.bar(subset="test1").to_html() + assert expected_substring in html_output diff --git a/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_exceptions.py b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..d52e3a37e7693dadce34f73fc03a0790c7a0b4d3 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_exceptions.py @@ -0,0 +1,44 @@ +import pytest + +jinja2 = pytest.importorskip("jinja2") + +from pandas import ( + DataFrame, + MultiIndex, +) + +from pandas.io.formats.style import Styler + + +@pytest.fixture +def df(): + return DataFrame( + data=[[0, -0.609], [1, -1.228]], + columns=["A", "B"], + index=["x", "y"], + ) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0) + + +def test_concat_bad_columns(styler): + msg = "`other.data` must have same columns as `Styler.data" + with pytest.raises(ValueError, match=msg): + styler.concat(DataFrame([[1, 2]]).style) + + +def test_concat_bad_type(styler): + msg = "`other` must be of type `Styler`" + with pytest.raises(TypeError, match=msg): + styler.concat(DataFrame([[1, 2]])) + + +def test_concat_bad_index_levels(styler, df): + df = df.copy() + df.index = MultiIndex.from_tuples([(0, 0), (1, 1)]) + msg = "number of index levels must be same in `other`" + with pytest.raises(ValueError, match=msg): + styler.concat(df.style) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_format.py b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_format.py new file mode 100644 index 0000000000000000000000000000000000000000..1c84816ead140b95f14df8dbeccc83b317ac239a --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_format.py @@ -0,0 +1,562 @@ +import numpy as np +import pytest + +from pandas import ( + NA, + DataFrame, + IndexSlice, + MultiIndex, + NaT, + Timestamp, + option_context, +) + +pytest.importorskip("jinja2") +from pandas.io.formats.style import Styler +from pandas.io.formats.style_render import _str_escape + + +@pytest.fixture +def df(): + return DataFrame( + data=[[0, -0.609], [1, -1.228]], + columns=["A", "B"], + index=["x", "y"], + ) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0) + + +@pytest.fixture +def df_multi(): + return DataFrame( + data=np.arange(16).reshape(4, 4), + columns=MultiIndex.from_product([["A", "B"], ["a", "b"]]), + index=MultiIndex.from_product([["X", "Y"], ["x", "y"]]), + ) + + +@pytest.fixture +def styler_multi(df_multi): + return Styler(df_multi, uuid_len=0) + + +def test_display_format(styler): + ctx = styler.format("{:0.1f}")._translate(True, True) + assert all(["display_value" in c for c in row] for row in ctx["body"]) + assert all([len(c["display_value"]) <= 3 for c in row[1:]] for row in ctx["body"]) + assert len(ctx["body"][0][1]["display_value"].lstrip("-")) <= 3 + + +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize("columns", [True, False]) +def test_display_format_index(styler, index, columns): + exp_index = ["x", "y"] + if index: + styler.format_index(lambda v: v.upper(), axis=0) # test callable + exp_index = ["X", "Y"] + + exp_columns = ["A", "B"] + if columns: + styler.format_index("*{}*", axis=1) # test string + exp_columns = ["*A*", "*B*"] + + ctx = styler._translate(True, True) + + for r, row in enumerate(ctx["body"]): + assert row[0]["display_value"] == exp_index[r] + + for c, col in enumerate(ctx["head"][1:]): + assert col["display_value"] == exp_columns[c] + + +def test_format_dict(styler): + ctx = styler.format({"A": "{:0.1f}", "B": "{0:.2%}"})._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "0.0" + assert ctx["body"][0][2]["display_value"] == "-60.90%" + + +def test_format_index_dict(styler): + ctx = styler.format_index({0: lambda v: v.upper()})._translate(True, True) + for i, val in enumerate(["X", "Y"]): + assert ctx["body"][i][0]["display_value"] == val + + +def test_format_string(styler): + ctx = styler.format("{:.2f}")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "0.00" + assert ctx["body"][0][2]["display_value"] == "-0.61" + assert ctx["body"][1][1]["display_value"] == "1.00" + assert ctx["body"][1][2]["display_value"] == "-1.23" + + +def test_format_callable(styler): + ctx = styler.format(lambda v: "neg" if v < 0 else "pos")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "pos" + assert ctx["body"][0][2]["display_value"] == "neg" + assert ctx["body"][1][1]["display_value"] == "pos" + assert ctx["body"][1][2]["display_value"] == "neg" + + +def test_format_with_na_rep(): + # GH 21527 28358 + df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + + ctx = df.style.format(None, na_rep="-")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "-" + assert ctx["body"][0][2]["display_value"] == "-" + + ctx = df.style.format("{:.2%}", na_rep="-")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "-" + assert ctx["body"][0][2]["display_value"] == "-" + assert ctx["body"][1][1]["display_value"] == "110.00%" + assert ctx["body"][1][2]["display_value"] == "120.00%" + + ctx = df.style.format("{:.2%}", na_rep="-", subset=["B"])._translate(True, True) + assert ctx["body"][0][2]["display_value"] == "-" + assert ctx["body"][1][2]["display_value"] == "120.00%" + + +def test_format_index_with_na_rep(): + df = DataFrame([[1, 2, 3, 4, 5]], columns=["A", None, np.nan, NaT, NA]) + ctx = df.style.format_index(None, na_rep="--", axis=1)._translate(True, True) + assert ctx["head"][0][1]["display_value"] == "A" + for i in [2, 3, 4, 5]: + assert ctx["head"][0][i]["display_value"] == "--" + + +def test_format_non_numeric_na(): + # GH 21527 28358 + df = DataFrame( + { + "object": [None, np.nan, "foo"], + "datetime": [None, NaT, Timestamp("20120101")], + } + ) + ctx = df.style.format(None, na_rep="-")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "-" + assert ctx["body"][0][2]["display_value"] == "-" + assert ctx["body"][1][1]["display_value"] == "-" + assert ctx["body"][1][2]["display_value"] == "-" + + +@pytest.mark.parametrize( + "func, attr, kwargs", + [ + ("format", "_display_funcs", {}), + ("format_index", "_display_funcs_index", {"axis": 0}), + ("format_index", "_display_funcs_columns", {"axis": 1}), + ], +) +def test_format_clear(styler, func, attr, kwargs): + assert (0, 0) not in getattr(styler, attr) # using default + getattr(styler, func)("{:.2f}", **kwargs) + assert (0, 0) in getattr(styler, attr) # formatter is specified + getattr(styler, func)(**kwargs) + assert (0, 0) not in getattr(styler, attr) # formatter cleared to default + + +@pytest.mark.parametrize( + "escape, exp", + [ + ("html", "<>&"%$#_{}~^\\~ ^ \\ "), + ( + "latex", + '<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum ' + "\\textbackslash \\textasciitilde \\space \\textasciicircum \\space " + "\\textbackslash \\space ", + ), + ], +) +def test_format_escape_html(escape, exp): + chars = '<>&"%$#_{}~^\\~ ^ \\ ' + df = DataFrame([[chars]]) + + s = Styler(df, uuid_len=0).format("&{0}&", escape=None) + expected = f'&{chars}&' + assert expected in s.to_html() + + # only the value should be escaped before passing to the formatter + s = Styler(df, uuid_len=0).format("&{0}&", escape=escape) + expected = f'&{exp}&' + assert expected in s.to_html() + + # also test format_index() + styler = Styler(DataFrame(columns=[chars]), uuid_len=0) + styler.format_index("&{0}&", escape=None, axis=1) + assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{chars}&" + styler.format_index("&{0}&", escape=escape, axis=1) + assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{exp}&" + + +@pytest.mark.parametrize( + "chars, expected", + [ + ( + r"$ \$&%#_{}~^\ $ &%#_{}~^\ $", + "".join( + [ + r"$ \$&%#_{}~^\ $ ", + r"\&\%\#\_\{\}\textasciitilde \textasciicircum ", + r"\textbackslash \space \$", + ] + ), + ), + ( + r"\( &%#_{}~^\ \) &%#_{}~^\ \(", + "".join( + [ + r"\( &%#_{}~^\ \) ", + r"\&\%\#\_\{\}\textasciitilde \textasciicircum ", + r"\textbackslash \space \textbackslash (", + ] + ), + ), + ( + r"$\&%#_{}^\$", + r"\$\textbackslash \&\%\#\_\{\}\textasciicircum \textbackslash \$", + ), + ( + r"$ \frac{1}{2} $ \( \frac{1}{2} \)", + "".join( + [ + r"$ \frac{1}{2} $", + r" \textbackslash ( \textbackslash frac\{1\}\{2\} \textbackslash )", + ] + ), + ), + ], +) +def test_format_escape_latex_math(chars, expected): + # GH 51903 + # latex-math escape works for each DataFrame cell separately. If we have + # a combination of dollar signs and brackets, the dollar sign would apply. + df = DataFrame([[chars]]) + s = df.style.format("{0}", escape="latex-math") + assert s._translate(True, True)["body"][0][1]["display_value"] == expected + + +def test_format_escape_na_rep(): + # tests the na_rep is not escaped + df = DataFrame([['<>&"', None]]) + s = Styler(df, uuid_len=0).format("X&{0}>X", escape="html", na_rep="&") + ex = 'X&<>&">X' + expected2 = '&' + assert ex in s.to_html() + assert expected2 in s.to_html() + + # also test for format_index() + df = DataFrame(columns=['<>&"', None]) + styler = Styler(df, uuid_len=0) + styler.format_index("X&{0}>X", escape="html", na_rep="&", axis=1) + ctx = styler._translate(True, True) + assert ctx["head"][0][1]["display_value"] == "X&<>&">X" + assert ctx["head"][0][2]["display_value"] == "&" + + +def test_format_escape_floats(styler): + # test given formatter for number format is not impacted by escape + s = styler.format("{:.1f}", escape="html") + for expected in [">0.0<", ">1.0<", ">-1.2<", ">-0.6<"]: + assert expected in s.to_html() + # tests precision of floats is not impacted by escape + s = styler.format(precision=1, escape="html") + for expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]: + assert expected in s.to_html() + + +@pytest.mark.parametrize("formatter", [5, True, [2.0]]) +@pytest.mark.parametrize("func", ["format", "format_index"]) +def test_format_raises(styler, formatter, func): + with pytest.raises(TypeError, match="expected str or callable"): + getattr(styler, func)(formatter) + + +@pytest.mark.parametrize( + "precision, expected", + [ + (1, ["1.0", "2.0", "3.2", "4.6"]), + (2, ["1.00", "2.01", "3.21", "4.57"]), + (3, ["1.000", "2.009", "3.212", "4.566"]), + ], +) +def test_format_with_precision(precision, expected): + # Issue #13257 + df = DataFrame([[1.0, 2.0090, 3.2121, 4.566]], columns=[1.0, 2.0090, 3.2121, 4.566]) + styler = Styler(df) + styler.format(precision=precision) + styler.format_index(precision=precision, axis=1) + + ctx = styler._translate(True, True) + for col, exp in enumerate(expected): + assert ctx["body"][0][col + 1]["display_value"] == exp # format test + assert ctx["head"][0][col + 1]["display_value"] == exp # format_index test + + +@pytest.mark.parametrize("axis", [0, 1]) +@pytest.mark.parametrize( + "level, expected", + [ + (0, ["X", "X", "_", "_"]), # level int + ("zero", ["X", "X", "_", "_"]), # level name + (1, ["_", "_", "X", "X"]), # other level int + ("one", ["_", "_", "X", "X"]), # other level name + ([0, 1], ["X", "X", "X", "X"]), # both levels + ([0, "zero"], ["X", "X", "_", "_"]), # level int and name simultaneous + ([0, "one"], ["X", "X", "X", "X"]), # both levels as int and name + (["one", "zero"], ["X", "X", "X", "X"]), # both level names, reversed + ], +) +def test_format_index_level(axis, level, expected): + midx = MultiIndex.from_arrays([["_", "_"], ["_", "_"]], names=["zero", "one"]) + df = DataFrame([[1, 2], [3, 4]]) + if axis == 0: + df.index = midx + else: + df.columns = midx + + styler = df.style.format_index(lambda v: "X", level=level, axis=axis) + ctx = styler._translate(True, True) + + if axis == 0: # compare index + result = [ctx["body"][s][0]["display_value"] for s in range(2)] + result += [ctx["body"][s][1]["display_value"] for s in range(2)] + else: # compare columns + result = [ctx["head"][0][s + 1]["display_value"] for s in range(2)] + result += [ctx["head"][1][s + 1]["display_value"] for s in range(2)] + + assert expected == result + + +def test_format_subset(): + df = DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"]) + ctx = df.style.format( + {"a": "{:0.1f}", "b": "{0:.2%}"}, subset=IndexSlice[0, :] + )._translate(True, True) + expected = "0.1" + raw_11 = "1.123400" + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][1][1]["display_value"] == raw_11 + assert ctx["body"][0][2]["display_value"] == "12.34%" + + ctx = df.style.format("{:0.1f}", subset=IndexSlice[0, :])._translate(True, True) + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][1][1]["display_value"] == raw_11 + + ctx = df.style.format("{:0.1f}", subset=IndexSlice["a"])._translate(True, True) + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][0][2]["display_value"] == "0.123400" + + ctx = df.style.format("{:0.1f}", subset=IndexSlice[0, "a"])._translate(True, True) + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][1][1]["display_value"] == raw_11 + + ctx = df.style.format("{:0.1f}", subset=IndexSlice[[0, 1], ["a"]])._translate( + True, True + ) + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][1][1]["display_value"] == "1.1" + assert ctx["body"][0][2]["display_value"] == "0.123400" + assert ctx["body"][1][2]["display_value"] == raw_11 + + +@pytest.mark.parametrize("formatter", [None, "{:,.1f}"]) +@pytest.mark.parametrize("decimal", [".", "*"]) +@pytest.mark.parametrize("precision", [None, 2]) +@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)]) +def test_format_thousands(formatter, decimal, precision, func, col): + styler = DataFrame([[1000000.123456789]], index=[1000000.123456789]).style + result = getattr(styler, func)( # testing float + thousands="_", formatter=formatter, decimal=decimal, precision=precision + )._translate(True, True) + assert "1_000_000" in result["body"][0][col]["display_value"] + + styler = DataFrame([[1000000]], index=[1000000]).style + result = getattr(styler, func)( # testing int + thousands="_", formatter=formatter, decimal=decimal, precision=precision + )._translate(True, True) + assert "1_000_000" in result["body"][0][col]["display_value"] + + styler = DataFrame([[1 + 1000000.123456789j]], index=[1 + 1000000.123456789j]).style + result = getattr(styler, func)( # testing complex + thousands="_", formatter=formatter, decimal=decimal, precision=precision + )._translate(True, True) + assert "1_000_000" in result["body"][0][col]["display_value"] + + +@pytest.mark.parametrize("formatter", [None, "{:,.4f}"]) +@pytest.mark.parametrize("thousands", [None, ",", "*"]) +@pytest.mark.parametrize("precision", [None, 4]) +@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)]) +def test_format_decimal(formatter, thousands, precision, func, col): + styler = DataFrame([[1000000.123456789]], index=[1000000.123456789]).style + result = getattr(styler, func)( # testing float + decimal="_", formatter=formatter, thousands=thousands, precision=precision + )._translate(True, True) + assert "000_123" in result["body"][0][col]["display_value"] + + styler = DataFrame([[1 + 1000000.123456789j]], index=[1 + 1000000.123456789j]).style + result = getattr(styler, func)( # testing complex + decimal="_", formatter=formatter, thousands=thousands, precision=precision + )._translate(True, True) + assert "000_123" in result["body"][0][col]["display_value"] + + +def test_str_escape_error(): + msg = "`escape` only permitted in {'html', 'latex', 'latex-math'}, got " + with pytest.raises(ValueError, match=msg): + _str_escape("text", "bad_escape") + + with pytest.raises(ValueError, match=msg): + _str_escape("text", []) + + _str_escape(2.00, "bad_escape") # OK since dtype is float + + +def test_long_int_formatting(): + df = DataFrame(data=[[1234567890123456789]], columns=["test"]) + styler = df.style + ctx = styler._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "1234567890123456789" + + styler = df.style.format(thousands="_") + ctx = styler._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "1_234_567_890_123_456_789" + + +def test_format_options(): + df = DataFrame({"int": [2000, 1], "float": [1.009, None], "str": ["&<", "&~"]}) + ctx = df.style._translate(True, True) + + # test option: na_rep + assert ctx["body"][1][2]["display_value"] == "nan" + with option_context("styler.format.na_rep", "MISSING"): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][1][2]["display_value"] == "MISSING" + + # test option: decimal and precision + assert ctx["body"][0][2]["display_value"] == "1.009000" + with option_context("styler.format.decimal", "_"): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][0][2]["display_value"] == "1_009000" + with option_context("styler.format.precision", 2): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][0][2]["display_value"] == "1.01" + + # test option: thousands + assert ctx["body"][0][1]["display_value"] == "2000" + with option_context("styler.format.thousands", "_"): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][0][1]["display_value"] == "2_000" + + # test option: escape + assert ctx["body"][0][3]["display_value"] == "&<" + assert ctx["body"][1][3]["display_value"] == "&~" + with option_context("styler.format.escape", "html"): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][0][3]["display_value"] == "&<" + with option_context("styler.format.escape", "latex"): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][1][3]["display_value"] == "\\&\\textasciitilde " + with option_context("styler.format.escape", "latex-math"): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][1][3]["display_value"] == "\\&\\textasciitilde " + + # test option: formatter + with option_context("styler.format.formatter", {"int": "{:,.2f}"}): + ctx_with_op = df.style._translate(True, True) + assert ctx_with_op["body"][0][1]["display_value"] == "2,000.00" + + +def test_precision_zero(df): + styler = Styler(df, precision=0) + ctx = styler._translate(True, True) + assert ctx["body"][0][2]["display_value"] == "-1" + assert ctx["body"][1][2]["display_value"] == "-1" + + +@pytest.mark.parametrize( + "formatter, exp", + [ + (lambda x: f"{x:.3f}", "9.000"), + ("{:.2f}", "9.00"), + ({0: "{:.1f}"}, "9.0"), + (None, "9"), + ], +) +def test_formatter_options_validator(formatter, exp): + df = DataFrame([[9]]) + with option_context("styler.format.formatter", formatter): + assert f" {exp} " in df.style.to_latex() + + +def test_formatter_options_raises(): + msg = "Value must be an instance of" + with pytest.raises(ValueError, match=msg): + with option_context("styler.format.formatter", ["bad", "type"]): + DataFrame().style.to_latex() + + +def test_1level_multiindex(): + # GH 43383 + midx = MultiIndex.from_product([[1, 2]], names=[""]) + df = DataFrame(-1, index=midx, columns=[0, 1]) + ctx = df.style._translate(True, True) + assert ctx["body"][0][0]["display_value"] == "1" + assert ctx["body"][0][0]["is_visible"] is True + assert ctx["body"][1][0]["display_value"] == "2" + assert ctx["body"][1][0]["is_visible"] is True + + +def test_boolean_format(): + # gh 46384: booleans do not collapse to integer representation on display + df = DataFrame([[True, False]]) + ctx = df.style._translate(True, True) + assert ctx["body"][0][1]["display_value"] is True + assert ctx["body"][0][2]["display_value"] is False + + +@pytest.mark.parametrize( + "hide, labels", + [ + (False, [1, 2]), + (True, [1, 2, 3, 4]), + ], +) +def test_relabel_raise_length(styler_multi, hide, labels): + if hide: + styler_multi.hide(axis=0, subset=[("X", "x"), ("Y", "y")]) + with pytest.raises(ValueError, match="``labels`` must be of length equal"): + styler_multi.relabel_index(labels=labels) + + +def test_relabel_index(styler_multi): + labels = [(1, 2), (3, 4)] + styler_multi.hide(axis=0, subset=[("X", "x"), ("Y", "y")]) + styler_multi.relabel_index(labels=labels) + ctx = styler_multi._translate(True, True) + assert {"value": "X", "display_value": 1}.items() <= ctx["body"][0][0].items() + assert {"value": "y", "display_value": 2}.items() <= ctx["body"][0][1].items() + assert {"value": "Y", "display_value": 3}.items() <= ctx["body"][1][0].items() + assert {"value": "x", "display_value": 4}.items() <= ctx["body"][1][1].items() + + +def test_relabel_columns(styler_multi): + labels = [(1, 2), (3, 4)] + styler_multi.hide(axis=1, subset=[("A", "a"), ("B", "b")]) + styler_multi.relabel_index(axis=1, labels=labels) + ctx = styler_multi._translate(True, True) + assert {"value": "A", "display_value": 1}.items() <= ctx["head"][0][3].items() + assert {"value": "B", "display_value": 3}.items() <= ctx["head"][0][4].items() + assert {"value": "b", "display_value": 2}.items() <= ctx["head"][1][3].items() + assert {"value": "a", "display_value": 4}.items() <= ctx["head"][1][4].items() + + +def test_relabel_roundtrip(styler): + styler.relabel_index(["{}", "{}"]) + ctx = styler._translate(True, True) + assert {"value": "x", "display_value": "x"}.items() <= ctx["body"][0][0].items() + assert {"value": "y", "display_value": "y"}.items() <= ctx["body"][1][0].items() diff --git a/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_highlight.py b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_highlight.py new file mode 100644 index 0000000000000000000000000000000000000000..3d59719010ee03cc53373a1c96f5f8c5611d7681 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_highlight.py @@ -0,0 +1,218 @@ +import numpy as np +import pytest + +from pandas import ( + NA, + DataFrame, + IndexSlice, +) + +pytest.importorskip("jinja2") + +from pandas.io.formats.style import Styler + + +@pytest.fixture(params=[(None, "float64"), (NA, "Int64")]) +def df(request): + # GH 45804 + return DataFrame( + {"A": [0, np.nan, 10], "B": [1, request.param[0], 2]}, dtype=request.param[1] + ) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0) + + +def test_highlight_null(styler): + result = styler.highlight_null()._compute().ctx + expected = { + (1, 0): [("background-color", "red")], + (1, 1): [("background-color", "red")], + } + assert result == expected + + +def test_highlight_null_subset(styler): + # GH 31345 + result = ( + styler.highlight_null(color="red", subset=["A"]) + .highlight_null(color="green", subset=["B"]) + ._compute() + .ctx + ) + expected = { + (1, 0): [("background-color", "red")], + (1, 1): [("background-color", "green")], + } + assert result == expected + + +@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"]) +def test_highlight_minmax_basic(df, f): + expected = { + (0, 1): [("background-color", "red")], + # ignores NaN row, + (2, 0): [("background-color", "red")], + } + if f == "highlight_min": + df = -df + result = getattr(df.style, f)(axis=1, color="red")._compute().ctx + assert result == expected + + +@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"]) +@pytest.mark.parametrize( + "kwargs", + [ + {"axis": None, "color": "red"}, # test axis + {"axis": 0, "subset": ["A"], "color": "red"}, # test subset and ignores NaN + {"axis": None, "props": "background-color: red"}, # test props + ], +) +def test_highlight_minmax_ext(df, f, kwargs): + expected = {(2, 0): [("background-color", "red")]} + if f == "highlight_min": + df = -df + result = getattr(df.style, f)(**kwargs)._compute().ctx + assert result == expected + + +@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"]) +@pytest.mark.parametrize("axis", [None, 0, 1]) +def test_highlight_minmax_nulls(f, axis): + # GH 42750 + expected = { + (1, 0): [("background-color", "yellow")], + (1, 1): [("background-color", "yellow")], + } + if axis == 1: + expected.update({(2, 1): [("background-color", "yellow")]}) + + if f == "highlight_max": + df = DataFrame({"a": [NA, 1, None], "b": [np.nan, 1, -1]}) + else: + df = DataFrame({"a": [NA, -1, None], "b": [np.nan, -1, 1]}) + + result = getattr(df.style, f)(axis=axis)._compute().ctx + assert result == expected + + +@pytest.mark.parametrize( + "kwargs", + [ + {"left": 0, "right": 1}, # test basic range + {"left": 0, "right": 1, "props": "background-color: yellow"}, # test props + {"left": -100, "right": 100, "subset": IndexSlice[[0, 1], :]}, # test subset + {"left": 0, "subset": IndexSlice[[0, 1], :]}, # test no right + {"right": 1}, # test no left + {"left": [0, 0, 11], "axis": 0}, # test left as sequence + {"left": DataFrame({"A": [0, 0, 11], "B": [1, 1, 11]}), "axis": None}, # axis + {"left": 0, "right": [0, 1], "axis": 1}, # test sequence right + ], +) +def test_highlight_between(styler, kwargs): + expected = { + (0, 0): [("background-color", "yellow")], + (0, 1): [("background-color", "yellow")], + } + result = styler.highlight_between(**kwargs)._compute().ctx + assert result == expected + + +@pytest.mark.parametrize( + "arg, map, axis", + [ + ("left", [1, 2], 0), # 0 axis has 3 elements not 2 + ("left", [1, 2, 3], 1), # 1 axis has 2 elements not 3 + ("left", np.array([[1, 2], [1, 2]]), None), # df is (2,3) not (2,2) + ("right", [1, 2], 0), # same tests as above for 'right' not 'left' + ("right", [1, 2, 3], 1), # .. + ("right", np.array([[1, 2], [1, 2]]), None), # .. + ], +) +def test_highlight_between_raises(arg, styler, map, axis): + msg = f"supplied '{arg}' is not correct shape" + with pytest.raises(ValueError, match=msg): + styler.highlight_between(**{arg: map, "axis": axis})._compute() + + +def test_highlight_between_raises2(styler): + msg = "values can be 'both', 'left', 'right', or 'neither'" + with pytest.raises(ValueError, match=msg): + styler.highlight_between(inclusive="badstring")._compute() + + with pytest.raises(ValueError, match=msg): + styler.highlight_between(inclusive=1)._compute() + + +@pytest.mark.parametrize( + "inclusive, expected", + [ + ( + "both", + { + (0, 0): [("background-color", "yellow")], + (0, 1): [("background-color", "yellow")], + }, + ), + ("neither", {}), + ("left", {(0, 0): [("background-color", "yellow")]}), + ("right", {(0, 1): [("background-color", "yellow")]}), + ], +) +def test_highlight_between_inclusive(styler, inclusive, expected): + kwargs = {"left": 0, "right": 1, "subset": IndexSlice[[0, 1], :]} + result = styler.highlight_between(**kwargs, inclusive=inclusive)._compute() + assert result.ctx == expected + + +@pytest.mark.parametrize( + "kwargs", + [ + {"q_left": 0.5, "q_right": 1, "axis": 0}, # base case + {"q_left": 0.5, "q_right": 1, "axis": None}, # test axis + {"q_left": 0, "q_right": 1, "subset": IndexSlice[2, :]}, # test subset + {"q_left": 0.5, "axis": 0}, # test no high + {"q_right": 1, "subset": IndexSlice[2, :], "axis": 1}, # test no low + {"q_left": 0.5, "axis": 0, "props": "background-color: yellow"}, # tst prop + ], +) +def test_highlight_quantile(styler, kwargs): + expected = { + (2, 0): [("background-color", "yellow")], + (2, 1): [("background-color", "yellow")], + } + result = styler.highlight_quantile(**kwargs)._compute().ctx + assert result == expected + + +@pytest.mark.parametrize( + "f,kwargs", + [ + ("highlight_min", {"axis": 1, "subset": IndexSlice[1, :]}), + ("highlight_max", {"axis": 0, "subset": [0]}), + ("highlight_quantile", {"axis": None, "q_left": 0.6, "q_right": 0.8}), + ("highlight_between", {"subset": [0]}), + ], +) +@pytest.mark.parametrize( + "df", + [ + DataFrame([[0, 10], [20, 30]], dtype=int), + DataFrame([[0, 10], [20, 30]], dtype=float), + DataFrame([[0, 10], [20, 30]], dtype="datetime64[ns]"), + DataFrame([[0, 10], [20, 30]], dtype=str), + DataFrame([[0, 10], [20, 30]], dtype="timedelta64[ns]"), + ], +) +def test_all_highlight_dtypes(f, kwargs, df): + if f == "highlight_quantile" and isinstance(df.iloc[0, 0], (str)): + return None # quantile incompatible with str + if f == "highlight_between": + kwargs["left"] = df.iloc[1, 0] # set the range low for testing + + expected = {(1, 0): [("background-color", "yellow")]} + result = getattr(df.style, f)(**kwargs)._compute().ctx + assert result == expected diff --git a/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_html.py b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_html.py new file mode 100644 index 0000000000000000000000000000000000000000..1e345eb82ed3c31e7a5e0f89fa574aea84923dd7 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_html.py @@ -0,0 +1,1009 @@ +from textwrap import ( + dedent, + indent, +) + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, + option_context, +) + +jinja2 = pytest.importorskip("jinja2") +from pandas.io.formats.style import Styler + + +@pytest.fixture +def env(): + loader = jinja2.PackageLoader("pandas", "io/formats/templates") + env = jinja2.Environment(loader=loader, trim_blocks=True) + return env + + +@pytest.fixture +def styler(): + return Styler(DataFrame([[2.61], [2.69]], index=["a", "b"], columns=["A"])) + + +@pytest.fixture +def styler_mi(): + midx = MultiIndex.from_product([["a", "b"], ["c", "d"]]) + return Styler(DataFrame(np.arange(16).reshape(4, 4), index=midx, columns=midx)) + + +@pytest.fixture +def tpl_style(env): + return env.get_template("html_style.tpl") + + +@pytest.fixture +def tpl_table(env): + return env.get_template("html_table.tpl") + + +def test_html_template_extends_options(): + # make sure if templates are edited tests are updated as are setup fixtures + # to understand the dependency + with open("pandas/io/formats/templates/html.tpl", encoding="utf-8") as file: + result = file.read() + assert "{% include html_style_tpl %}" in result + assert "{% include html_table_tpl %}" in result + + +def test_exclude_styles(styler): + result = styler.to_html(exclude_styles=True, doctype_html=True) + expected = dedent( + """\ + + + + + + + + + + + + + + + + + + + + + + + +
 A
a2.610000
b2.690000
+ + + """ + ) + assert result == expected + + +def test_w3_html_format(styler): + styler.set_uuid("").set_table_styles([{"selector": "th", "props": "att2:v2;"}]).map( + lambda x: "att1:v1;" + ).set_table_attributes('class="my-cls1" style="attr3:v3;"').set_td_classes( + DataFrame(["my-cls2"], index=["a"], columns=["A"]) + ).format( + "{:.1f}" + ).set_caption( + "A comprehensive test" + ) + expected = dedent( + """\ + + + + + + + + + + + + + + + + + + + +
A comprehensive test
 A
a2.6
b2.7
+ """ + ) + assert expected == styler.to_html() + + +def test_colspan_w3(): + # GH 36223 + df = DataFrame(data=[[1, 2]], columns=[["l0", "l0"], ["l1a", "l1b"]]) + styler = Styler(df, uuid="_", cell_ids=False) + assert 'l0' in styler.to_html() + + +def test_rowspan_w3(): + # GH 38533 + df = DataFrame(data=[[1, 2]], index=[["l0", "l0"], ["l1a", "l1b"]]) + styler = Styler(df, uuid="_", cell_ids=False) + assert 'l0' in styler.to_html() + + +def test_styles(styler): + styler.set_uuid("abc") + styler.set_table_styles([{"selector": "td", "props": "color: red;"}]) + result = styler.to_html(doctype_html=True) + expected = dedent( + """\ + + + + + + + + + + + + + + + + + + + + + + + + +
 A
a2.610000
b2.690000
+ + + """ + ) + assert result == expected + + +def test_doctype(styler): + result = styler.to_html(doctype_html=False) + assert "" not in result + assert "" not in result + assert "" not in result + assert "" not in result + + +def test_doctype_encoding(styler): + with option_context("styler.render.encoding", "ASCII"): + result = styler.to_html(doctype_html=True) + assert '' in result + result = styler.to_html(doctype_html=True, encoding="ANSI") + assert '' in result + + +def test_bold_headers_arg(styler): + result = styler.to_html(bold_headers=True) + assert "th {\n font-weight: bold;\n}" in result + result = styler.to_html() + assert "th {\n font-weight: bold;\n}" not in result + + +def test_caption_arg(styler): + result = styler.to_html(caption="foo bar") + assert "foo bar" in result + result = styler.to_html() + assert "foo bar" not in result + + +def test_block_names(tpl_style, tpl_table): + # catch accidental removal of a block + expected_style = { + "before_style", + "style", + "table_styles", + "before_cellstyle", + "cellstyle", + } + expected_table = { + "before_table", + "table", + "caption", + "thead", + "tbody", + "after_table", + "before_head_rows", + "head_tr", + "after_head_rows", + "before_rows", + "tr", + "after_rows", + } + result1 = set(tpl_style.blocks) + assert result1 == expected_style + + result2 = set(tpl_table.blocks) + assert result2 == expected_table + + +def test_from_custom_template_table(tmpdir): + p = tmpdir.mkdir("tpl").join("myhtml_table.tpl") + p.write( + dedent( + """\ + {% extends "html_table.tpl" %} + {% block table %} +

{{custom_title}}

+ {{ super() }} + {% endblock table %}""" + ) + ) + result = Styler.from_custom_template(str(tmpdir.join("tpl")), "myhtml_table.tpl") + assert issubclass(result, Styler) + assert result.env is not Styler.env + assert result.template_html_table is not Styler.template_html_table + styler = result(DataFrame({"A": [1, 2]})) + assert "

My Title

\n\n\n + {{ super() }} + {% endblock style %}""" + ) + ) + result = Styler.from_custom_template( + str(tmpdir.join("tpl")), html_style="myhtml_style.tpl" + ) + assert issubclass(result, Styler) + assert result.env is not Styler.env + assert result.template_html_style is not Styler.template_html_style + styler = result(DataFrame({"A": [1, 2]})) + assert '\n\nfull cap" in styler.to_html() + + +@pytest.mark.parametrize("index", [False, True]) +@pytest.mark.parametrize("columns", [False, True]) +@pytest.mark.parametrize("index_name", [True, False]) +def test_sticky_basic(styler, index, columns, index_name): + if index_name: + styler.index.name = "some text" + if index: + styler.set_sticky(axis=0) + if columns: + styler.set_sticky(axis=1) + + left_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: inherit;\n" + " left: 0px;\n z-index: {1};\n}}" + ) + top_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: inherit;\n" + " top: {1}px;\n z-index: {2};\n{3}}}" + ) + + res = styler.set_uuid("").to_html() + + # test index stickys over thead and tbody + assert (left_css.format("thead tr th:nth-child(1)", "3 !important") in res) is index + assert (left_css.format("tbody tr th:nth-child(1)", "1") in res) is index + + # test column stickys including if name row + assert ( + top_css.format("thead tr:nth-child(1) th", "0", "2", " height: 25px;\n") in res + ) is (columns and index_name) + assert ( + top_css.format("thead tr:nth-child(2) th", "25", "2", " height: 25px;\n") + in res + ) is (columns and index_name) + assert (top_css.format("thead tr:nth-child(1) th", "0", "2", "") in res) is ( + columns and not index_name + ) + + +@pytest.mark.parametrize("index", [False, True]) +@pytest.mark.parametrize("columns", [False, True]) +def test_sticky_mi(styler_mi, index, columns): + if index: + styler_mi.set_sticky(axis=0) + if columns: + styler_mi.set_sticky(axis=1) + + left_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: inherit;\n" + " left: {1}px;\n min-width: 75px;\n max-width: 75px;\n z-index: {2};\n}}" + ) + top_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: inherit;\n" + " top: {1}px;\n height: 25px;\n z-index: {2};\n}}" + ) + + res = styler_mi.set_uuid("").to_html() + + # test the index stickys for thead and tbody over both levels + assert ( + left_css.format("thead tr th:nth-child(1)", "0", "3 !important") in res + ) is index + assert (left_css.format("tbody tr th.level0", "0", "1") in res) is index + assert ( + left_css.format("thead tr th:nth-child(2)", "75", "3 !important") in res + ) is index + assert (left_css.format("tbody tr th.level1", "75", "1") in res) is index + + # test the column stickys for each level row + assert (top_css.format("thead tr:nth-child(1) th", "0", "2") in res) is columns + assert (top_css.format("thead tr:nth-child(2) th", "25", "2") in res) is columns + + +@pytest.mark.parametrize("index", [False, True]) +@pytest.mark.parametrize("columns", [False, True]) +@pytest.mark.parametrize("levels", [[1], ["one"], "one"]) +def test_sticky_levels(styler_mi, index, columns, levels): + styler_mi.index.names, styler_mi.columns.names = ["zero", "one"], ["zero", "one"] + if index: + styler_mi.set_sticky(axis=0, levels=levels) + if columns: + styler_mi.set_sticky(axis=1, levels=levels) + + left_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: inherit;\n" + " left: {1}px;\n min-width: 75px;\n max-width: 75px;\n z-index: {2};\n}}" + ) + top_css = ( + "#T_ {0} {{\n position: sticky;\n background-color: inherit;\n" + " top: {1}px;\n height: 25px;\n z-index: {2};\n}}" + ) + + res = styler_mi.set_uuid("").to_html() + + # test no sticking of level0 + assert "#T_ thead tr th:nth-child(1)" not in res + assert "#T_ tbody tr th.level0" not in res + assert "#T_ thead tr:nth-child(1) th" not in res + + # test sticking level1 + assert ( + left_css.format("thead tr th:nth-child(2)", "0", "3 !important") in res + ) is index + assert (left_css.format("tbody tr th.level1", "0", "1") in res) is index + assert (top_css.format("thead tr:nth-child(2) th", "0", "2") in res) is columns + + +def test_sticky_raises(styler): + with pytest.raises(ValueError, match="No axis named bad for object type DataFrame"): + styler.set_sticky(axis="bad") + + +@pytest.mark.parametrize( + "sparse_index, sparse_columns", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_sparse_options(sparse_index, sparse_columns): + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=ridx, columns=cidx) + styler = df.style + + default_html = styler.to_html() # defaults under pd.options to (True , True) + + with option_context( + "styler.sparse.index", sparse_index, "styler.sparse.columns", sparse_columns + ): + html1 = styler.to_html() + assert (html1 == default_html) is (sparse_index and sparse_columns) + html2 = styler.to_html(sparse_index=sparse_index, sparse_columns=sparse_columns) + assert html1 == html2 + + +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize("columns", [True, False]) +def test_map_header_cell_ids(styler, index, columns): + # GH 41893 + func = lambda v: "attr: val;" + styler.uuid, styler.cell_ids = "", False + if index: + styler.map_index(func, axis="index") + if columns: + styler.map_index(func, axis="columns") + + result = styler.to_html() + + # test no data cell ids + assert '2.610000' in result + assert '2.690000' in result + + # test index header ids where needed and css styles + assert ( + 'a' in result + ) is index + assert ( + 'b' in result + ) is index + assert ("#T__level0_row0, #T__level0_row1 {\n attr: val;\n}" in result) is index + + # test column header ids where needed and css styles + assert ( + 'A' in result + ) is columns + assert ("#T__level0_col0 {\n attr: val;\n}" in result) is columns + + +@pytest.mark.parametrize("rows", [True, False]) +@pytest.mark.parametrize("cols", [True, False]) +def test_maximums(styler_mi, rows, cols): + result = styler_mi.to_html( + max_rows=2 if rows else None, + max_columns=2 if cols else None, + ) + + assert ">5" in result # [[0,1], [4,5]] always visible + assert (">8" in result) is not rows # first trimmed vertical element + assert (">2" in result) is not cols # first trimmed horizontal element + + +def test_replaced_css_class_names(): + css = { + "row_heading": "ROWHEAD", + # "col_heading": "COLHEAD", + "index_name": "IDXNAME", + # "col": "COL", + "row": "ROW", + # "col_trim": "COLTRIM", + "row_trim": "ROWTRIM", + "level": "LEVEL", + "data": "DATA", + "blank": "BLANK", + } + midx = MultiIndex.from_product([["a", "b"], ["c", "d"]]) + styler_mi = Styler( + DataFrame(np.arange(16).reshape(4, 4), index=midx, columns=midx), + uuid_len=0, + ).set_table_styles(css_class_names=css) + styler_mi.index.names = ["n1", "n2"] + styler_mi.hide(styler_mi.index[1:], axis=0) + styler_mi.hide(styler_mi.columns[1:], axis=1) + styler_mi.map_index(lambda v: "color: red;", axis=0) + styler_mi.map_index(lambda v: "color: green;", axis=1) + styler_mi.map(lambda v: "color: blue;") + expected = dedent( + """\ + + + + + + + + + + + + + + + + + + + + + + + + + + +
 n1a
 n2c
n1n2 
ac0
+ """ + ) + result = styler_mi.to_html() + assert result == expected + + +def test_include_css_style_rules_only_for_visible_cells(styler_mi): + # GH 43619 + result = ( + styler_mi.set_uuid("") + .map(lambda v: "color: blue;") + .hide(styler_mi.data.columns[1:], axis="columns") + .hide(styler_mi.data.index[1:], axis="index") + .to_html() + ) + expected_styles = dedent( + """\ + + """ + ) + assert expected_styles in result + + +def test_include_css_style_rules_only_for_visible_index_labels(styler_mi): + # GH 43619 + result = ( + styler_mi.set_uuid("") + .map_index(lambda v: "color: blue;", axis="index") + .hide(styler_mi.data.columns, axis="columns") + .hide(styler_mi.data.index[1:], axis="index") + .to_html() + ) + expected_styles = dedent( + """\ + + """ + ) + assert expected_styles in result + + +def test_include_css_style_rules_only_for_visible_column_labels(styler_mi): + # GH 43619 + result = ( + styler_mi.set_uuid("") + .map_index(lambda v: "color: blue;", axis="columns") + .hide(styler_mi.data.columns[1:], axis="columns") + .hide(styler_mi.data.index, axis="index") + .to_html() + ) + expected_styles = dedent( + """\ + + """ + ) + assert expected_styles in result + + +def test_hiding_index_columns_multiindex_alignment(): + # gh 43644 + midx = MultiIndex.from_product( + [["i0", "j0"], ["i1"], ["i2", "j2"]], names=["i-0", "i-1", "i-2"] + ) + cidx = MultiIndex.from_product( + [["c0"], ["c1", "d1"], ["c2", "d2"]], names=["c-0", "c-1", "c-2"] + ) + df = DataFrame(np.arange(16).reshape(4, 4), index=midx, columns=cidx) + styler = Styler(df, uuid_len=0) + styler.hide(level=1, axis=0).hide(level=0, axis=1) + styler.hide([("j0", "i1", "j2")], axis=0) + styler.hide([("c0", "d1", "d2")], axis=1) + result = styler.to_html() + expected = dedent( + """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 c-1c1d1
 c-2c2d2c2
i-0i-2   
i0i2012
j2456
j0i28910
+ """ + ) + assert result == expected + + +def test_hiding_index_columns_multiindex_trimming(): + # gh 44272 + df = DataFrame(np.arange(64).reshape(8, 8)) + df.columns = MultiIndex.from_product([[0, 1, 2, 3], [0, 1]]) + df.index = MultiIndex.from_product([[0, 1, 2, 3], [0, 1]]) + df.index.names, df.columns.names = ["a", "b"], ["c", "d"] + styler = Styler(df, cell_ids=False, uuid_len=0) + styler.hide([(0, 0), (0, 1), (1, 0)], axis=1).hide([(0, 0), (0, 1), (1, 0)], axis=0) + with option_context("styler.render.max_rows", 4, "styler.render.max_columns", 4): + result = styler.to_html() + + expected = dedent( + """\ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 c123
 d1010...
ab     
1127282930...
2035363738...
143444546...
3051525354...
.....................
+ """ + ) + + assert result == expected + + +@pytest.mark.parametrize("type", ["data", "index"]) +@pytest.mark.parametrize( + "text, exp, found", + [ + ("no link, just text", False, ""), + ("subdomain not www: sub.web.com", False, ""), + ("www subdomain: www.web.com other", True, "www.web.com"), + ("scheme full structure: http://www.web.com", True, "http://www.web.com"), + ("scheme no top-level: http://www.web", True, "http://www.web"), + ("no scheme, no top-level: www.web", False, "www.web"), + ("https scheme: https://www.web.com", True, "https://www.web.com"), + ("ftp scheme: ftp://www.web", True, "ftp://www.web"), + ("ftps scheme: ftps://www.web", True, "ftps://www.web"), + ("subdirectories: www.web.com/directory", True, "www.web.com/directory"), + ("Multiple domains: www.1.2.3.4", True, "www.1.2.3.4"), + ("with port: http://web.com:80", True, "http://web.com:80"), + ( + "full net_loc scheme: http://user:pass@web.com", + True, + "http://user:pass@web.com", + ), + ( + "with valid special chars: http://web.com/,.':;~!@#$*()[]", + True, + "http://web.com/,.':;~!@#$*()[]", + ), + ], +) +def test_rendered_links(type, text, exp, found): + if type == "data": + df = DataFrame([text]) + styler = df.style.format(hyperlinks="html") + else: + df = DataFrame([0], index=[text]) + styler = df.style.format_index(hyperlinks="html") + + rendered = f'{found}' + result = styler.to_html() + assert (rendered in result) is exp + assert (text in result) is not exp # test conversion done when expected and not + + +def test_multiple_rendered_links(): + links = ("www.a.b", "http://a.c", "https://a.d", "ftp://a.e") + # pylint: disable-next=consider-using-f-string + df = DataFrame(["text {} {} text {} {}".format(*links)]) + result = df.style.format(hyperlinks="html").to_html() + href = '{0}' + for link in links: + assert href.format(link) in result + assert href.format("text") not in result + + +def test_concat(styler): + other = styler.data.agg(["mean"]).style + styler.concat(other).set_uuid("X") + result = styler.to_html() + fp = "foot0_" + expected = dedent( + f"""\ + + b + 2.690000 + + + mean + 2.650000 + + + + """ + ) + assert expected in result + + +def test_concat_recursion(styler): + df = styler.data + styler1 = styler + styler2 = Styler(df.agg(["mean"]), precision=3) + styler3 = Styler(df.agg(["mean"]), precision=4) + styler1.concat(styler2.concat(styler3)).set_uuid("X") + result = styler.to_html() + # notice that the second concat (last of the output html), + # there are two `foot_` in the id and class + fp1 = "foot0_" + fp2 = "foot0_foot0_" + expected = dedent( + f"""\ + + b + 2.690000 + + + mean + 2.650 + + + mean + 2.6500 + + + + """ + ) + assert expected in result + + +def test_concat_chain(styler): + df = styler.data + styler1 = styler + styler2 = Styler(df.agg(["mean"]), precision=3) + styler3 = Styler(df.agg(["mean"]), precision=4) + styler1.concat(styler2).concat(styler3).set_uuid("X") + result = styler.to_html() + fp1 = "foot0_" + fp2 = "foot1_" + expected = dedent( + f"""\ + + b + 2.690000 + + + mean + 2.650 + + + mean + 2.6500 + + + + """ + ) + assert expected in result + + +def test_concat_combined(): + def html_lines(foot_prefix: str): + assert foot_prefix.endswith("_") or foot_prefix == "" + fp = foot_prefix + return indent( + dedent( + f"""\ + + a + 2.610000 + + + b + 2.690000 + + """ + ), + prefix=" " * 4, + ) + + df = DataFrame([[2.61], [2.69]], index=["a", "b"], columns=["A"]) + s1 = df.style.highlight_max(color="red") + s2 = df.style.highlight_max(color="green") + s3 = df.style.highlight_max(color="blue") + s4 = df.style.highlight_max(color="yellow") + + result = s1.concat(s2).concat(s3.concat(s4)).set_uuid("X").to_html() + expected_css = dedent( + """\ + + """ + ) + expected_table = ( + dedent( + """\ + + + + + + + + + """ + ) + + html_lines("") + + html_lines("foot0_") + + html_lines("foot1_") + + html_lines("foot1_foot0_") + + dedent( + """\ + +
 A
+ """ + ) + ) + assert expected_css + expected_table == result + + +def test_to_html_na_rep_non_scalar_data(datapath): + # GH47103 + df = DataFrame([{"a": 1, "b": [1, 2, 3], "c": np.nan}]) + result = df.style.format(na_rep="-").to_html(table_uuid="test") + expected = """\ + + + + + + + + + + + + + + + + + + +
 abc
01[1, 2, 3]-
+""" + assert result == expected diff --git a/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_matplotlib.py b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_matplotlib.py new file mode 100644 index 0000000000000000000000000000000000000000..fb7a77f1ddb27db66a847fc1a1d87d14d95822aa --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_matplotlib.py @@ -0,0 +1,335 @@ +import gc + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + IndexSlice, + Series, +) + +pytest.importorskip("matplotlib") +pytest.importorskip("jinja2") + +import matplotlib as mpl + +from pandas.io.formats.style import Styler + + +@pytest.fixture(autouse=True) +def mpl_cleanup(): + # matplotlib/testing/decorators.py#L24 + # 1) Resets units registry + # 2) Resets rc_context + # 3) Closes all figures + mpl = pytest.importorskip("matplotlib") + mpl_units = pytest.importorskip("matplotlib.units") + plt = pytest.importorskip("matplotlib.pyplot") + orig_units_registry = mpl_units.registry.copy() + with mpl.rc_context(): + mpl.use("template") + yield + mpl_units.registry.clear() + mpl_units.registry.update(orig_units_registry) + plt.close("all") + # https://matplotlib.org/stable/users/prev_whats_new/whats_new_3.6.0.html#garbage-collection-is-no-longer-run-on-figure-close # noqa: E501 + gc.collect(1) + + +@pytest.fixture +def df(): + return DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0) + + +@pytest.fixture +def df_blank(): + return DataFrame([[0, 0], [0, 0]], columns=["A", "B"], index=["X", "Y"]) + + +@pytest.fixture +def styler_blank(df_blank): + return Styler(df_blank, uuid_len=0) + + +@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"]) +def test_function_gradient(styler, f): + for c_map in [None, "YlOrRd"]: + result = getattr(styler, f)(cmap=c_map)._compute().ctx + assert all("#" in x[0][1] for x in result.values()) + assert result[(0, 0)] == result[(0, 1)] + assert result[(1, 0)] == result[(1, 1)] + + +@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"]) +def test_background_gradient_color(styler, f): + result = getattr(styler, f)(subset=IndexSlice[1, "A"])._compute().ctx + if f == "background_gradient": + assert result[(1, 0)] == [("background-color", "#fff7fb"), ("color", "#000000")] + elif f == "text_gradient": + assert result[(1, 0)] == [("color", "#fff7fb")] + + +@pytest.mark.parametrize( + "axis, expected", + [ + (0, ["low", "low", "high", "high"]), + (1, ["low", "high", "low", "high"]), + (None, ["low", "mid", "mid", "high"]), + ], +) +@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"]) +def test_background_gradient_axis(styler, axis, expected, f): + if f == "background_gradient": + colors = { + "low": [("background-color", "#f7fbff"), ("color", "#000000")], + "mid": [("background-color", "#abd0e6"), ("color", "#000000")], + "high": [("background-color", "#08306b"), ("color", "#f1f1f1")], + } + elif f == "text_gradient": + colors = { + "low": [("color", "#f7fbff")], + "mid": [("color", "#abd0e6")], + "high": [("color", "#08306b")], + } + result = getattr(styler, f)(cmap="Blues", axis=axis)._compute().ctx + for i, cell in enumerate([(0, 0), (0, 1), (1, 0), (1, 1)]): + assert result[cell] == colors[expected[i]] + + +@pytest.mark.parametrize( + "cmap, expected", + [ + ( + "PuBu", + { + (4, 5): [("background-color", "#86b0d3"), ("color", "#000000")], + (4, 6): [("background-color", "#83afd3"), ("color", "#f1f1f1")], + }, + ), + ( + "YlOrRd", + { + (4, 8): [("background-color", "#fd913e"), ("color", "#000000")], + (4, 9): [("background-color", "#fd8f3d"), ("color", "#f1f1f1")], + }, + ), + ( + None, + { + (7, 0): [("background-color", "#48c16e"), ("color", "#f1f1f1")], + (7, 1): [("background-color", "#4cc26c"), ("color", "#000000")], + }, + ), + ], +) +def test_text_color_threshold(cmap, expected): + # GH 39888 + df = DataFrame(np.arange(100).reshape(10, 10)) + result = df.style.background_gradient(cmap=cmap, axis=None)._compute().ctx + for k in expected.keys(): + assert result[k] == expected[k] + + +def test_background_gradient_vmin_vmax(): + # GH 12145 + df = DataFrame(range(5)) + ctx = df.style.background_gradient(vmin=1, vmax=3)._compute().ctx + assert ctx[(0, 0)] == ctx[(1, 0)] + assert ctx[(4, 0)] == ctx[(3, 0)] + + +def test_background_gradient_int64(): + # GH 28869 + df1 = Series(range(3)).to_frame() + df2 = Series(range(3), dtype="Int64").to_frame() + ctx1 = df1.style.background_gradient()._compute().ctx + ctx2 = df2.style.background_gradient()._compute().ctx + assert ctx2[(0, 0)] == ctx1[(0, 0)] + assert ctx2[(1, 0)] == ctx1[(1, 0)] + assert ctx2[(2, 0)] == ctx1[(2, 0)] + + +@pytest.mark.parametrize( + "axis, gmap, expected", + [ + ( + 0, + [1, 2], + { + (0, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 0): [("background-color", "#023858"), ("color", "#f1f1f1")], + (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ( + 1, + [1, 2], + { + (0, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (0, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ( + None, + np.array([[2, 1], [1, 2]]), + { + (0, 0): [("background-color", "#023858"), ("color", "#f1f1f1")], + (1, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ], +) +def test_background_gradient_gmap_array(styler_blank, axis, gmap, expected): + # tests when gmap is given as a sequence and converted to ndarray + result = styler_blank.background_gradient(axis=axis, gmap=gmap)._compute().ctx + assert result == expected + + +@pytest.mark.parametrize( + "gmap, axis", [([1, 2, 3], 0), ([1, 2], 1), (np.array([[1, 2], [1, 2]]), None)] +) +def test_background_gradient_gmap_array_raises(gmap, axis): + # test when gmap as converted ndarray is bad shape + df = DataFrame([[0, 0, 0], [0, 0, 0]]) + msg = "supplied 'gmap' is not correct shape" + with pytest.raises(ValueError, match=msg): + df.style.background_gradient(gmap=gmap, axis=axis)._compute() + + +@pytest.mark.parametrize( + "gmap", + [ + DataFrame( # reverse the columns + [[2, 1], [1, 2]], columns=["B", "A"], index=["X", "Y"] + ), + DataFrame( # reverse the index + [[2, 1], [1, 2]], columns=["A", "B"], index=["Y", "X"] + ), + DataFrame( # reverse the index and columns + [[1, 2], [2, 1]], columns=["B", "A"], index=["Y", "X"] + ), + DataFrame( # add unnecessary columns + [[1, 2, 3], [2, 1, 3]], columns=["A", "B", "C"], index=["X", "Y"] + ), + DataFrame( # add unnecessary index + [[1, 2], [2, 1], [3, 3]], columns=["A", "B"], index=["X", "Y", "Z"] + ), + ], +) +@pytest.mark.parametrize( + "subset, exp_gmap", # exp_gmap is underlying map DataFrame should conform to + [ + (None, [[1, 2], [2, 1]]), + (["A"], [[1], [2]]), # slice only column "A" in data and gmap + (["B", "A"], [[2, 1], [1, 2]]), # reverse the columns in data + (IndexSlice["X", :], [[1, 2]]), # slice only index "X" in data and gmap + (IndexSlice[["Y", "X"], :], [[2, 1], [1, 2]]), # reverse the index in data + ], +) +def test_background_gradient_gmap_dataframe_align(styler_blank, gmap, subset, exp_gmap): + # test gmap given as DataFrame that it aligns to the data including subset + expected = styler_blank.background_gradient(axis=None, gmap=exp_gmap, subset=subset) + result = styler_blank.background_gradient(axis=None, gmap=gmap, subset=subset) + assert expected._compute().ctx == result._compute().ctx + + +@pytest.mark.parametrize( + "gmap, axis, exp_gmap", + [ + (Series([2, 1], index=["Y", "X"]), 0, [[1, 1], [2, 2]]), # revrse the index + (Series([2, 1], index=["B", "A"]), 1, [[1, 2], [1, 2]]), # revrse the cols + (Series([1, 2, 3], index=["X", "Y", "Z"]), 0, [[1, 1], [2, 2]]), # add idx + (Series([1, 2, 3], index=["A", "B", "C"]), 1, [[1, 2], [1, 2]]), # add col + ], +) +def test_background_gradient_gmap_series_align(styler_blank, gmap, axis, exp_gmap): + # test gmap given as Series that it aligns to the data including subset + expected = styler_blank.background_gradient(axis=None, gmap=exp_gmap)._compute() + result = styler_blank.background_gradient(axis=axis, gmap=gmap)._compute() + assert expected.ctx == result.ctx + + +@pytest.mark.parametrize( + "gmap, axis", + [ + (DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 1), + (DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 0), + ], +) +def test_background_gradient_gmap_wrong_dataframe(styler_blank, gmap, axis): + # test giving a gmap in DataFrame but with wrong axis + msg = "'gmap' is a DataFrame but underlying data for operations is a Series" + with pytest.raises(ValueError, match=msg): + styler_blank.background_gradient(gmap=gmap, axis=axis)._compute() + + +def test_background_gradient_gmap_wrong_series(styler_blank): + # test giving a gmap in Series form but with wrong axis + msg = "'gmap' is a Series but underlying data for operations is a DataFrame" + gmap = Series([1, 2], index=["X", "Y"]) + with pytest.raises(ValueError, match=msg): + styler_blank.background_gradient(gmap=gmap, axis=None)._compute() + + +def test_background_gradient_nullable_dtypes(): + # GH 50712 + df1 = DataFrame([[1], [0], [np.nan]], dtype=float) + df2 = DataFrame([[1], [0], [None]], dtype="Int64") + + ctx1 = df1.style.background_gradient()._compute().ctx + ctx2 = df2.style.background_gradient()._compute().ctx + assert ctx1 == ctx2 + + +@pytest.mark.parametrize( + "cmap", + ["PuBu", mpl.colormaps["PuBu"]], +) +def test_bar_colormap(cmap): + data = DataFrame([[1, 2], [3, 4]]) + ctx = data.style.bar(cmap=cmap, axis=None)._compute().ctx + pubu_colors = { + (0, 0): "#d0d1e6", + (1, 0): "#056faf", + (0, 1): "#73a9cf", + (1, 1): "#023858", + } + for k, v in pubu_colors.items(): + assert v in ctx[k][1][1] + + +def test_bar_color_raises(df): + msg = "`color` must be string or list or tuple of 2 strings" + with pytest.raises(ValueError, match=msg): + df.style.bar(color={"a", "b"}).to_html() + with pytest.raises(ValueError, match=msg): + df.style.bar(color=["a", "b", "c"]).to_html() + + msg = "`color` and `cmap` cannot both be given" + with pytest.raises(ValueError, match=msg): + df.style.bar(color="something", cmap="something else").to_html() + + +@pytest.mark.parametrize( + "plot_method", + ["scatter", "hexbin"], +) +def test_pass_colormap_instance(df, plot_method): + # https://github.com/pandas-dev/pandas/issues/49374 + cmap = mpl.colors.ListedColormap([[1, 1, 1], [0, 0, 0]]) + df["c"] = df.A + df.B + kwargs = {"x": "A", "y": "B", "c": "c", "colormap": cmap} + if plot_method == "hexbin": + kwargs["C"] = kwargs.pop("c") + getattr(df.plot, plot_method)(**kwargs) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_non_unique.py b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_non_unique.py new file mode 100644 index 0000000000000000000000000000000000000000..e4d31fe21f2c9cf3454a67f8c7443382f7f1c0ef --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_non_unique.py @@ -0,0 +1,140 @@ +from textwrap import dedent + +import pytest + +from pandas import ( + DataFrame, + IndexSlice, +) + +pytest.importorskip("jinja2") + +from pandas.io.formats.style import Styler + + +@pytest.fixture +def df(): + return DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["i", "j", "j"], + columns=["c", "d", "d"], + dtype=float, + ) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0) + + +def test_format_non_unique(df): + # GH 41269 + + # test dict + html = df.style.format({"d": "{:.1f}"}).to_html() + for val in ["1.000000<", "4.000000<", "7.000000<"]: + assert val in html + for val in ["2.0<", "3.0<", "5.0<", "6.0<", "8.0<", "9.0<"]: + assert val in html + + # test subset + html = df.style.format(precision=1, subset=IndexSlice["j", "d"]).to_html() + for val in ["1.000000<", "4.000000<", "7.000000<", "2.000000<", "3.000000<"]: + assert val in html + for val in ["5.0<", "6.0<", "8.0<", "9.0<"]: + assert val in html + + +@pytest.mark.parametrize("func", ["apply", "map"]) +def test_apply_map_non_unique_raises(df, func): + # GH 41269 + if func == "apply": + op = lambda s: ["color: red;"] * len(s) + else: + op = lambda v: "color: red;" + + with pytest.raises(KeyError, match="`Styler.apply` and `.map` are not"): + getattr(df.style, func)(op)._compute() + + +def test_table_styles_dict_non_unique_index(styler): + styles = styler.set_table_styles( + {"j": [{"selector": "td", "props": "a: v;"}]}, axis=1 + ).table_styles + assert styles == [ + {"selector": "td.row1", "props": [("a", "v")]}, + {"selector": "td.row2", "props": [("a", "v")]}, + ] + + +def test_table_styles_dict_non_unique_columns(styler): + styles = styler.set_table_styles( + {"d": [{"selector": "td", "props": "a: v;"}]}, axis=0 + ).table_styles + assert styles == [ + {"selector": "td.col1", "props": [("a", "v")]}, + {"selector": "td.col2", "props": [("a", "v")]}, + ] + + +def test_tooltips_non_unique_raises(styler): + # ttips has unique keys + ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "b"]) + styler.set_tooltips(ttips=ttips) # OK + + # ttips has non-unique columns + ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "c"], index=["a", "b"]) + with pytest.raises(KeyError, match="Tooltips render only if `ttips` has unique"): + styler.set_tooltips(ttips=ttips) + + # ttips has non-unique index + ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"]) + with pytest.raises(KeyError, match="Tooltips render only if `ttips` has unique"): + styler.set_tooltips(ttips=ttips) + + +def test_set_td_classes_non_unique_raises(styler): + # classes has unique keys + classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "b"]) + styler.set_td_classes(classes=classes) # OK + + # classes has non-unique columns + classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "c"], index=["a", "b"]) + with pytest.raises(KeyError, match="Classes render only if `classes` has unique"): + styler.set_td_classes(classes=classes) + + # classes has non-unique index + classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"]) + with pytest.raises(KeyError, match="Classes render only if `classes` has unique"): + styler.set_td_classes(classes=classes) + + +def test_hide_columns_non_unique(styler): + ctx = styler.hide(["d"], axis="columns")._translate(True, True) + + assert ctx["head"][0][1]["display_value"] == "c" + assert ctx["head"][0][1]["is_visible"] is True + + assert ctx["head"][0][2]["display_value"] == "d" + assert ctx["head"][0][2]["is_visible"] is False + + assert ctx["head"][0][3]["display_value"] == "d" + assert ctx["head"][0][3]["is_visible"] is False + + assert ctx["body"][0][1]["is_visible"] is True + assert ctx["body"][0][2]["is_visible"] is False + assert ctx["body"][0][3]["is_visible"] is False + + +def test_latex_non_unique(styler): + result = styler.to_latex() + assert result == dedent( + """\ + \\begin{tabular}{lrrr} + & c & d & d \\\\ + i & 1.000000 & 2.000000 & 3.000000 \\\\ + j & 4.000000 & 5.000000 & 6.000000 \\\\ + j & 7.000000 & 8.000000 & 9.000000 \\\\ + \\end{tabular} + """ + ) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_style.py b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_style.py new file mode 100644 index 0000000000000000000000000000000000000000..6fa72bd48031cca999b81cccfcedafcd3abcd924 --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_style.py @@ -0,0 +1,1588 @@ +import contextlib +import copy +import re +from textwrap import dedent + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + IndexSlice, + MultiIndex, + Series, + option_context, +) +import pandas._testing as tm + +jinja2 = pytest.importorskip("jinja2") +from pandas.io.formats.style import ( # isort:skip + Styler, +) +from pandas.io.formats.style_render import ( + _get_level_lengths, + _get_trimming_maximums, + maybe_convert_css_to_tuples, + non_reducing_slice, +) + + +@pytest.fixture +def mi_df(): + return DataFrame( + [[1, 2], [3, 4]], + index=MultiIndex.from_product([["i0"], ["i1_a", "i1_b"]]), + columns=MultiIndex.from_product([["c0"], ["c1_a", "c1_b"]]), + dtype=int, + ) + + +@pytest.fixture +def mi_styler(mi_df): + return Styler(mi_df, uuid_len=0) + + +@pytest.fixture +def mi_styler_comp(mi_styler): + # comprehensively add features to mi_styler + mi_styler = mi_styler._copy(deepcopy=True) + mi_styler.css = {**mi_styler.css, "row": "ROW", "col": "COL"} + mi_styler.uuid_len = 5 + mi_styler.uuid = "abcde" + mi_styler.set_caption("capt") + mi_styler.set_table_styles([{"selector": "a", "props": "a:v;"}]) + mi_styler.hide(axis="columns") + mi_styler.hide([("c0", "c1_a")], axis="columns", names=True) + mi_styler.hide(axis="index") + mi_styler.hide([("i0", "i1_a")], axis="index", names=True) + mi_styler.set_table_attributes('class="box"') + other = mi_styler.data.agg(["mean"]) + other.index = MultiIndex.from_product([[""], other.index]) + mi_styler.concat(other.style) + mi_styler.format(na_rep="MISSING", precision=3) + mi_styler.format_index(precision=2, axis=0) + mi_styler.format_index(precision=4, axis=1) + mi_styler.highlight_max(axis=None) + mi_styler.map_index(lambda x: "color: white;", axis=0) + mi_styler.map_index(lambda x: "color: black;", axis=1) + mi_styler.set_td_classes( + DataFrame( + [["a", "b"], ["a", "c"]], index=mi_styler.index, columns=mi_styler.columns + ) + ) + mi_styler.set_tooltips( + DataFrame( + [["a2", "b2"], ["a2", "c2"]], + index=mi_styler.index, + columns=mi_styler.columns, + ) + ) + return mi_styler + + +@pytest.fixture +def blank_value(): + return " " + + +@pytest.fixture +def df(): + df = DataFrame({"A": [0, 1], "B": np.random.default_rng(2).standard_normal(2)}) + return df + + +@pytest.fixture +def styler(df): + df = DataFrame({"A": [0, 1], "B": np.random.default_rng(2).standard_normal(2)}) + return Styler(df) + + +@pytest.mark.parametrize( + "sparse_columns, exp_cols", + [ + ( + True, + [ + {"is_visible": True, "attributes": 'colspan="2"', "value": "c0"}, + {"is_visible": False, "attributes": "", "value": "c0"}, + ], + ), + ( + False, + [ + {"is_visible": True, "attributes": "", "value": "c0"}, + {"is_visible": True, "attributes": "", "value": "c0"}, + ], + ), + ], +) +def test_mi_styler_sparsify_columns(mi_styler, sparse_columns, exp_cols): + exp_l1_c0 = {"is_visible": True, "attributes": "", "display_value": "c1_a"} + exp_l1_c1 = {"is_visible": True, "attributes": "", "display_value": "c1_b"} + + ctx = mi_styler._translate(True, sparse_columns) + + assert exp_cols[0].items() <= ctx["head"][0][2].items() + assert exp_cols[1].items() <= ctx["head"][0][3].items() + assert exp_l1_c0.items() <= ctx["head"][1][2].items() + assert exp_l1_c1.items() <= ctx["head"][1][3].items() + + +@pytest.mark.parametrize( + "sparse_index, exp_rows", + [ + ( + True, + [ + {"is_visible": True, "attributes": 'rowspan="2"', "value": "i0"}, + {"is_visible": False, "attributes": "", "value": "i0"}, + ], + ), + ( + False, + [ + {"is_visible": True, "attributes": "", "value": "i0"}, + {"is_visible": True, "attributes": "", "value": "i0"}, + ], + ), + ], +) +def test_mi_styler_sparsify_index(mi_styler, sparse_index, exp_rows): + exp_l1_r0 = {"is_visible": True, "attributes": "", "display_value": "i1_a"} + exp_l1_r1 = {"is_visible": True, "attributes": "", "display_value": "i1_b"} + + ctx = mi_styler._translate(sparse_index, True) + + assert exp_rows[0].items() <= ctx["body"][0][0].items() + assert exp_rows[1].items() <= ctx["body"][1][0].items() + assert exp_l1_r0.items() <= ctx["body"][0][1].items() + assert exp_l1_r1.items() <= ctx["body"][1][1].items() + + +def test_mi_styler_sparsify_options(mi_styler): + with option_context("styler.sparse.index", False): + html1 = mi_styler.to_html() + with option_context("styler.sparse.index", True): + html2 = mi_styler.to_html() + + assert html1 != html2 + + with option_context("styler.sparse.columns", False): + html1 = mi_styler.to_html() + with option_context("styler.sparse.columns", True): + html2 = mi_styler.to_html() + + assert html1 != html2 + + +@pytest.mark.parametrize( + "rn, cn, max_els, max_rows, max_cols, exp_rn, exp_cn", + [ + (100, 100, 100, None, None, 12, 6), # reduce to (12, 6) < 100 elements + (1000, 3, 750, None, None, 250, 3), # dynamically reduce rows to 250, keep cols + (4, 1000, 500, None, None, 4, 125), # dynamically reduce cols to 125, keep rows + (1000, 3, 750, 10, None, 10, 3), # overwrite above dynamics with max_row + (4, 1000, 500, None, 5, 4, 5), # overwrite above dynamics with max_col + (100, 100, 700, 50, 50, 25, 25), # rows cols below given maxes so < 700 elmts + ], +) +def test_trimming_maximum(rn, cn, max_els, max_rows, max_cols, exp_rn, exp_cn): + rn, cn = _get_trimming_maximums( + rn, cn, max_els, max_rows, max_cols, scaling_factor=0.5 + ) + assert (rn, cn) == (exp_rn, exp_cn) + + +@pytest.mark.parametrize( + "option, val", + [ + ("styler.render.max_elements", 6), + ("styler.render.max_rows", 3), + ], +) +def test_render_trimming_rows(option, val): + # test auto and specific trimming of rows + df = DataFrame(np.arange(120).reshape(60, 2)) + with option_context(option, val): + ctx = df.style._translate(True, True) + assert len(ctx["head"][0]) == 3 # index + 2 data cols + assert len(ctx["body"]) == 4 # 3 data rows + trimming row + assert len(ctx["body"][0]) == 3 # index + 2 data cols + + +@pytest.mark.parametrize( + "option, val", + [ + ("styler.render.max_elements", 6), + ("styler.render.max_columns", 2), + ], +) +def test_render_trimming_cols(option, val): + # test auto and specific trimming of cols + df = DataFrame(np.arange(30).reshape(3, 10)) + with option_context(option, val): + ctx = df.style._translate(True, True) + assert len(ctx["head"][0]) == 4 # index + 2 data cols + trimming col + assert len(ctx["body"]) == 3 # 3 data rows + assert len(ctx["body"][0]) == 4 # index + 2 data cols + trimming col + + +def test_render_trimming_mi(): + midx = MultiIndex.from_product([[1, 2], [1, 2, 3]]) + df = DataFrame(np.arange(36).reshape(6, 6), columns=midx, index=midx) + with option_context("styler.render.max_elements", 4): + ctx = df.style._translate(True, True) + + assert len(ctx["body"][0]) == 5 # 2 indexes + 2 data cols + trimming row + assert {"attributes": 'rowspan="2"'}.items() <= ctx["body"][0][0].items() + assert {"class": "data row0 col_trim"}.items() <= ctx["body"][0][4].items() + assert {"class": "data row_trim col_trim"}.items() <= ctx["body"][2][4].items() + assert len(ctx["body"]) == 3 # 2 data rows + trimming row + + +def test_render_empty_mi(): + # GH 43305 + df = DataFrame(index=MultiIndex.from_product([["A"], [0, 1]], names=[None, "one"])) + expected = dedent( + """\ + > + + +   + one + + + """ + ) + assert expected in df.style.to_html() + + +@pytest.mark.parametrize("comprehensive", [True, False]) +@pytest.mark.parametrize("render", [True, False]) +@pytest.mark.parametrize("deepcopy", [True, False]) +def test_copy(comprehensive, render, deepcopy, mi_styler, mi_styler_comp): + styler = mi_styler_comp if comprehensive else mi_styler + styler.uuid_len = 5 + + s2 = copy.deepcopy(styler) if deepcopy else copy.copy(styler) # make copy and check + assert s2 is not styler + + if render: + styler.to_html() + + excl = [ + "cellstyle_map", # render time vars.. + "cellstyle_map_columns", + "cellstyle_map_index", + "template_latex", # render templates are class level + "template_html", + "template_html_style", + "template_html_table", + ] + if not deepcopy: # check memory locations are equal for all included attributes + for attr in [a for a in styler.__dict__ if (not callable(a) and a not in excl)]: + assert id(getattr(s2, attr)) == id(getattr(styler, attr)) + else: # check memory locations are different for nested or mutable vars + shallow = [ + "data", + "columns", + "index", + "uuid_len", + "uuid", + "caption", + "cell_ids", + "hide_index_", + "hide_columns_", + "hide_index_names", + "hide_column_names", + "table_attributes", + ] + for attr in shallow: + assert id(getattr(s2, attr)) == id(getattr(styler, attr)) + + for attr in [ + a + for a in styler.__dict__ + if (not callable(a) and a not in excl and a not in shallow) + ]: + if getattr(s2, attr) is None: + assert id(getattr(s2, attr)) == id(getattr(styler, attr)) + else: + assert id(getattr(s2, attr)) != id(getattr(styler, attr)) + + +@pytest.mark.parametrize("deepcopy", [True, False]) +def test_inherited_copy(mi_styler, deepcopy): + # Ensure that the inherited class is preserved when a Styler object is copied. + # GH 52728 + class CustomStyler(Styler): + pass + + custom_styler = CustomStyler(mi_styler.data) + custom_styler_copy = ( + copy.deepcopy(custom_styler) if deepcopy else copy.copy(custom_styler) + ) + assert isinstance(custom_styler_copy, CustomStyler) + + +def test_clear(mi_styler_comp): + # NOTE: if this test fails for new features then 'mi_styler_comp' should be updated + # to ensure proper testing of the 'copy', 'clear', 'export' methods with new feature + # GH 40675 + styler = mi_styler_comp + styler._compute() # execute applied methods + + clean_copy = Styler(styler.data, uuid=styler.uuid) + + excl = [ + "data", + "index", + "columns", + "uuid", + "uuid_len", # uuid is set to be the same on styler and clean_copy + "cell_ids", + "cellstyle_map", # execution time only + "cellstyle_map_columns", # execution time only + "cellstyle_map_index", # execution time only + "template_latex", # render templates are class level + "template_html", + "template_html_style", + "template_html_table", + ] + # tests vars are not same vals on obj and clean copy before clear (except for excl) + for attr in [a for a in styler.__dict__ if not (callable(a) or a in excl)]: + res = getattr(styler, attr) == getattr(clean_copy, attr) + if hasattr(res, "__iter__") and len(res) > 0: + assert not all(res) # some element in iterable differs + elif hasattr(res, "__iter__") and len(res) == 0: + pass # empty array + else: + assert not res # explicit var differs + + # test vars have same vales on obj and clean copy after clearing + styler.clear() + for attr in [a for a in styler.__dict__ if not callable(a)]: + res = getattr(styler, attr) == getattr(clean_copy, attr) + assert all(res) if hasattr(res, "__iter__") else res + + +def test_export(mi_styler_comp, mi_styler): + exp_attrs = [ + "_todo", + "hide_index_", + "hide_index_names", + "hide_columns_", + "hide_column_names", + "table_attributes", + "table_styles", + "css", + ] + for attr in exp_attrs: + check = getattr(mi_styler, attr) == getattr(mi_styler_comp, attr) + assert not ( + all(check) if (hasattr(check, "__iter__") and len(check) > 0) else check + ) + + export = mi_styler_comp.export() + used = mi_styler.use(export) + for attr in exp_attrs: + check = getattr(used, attr) == getattr(mi_styler_comp, attr) + assert all(check) if (hasattr(check, "__iter__") and len(check) > 0) else check + + used.to_html() + + +def test_hide_raises(mi_styler): + msg = "`subset` and `level` cannot be passed simultaneously" + with pytest.raises(ValueError, match=msg): + mi_styler.hide(axis="index", subset="something", level="something else") + + msg = "`level` must be of type `int`, `str` or list of such" + with pytest.raises(ValueError, match=msg): + mi_styler.hide(axis="index", level={"bad": 1, "type": 2}) + + +@pytest.mark.parametrize("level", [1, "one", [1], ["one"]]) +def test_hide_index_level(mi_styler, level): + mi_styler.index.names, mi_styler.columns.names = ["zero", "one"], ["zero", "one"] + ctx = mi_styler.hide(axis="index", level=level)._translate(False, True) + assert len(ctx["head"][0]) == 3 + assert len(ctx["head"][1]) == 3 + assert len(ctx["head"][2]) == 4 + assert ctx["head"][2][0]["is_visible"] + assert not ctx["head"][2][1]["is_visible"] + + assert ctx["body"][0][0]["is_visible"] + assert not ctx["body"][0][1]["is_visible"] + assert ctx["body"][1][0]["is_visible"] + assert not ctx["body"][1][1]["is_visible"] + + +@pytest.mark.parametrize("level", [1, "one", [1], ["one"]]) +@pytest.mark.parametrize("names", [True, False]) +def test_hide_columns_level(mi_styler, level, names): + mi_styler.columns.names = ["zero", "one"] + if names: + mi_styler.index.names = ["zero", "one"] + ctx = mi_styler.hide(axis="columns", level=level)._translate(True, False) + assert len(ctx["head"]) == (2 if names else 1) + + +@pytest.mark.parametrize("method", ["map", "apply"]) +@pytest.mark.parametrize("axis", ["index", "columns"]) +def test_apply_map_header(method, axis): + # GH 41893 + df = DataFrame({"A": [0, 0], "B": [1, 1]}, index=["C", "D"]) + func = { + "apply": lambda s: ["attr: val" if ("A" in v or "C" in v) else "" for v in s], + "map": lambda v: "attr: val" if ("A" in v or "C" in v) else "", + } + + # test execution added to todo + result = getattr(df.style, f"{method}_index")(func[method], axis=axis) + assert len(result._todo) == 1 + assert len(getattr(result, f"ctx_{axis}")) == 0 + + # test ctx object on compute + result._compute() + expected = { + (0, 0): [("attr", "val")], + } + assert getattr(result, f"ctx_{axis}") == expected + + +@pytest.mark.parametrize("method", ["apply", "map"]) +@pytest.mark.parametrize("axis", ["index", "columns"]) +def test_apply_map_header_mi(mi_styler, method, axis): + # GH 41893 + func = { + "apply": lambda s: ["attr: val;" if "b" in v else "" for v in s], + "map": lambda v: "attr: val" if "b" in v else "", + } + result = getattr(mi_styler, f"{method}_index")(func[method], axis=axis)._compute() + expected = {(1, 1): [("attr", "val")]} + assert getattr(result, f"ctx_{axis}") == expected + + +def test_apply_map_header_raises(mi_styler): + # GH 41893 + with pytest.raises(ValueError, match="No axis named bad for object type DataFrame"): + mi_styler.map_index(lambda v: "attr: val;", axis="bad")._compute() + + +class TestStyler: + def test_init_non_pandas(self): + msg = "``data`` must be a Series or DataFrame" + with pytest.raises(TypeError, match=msg): + Styler([1, 2, 3]) + + def test_init_series(self): + result = Styler(Series([1, 2])) + assert result.data.ndim == 2 + + def test_repr_html_ok(self, styler): + styler._repr_html_() + + def test_repr_html_mathjax(self, styler): + # gh-19824 / 41395 + assert "tex2jax_ignore" not in styler._repr_html_() + + with option_context("styler.html.mathjax", False): + assert "tex2jax_ignore" in styler._repr_html_() + + def test_update_ctx(self, styler): + styler._update_ctx(DataFrame({"A": ["color: red", "color: blue"]})) + expected = {(0, 0): [("color", "red")], (1, 0): [("color", "blue")]} + assert styler.ctx == expected + + def test_update_ctx_flatten_multi_and_trailing_semi(self, styler): + attrs = DataFrame({"A": ["color: red; foo: bar", "color:blue ; foo: baz;"]}) + styler._update_ctx(attrs) + expected = { + (0, 0): [("color", "red"), ("foo", "bar")], + (1, 0): [("color", "blue"), ("foo", "baz")], + } + assert styler.ctx == expected + + def test_render(self): + df = DataFrame({"A": [0, 1]}) + style = lambda x: Series(["color: red", "color: blue"], name=x.name) + s = Styler(df, uuid="AB").apply(style) + s.to_html() + # it worked? + + def test_multiple_render(self, df): + # GH 39396 + s = Styler(df, uuid_len=0).map(lambda x: "color: red;", subset=["A"]) + s.to_html() # do 2 renders to ensure css styles not duplicated + assert ( + '" in s.to_html() + ) + + def test_render_empty_dfs(self): + empty_df = DataFrame() + es = Styler(empty_df) + es.to_html() + # An index but no columns + DataFrame(columns=["a"]).style.to_html() + # A column but no index + DataFrame(index=["a"]).style.to_html() + # No IndexError raised? + + def test_render_double(self): + df = DataFrame({"A": [0, 1]}) + style = lambda x: Series( + ["color: red; border: 1px", "color: blue; border: 2px"], name=x.name + ) + s = Styler(df, uuid="AB").apply(style) + s.to_html() + # it worked? + + def test_set_properties(self): + df = DataFrame({"A": [0, 1]}) + result = df.style.set_properties(color="white", size="10px")._compute().ctx + # order is deterministic + v = [("color", "white"), ("size", "10px")] + expected = {(0, 0): v, (1, 0): v} + assert result.keys() == expected.keys() + for v1, v2 in zip(result.values(), expected.values()): + assert sorted(v1) == sorted(v2) + + def test_set_properties_subset(self): + df = DataFrame({"A": [0, 1]}) + result = ( + df.style.set_properties(subset=IndexSlice[0, "A"], color="white") + ._compute() + .ctx + ) + expected = {(0, 0): [("color", "white")]} + assert result == expected + + def test_empty_index_name_doesnt_display(self, blank_value): + # https://github.com/pandas-dev/pandas/pull/12090#issuecomment-180695902 + df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + result = df.style._translate(True, True) + assert len(result["head"]) == 1 + expected = { + "class": "blank level0", + "type": "th", + "value": blank_value, + "is_visible": True, + "display_value": blank_value, + } + assert expected.items() <= result["head"][0][0].items() + + def test_index_name(self): + # https://github.com/pandas-dev/pandas/issues/11655 + df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + result = df.set_index("A").style._translate(True, True) + expected = { + "class": "index_name level0", + "type": "th", + "value": "A", + "is_visible": True, + "display_value": "A", + } + assert expected.items() <= result["head"][1][0].items() + + def test_numeric_columns(self): + # https://github.com/pandas-dev/pandas/issues/12125 + # smoke test for _translate + df = DataFrame({0: [1, 2, 3]}) + df.style._translate(True, True) + + def test_apply_axis(self): + df = DataFrame({"A": [0, 0], "B": [1, 1]}) + f = lambda x: [f"val: {x.max()}" for v in x] + result = df.style.apply(f, axis=1) + assert len(result._todo) == 1 + assert len(result.ctx) == 0 + result._compute() + expected = { + (0, 0): [("val", "1")], + (0, 1): [("val", "1")], + (1, 0): [("val", "1")], + (1, 1): [("val", "1")], + } + assert result.ctx == expected + + result = df.style.apply(f, axis=0) + expected = { + (0, 0): [("val", "0")], + (0, 1): [("val", "1")], + (1, 0): [("val", "0")], + (1, 1): [("val", "1")], + } + result._compute() + assert result.ctx == expected + result = df.style.apply(f) # default + result._compute() + assert result.ctx == expected + + @pytest.mark.parametrize("axis", [0, 1]) + def test_apply_series_return(self, axis): + # GH 42014 + df = DataFrame([[1, 2], [3, 4]], index=["X", "Y"], columns=["X", "Y"]) + + # test Series return where len(Series) < df.index or df.columns but labels OK + func = lambda s: Series(["color: red;"], index=["Y"]) + result = df.style.apply(func, axis=axis)._compute().ctx + assert result[(1, 1)] == [("color", "red")] + assert result[(1 - axis, axis)] == [("color", "red")] + + # test Series return where labels align but different order + func = lambda s: Series(["color: red;", "color: blue;"], index=["Y", "X"]) + result = df.style.apply(func, axis=axis)._compute().ctx + assert result[(0, 0)] == [("color", "blue")] + assert result[(1, 1)] == [("color", "red")] + assert result[(1 - axis, axis)] == [("color", "red")] + assert result[(axis, 1 - axis)] == [("color", "blue")] + + @pytest.mark.parametrize("index", [False, True]) + @pytest.mark.parametrize("columns", [False, True]) + def test_apply_dataframe_return(self, index, columns): + # GH 42014 + df = DataFrame([[1, 2], [3, 4]], index=["X", "Y"], columns=["X", "Y"]) + idxs = ["X", "Y"] if index else ["Y"] + cols = ["X", "Y"] if columns else ["Y"] + df_styles = DataFrame("color: red;", index=idxs, columns=cols) + result = df.style.apply(lambda x: df_styles, axis=None)._compute().ctx + + assert result[(1, 1)] == [("color", "red")] # (Y,Y) styles always present + assert (result[(0, 1)] == [("color", "red")]) is index # (X,Y) only if index + assert (result[(1, 0)] == [("color", "red")]) is columns # (Y,X) only if cols + assert (result[(0, 0)] == [("color", "red")]) is (index and columns) # (X,X) + + @pytest.mark.parametrize( + "slice_", + [ + IndexSlice[:], + IndexSlice[:, ["A"]], + IndexSlice[[1], :], + IndexSlice[[1], ["A"]], + IndexSlice[:2, ["A", "B"]], + ], + ) + @pytest.mark.parametrize("axis", [0, 1]) + def test_apply_subset(self, slice_, axis, df): + def h(x, color="bar"): + return Series(f"color: {color}", index=x.index, name=x.name) + + result = df.style.apply(h, axis=axis, subset=slice_, color="baz")._compute().ctx + expected = { + (r, c): [("color", "baz")] + for r, row in enumerate(df.index) + for c, col in enumerate(df.columns) + if row in df.loc[slice_].index and col in df.loc[slice_].columns + } + assert result == expected + + @pytest.mark.parametrize( + "slice_", + [ + IndexSlice[:], + IndexSlice[:, ["A"]], + IndexSlice[[1], :], + IndexSlice[[1], ["A"]], + IndexSlice[:2, ["A", "B"]], + ], + ) + def test_map_subset(self, slice_, df): + result = df.style.map(lambda x: "color:baz;", subset=slice_)._compute().ctx + expected = { + (r, c): [("color", "baz")] + for r, row in enumerate(df.index) + for c, col in enumerate(df.columns) + if row in df.loc[slice_].index and col in df.loc[slice_].columns + } + assert result == expected + + @pytest.mark.parametrize( + "slice_", + [ + IndexSlice[:, IndexSlice["x", "A"]], + IndexSlice[:, IndexSlice[:, "A"]], + IndexSlice[:, IndexSlice[:, ["A", "C"]]], # missing col element + IndexSlice[IndexSlice["a", 1], :], + IndexSlice[IndexSlice[:, 1], :], + IndexSlice[IndexSlice[:, [1, 3]], :], # missing row element + IndexSlice[:, ("x", "A")], + IndexSlice[("a", 1), :], + ], + ) + def test_map_subset_multiindex(self, slice_): + # GH 19861 + # edited for GH 33562 + if ( + isinstance(slice_[-1], tuple) + and isinstance(slice_[-1][-1], list) + and "C" in slice_[-1][-1] + ): + ctx = pytest.raises(KeyError, match="C") + elif ( + isinstance(slice_[0], tuple) + and isinstance(slice_[0][1], list) + and 3 in slice_[0][1] + ): + ctx = pytest.raises(KeyError, match="3") + else: + ctx = contextlib.nullcontext() + + idx = MultiIndex.from_product([["a", "b"], [1, 2]]) + col = MultiIndex.from_product([["x", "y"], ["A", "B"]]) + df = DataFrame(np.random.default_rng(2).random((4, 4)), columns=col, index=idx) + + with ctx: + df.style.map(lambda x: "color: red;", subset=slice_).to_html() + + def test_map_subset_multiindex_code(self): + # https://github.com/pandas-dev/pandas/issues/25858 + # Checks styler.map works with multindex when codes are provided + codes = np.array([[0, 0, 1, 1], [0, 1, 0, 1]]) + columns = MultiIndex( + levels=[["a", "b"], ["%", "#"]], codes=codes, names=["", ""] + ) + df = DataFrame( + [[1, -1, 1, 1], [-1, 1, 1, 1]], index=["hello", "world"], columns=columns + ) + pct_subset = IndexSlice[:, IndexSlice[:, "%":"%"]] + + def color_negative_red(val): + color = "red" if val < 0 else "black" + return f"color: {color}" + + df.loc[pct_subset] + df.style.map(color_negative_red, subset=pct_subset) + + @pytest.mark.parametrize( + "stylefunc", ["background_gradient", "bar", "text_gradient"] + ) + def test_subset_for_boolean_cols(self, stylefunc): + # GH47838 + df = DataFrame( + [ + [1, 2], + [3, 4], + ], + columns=[False, True], + ) + styled = getattr(df.style, stylefunc)() + styled._compute() + assert set(styled.ctx) == {(0, 0), (0, 1), (1, 0), (1, 1)} + + def test_empty(self): + df = DataFrame({"A": [1, 0]}) + s = df.style + s.ctx = {(0, 0): [("color", "red")], (1, 0): [("", "")]} + + result = s._translate(True, True)["cellstyle"] + expected = [ + {"props": [("color", "red")], "selectors": ["row0_col0"]}, + {"props": [("", "")], "selectors": ["row1_col0"]}, + ] + assert result == expected + + def test_duplicate(self): + df = DataFrame({"A": [1, 0]}) + s = df.style + s.ctx = {(0, 0): [("color", "red")], (1, 0): [("color", "red")]} + + result = s._translate(True, True)["cellstyle"] + expected = [ + {"props": [("color", "red")], "selectors": ["row0_col0", "row1_col0"]} + ] + assert result == expected + + def test_init_with_na_rep(self): + # GH 21527 28358 + df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + + ctx = Styler(df, na_rep="NA")._translate(True, True) + assert ctx["body"][0][1]["display_value"] == "NA" + assert ctx["body"][0][2]["display_value"] == "NA" + + def test_caption(self, df): + styler = Styler(df, caption="foo") + result = styler.to_html() + assert all(["caption" in result, "foo" in result]) + + styler = df.style + result = styler.set_caption("baz") + assert styler is result + assert styler.caption == "baz" + + def test_uuid(self, df): + styler = Styler(df, uuid="abc123") + result = styler.to_html() + assert "abc123" in result + + styler = df.style + result = styler.set_uuid("aaa") + assert result is styler + assert result.uuid == "aaa" + + def test_unique_id(self): + # See https://github.com/pandas-dev/pandas/issues/16780 + df = DataFrame({"a": [1, 3, 5, 6], "b": [2, 4, 12, 21]}) + result = df.style.to_html(uuid="test") + assert "test" in result + ids = re.findall('id="(.*?)"', result) + assert np.unique(ids).size == len(ids) + + def test_table_styles(self, df): + style = [{"selector": "th", "props": [("foo", "bar")]}] # default format + styler = Styler(df, table_styles=style) + result = " ".join(styler.to_html().split()) + assert "th { foo: bar; }" in result + + styler = df.style + result = styler.set_table_styles(style) + assert styler is result + assert styler.table_styles == style + + # GH 39563 + style = [{"selector": "th", "props": "foo:bar;"}] # css string format + styler = df.style.set_table_styles(style) + result = " ".join(styler.to_html().split()) + assert "th { foo: bar; }" in result + + def test_table_styles_multiple(self, df): + ctx = df.style.set_table_styles( + [ + {"selector": "th,td", "props": "color:red;"}, + {"selector": "tr", "props": "color:green;"}, + ] + )._translate(True, True)["table_styles"] + assert ctx == [ + {"selector": "th", "props": [("color", "red")]}, + {"selector": "td", "props": [("color", "red")]}, + {"selector": "tr", "props": [("color", "green")]}, + ] + + def test_table_styles_dict_multiple_selectors(self, df): + # GH 44011 + result = df.style.set_table_styles( + { + "B": [ + {"selector": "th,td", "props": [("border-left", "2px solid black")]} + ] + } + )._translate(True, True)["table_styles"] + + expected = [ + {"selector": "th.col1", "props": [("border-left", "2px solid black")]}, + {"selector": "td.col1", "props": [("border-left", "2px solid black")]}, + ] + + assert result == expected + + def test_maybe_convert_css_to_tuples(self): + expected = [("a", "b"), ("c", "d e")] + assert maybe_convert_css_to_tuples("a:b;c:d e;") == expected + assert maybe_convert_css_to_tuples("a: b ;c: d e ") == expected + expected = [] + assert maybe_convert_css_to_tuples("") == expected + + def test_maybe_convert_css_to_tuples_err(self): + msg = "Styles supplied as string must follow CSS rule formats" + with pytest.raises(ValueError, match=msg): + maybe_convert_css_to_tuples("err") + + def test_table_attributes(self, df): + attributes = 'class="foo" data-bar' + styler = Styler(df, table_attributes=attributes) + result = styler.to_html() + assert 'class="foo" data-bar' in result + + result = df.style.set_table_attributes(attributes).to_html() + assert 'class="foo" data-bar' in result + + def test_apply_none(self): + def f(x): + return DataFrame( + np.where(x == x.max(), "color: red", ""), + index=x.index, + columns=x.columns, + ) + + result = DataFrame([[1, 2], [3, 4]]).style.apply(f, axis=None)._compute().ctx + assert result[(1, 1)] == [("color", "red")] + + def test_trim(self, df): + result = df.style.to_html() # trim=True + assert result.count("#") == 0 + + result = df.style.highlight_max().to_html() + assert result.count("#") == len(df.columns) + + def test_export(self, df, styler): + f = lambda x: "color: red" if x > 0 else "color: blue" + g = lambda x, z: f"color: {z}" if x > 0 else f"color: {z}" + style1 = styler + style1.map(f).map(g, z="b").highlight_max()._compute() # = render + result = style1.export() + style2 = df.style + style2.use(result) + assert style1._todo == style2._todo + style2.to_html() + + def test_bad_apply_shape(self): + df = DataFrame([[1, 2], [3, 4]], index=["A", "B"], columns=["X", "Y"]) + + msg = "resulted in the apply method collapsing to a Series." + with pytest.raises(ValueError, match=msg): + df.style._apply(lambda x: "x") + + msg = "created invalid {} labels" + with pytest.raises(ValueError, match=msg.format("index")): + df.style._apply(lambda x: [""]) + + with pytest.raises(ValueError, match=msg.format("index")): + df.style._apply(lambda x: ["", "", "", ""]) + + with pytest.raises(ValueError, match=msg.format("index")): + df.style._apply(lambda x: Series(["a:v;", ""], index=["A", "C"]), axis=0) + + with pytest.raises(ValueError, match=msg.format("columns")): + df.style._apply(lambda x: ["", "", ""], axis=1) + + with pytest.raises(ValueError, match=msg.format("columns")): + df.style._apply(lambda x: Series(["a:v;", ""], index=["X", "Z"]), axis=1) + + msg = "returned ndarray with wrong shape" + with pytest.raises(ValueError, match=msg): + df.style._apply(lambda x: np.array([[""], [""]]), axis=None) + + def test_apply_bad_return(self): + def f(x): + return "" + + df = DataFrame([[1, 2], [3, 4]]) + msg = ( + "must return a DataFrame or ndarray when passed to `Styler.apply` " + "with axis=None" + ) + with pytest.raises(TypeError, match=msg): + df.style._apply(f, axis=None) + + @pytest.mark.parametrize("axis", ["index", "columns"]) + def test_apply_bad_labels(self, axis): + def f(x): + return DataFrame(**{axis: ["bad", "labels"]}) + + df = DataFrame([[1, 2], [3, 4]]) + msg = f"created invalid {axis} labels." + with pytest.raises(ValueError, match=msg): + df.style._apply(f, axis=None) + + def test_get_level_lengths(self): + index = MultiIndex.from_product([["a", "b"], [0, 1, 2]]) + expected = { + (0, 0): 3, + (0, 3): 3, + (1, 0): 1, + (1, 1): 1, + (1, 2): 1, + (1, 3): 1, + (1, 4): 1, + (1, 5): 1, + } + result = _get_level_lengths(index, sparsify=True, max_index=100) + tm.assert_dict_equal(result, expected) + + expected = { + (0, 0): 1, + (0, 1): 1, + (0, 2): 1, + (0, 3): 1, + (0, 4): 1, + (0, 5): 1, + (1, 0): 1, + (1, 1): 1, + (1, 2): 1, + (1, 3): 1, + (1, 4): 1, + (1, 5): 1, + } + result = _get_level_lengths(index, sparsify=False, max_index=100) + tm.assert_dict_equal(result, expected) + + def test_get_level_lengths_un_sorted(self): + index = MultiIndex.from_arrays([[1, 1, 2, 1], ["a", "b", "b", "d"]]) + expected = { + (0, 0): 2, + (0, 2): 1, + (0, 3): 1, + (1, 0): 1, + (1, 1): 1, + (1, 2): 1, + (1, 3): 1, + } + result = _get_level_lengths(index, sparsify=True, max_index=100) + tm.assert_dict_equal(result, expected) + + expected = { + (0, 0): 1, + (0, 1): 1, + (0, 2): 1, + (0, 3): 1, + (1, 0): 1, + (1, 1): 1, + (1, 2): 1, + (1, 3): 1, + } + result = _get_level_lengths(index, sparsify=False, max_index=100) + tm.assert_dict_equal(result, expected) + + def test_mi_sparse_index_names(self, blank_value): + # Test the class names and displayed value are correct on rendering MI names + df = DataFrame( + {"A": [1, 2]}, + index=MultiIndex.from_arrays( + [["a", "a"], [0, 1]], names=["idx_level_0", "idx_level_1"] + ), + ) + result = df.style._translate(True, True) + head = result["head"][1] + expected = [ + { + "class": "index_name level0", + "display_value": "idx_level_0", + "is_visible": True, + }, + { + "class": "index_name level1", + "display_value": "idx_level_1", + "is_visible": True, + }, + { + "class": "blank col0", + "display_value": blank_value, + "is_visible": True, + }, + ] + for i, expected_dict in enumerate(expected): + assert expected_dict.items() <= head[i].items() + + def test_mi_sparse_column_names(self, blank_value): + df = DataFrame( + np.arange(16).reshape(4, 4), + index=MultiIndex.from_arrays( + [["a", "a", "b", "a"], [0, 1, 1, 2]], + names=["idx_level_0", "idx_level_1"], + ), + columns=MultiIndex.from_arrays( + [["C1", "C1", "C2", "C2"], [1, 0, 1, 0]], names=["colnam_0", "colnam_1"] + ), + ) + result = Styler(df, cell_ids=False)._translate(True, True) + + for level in [0, 1]: + head = result["head"][level] + expected = [ + { + "class": "blank", + "display_value": blank_value, + "is_visible": True, + }, + { + "class": f"index_name level{level}", + "display_value": f"colnam_{level}", + "is_visible": True, + }, + ] + for i, expected_dict in enumerate(expected): + assert expected_dict.items() <= head[i].items() + + def test_hide_column_headers(self, df, styler): + ctx = styler.hide(axis="columns")._translate(True, True) + assert len(ctx["head"]) == 0 # no header entries with an unnamed index + + df.index.name = "some_name" + ctx = df.style.hide(axis="columns")._translate(True, True) + assert len(ctx["head"]) == 1 + # index names still visible, changed in #42101, reverted in 43404 + + def test_hide_single_index(self, df): + # GH 14194 + # single unnamed index + ctx = df.style._translate(True, True) + assert ctx["body"][0][0]["is_visible"] + assert ctx["head"][0][0]["is_visible"] + ctx2 = df.style.hide(axis="index")._translate(True, True) + assert not ctx2["body"][0][0]["is_visible"] + assert not ctx2["head"][0][0]["is_visible"] + + # single named index + ctx3 = df.set_index("A").style._translate(True, True) + assert ctx3["body"][0][0]["is_visible"] + assert len(ctx3["head"]) == 2 # 2 header levels + assert ctx3["head"][0][0]["is_visible"] + + ctx4 = df.set_index("A").style.hide(axis="index")._translate(True, True) + assert not ctx4["body"][0][0]["is_visible"] + assert len(ctx4["head"]) == 1 # only 1 header levels + assert not ctx4["head"][0][0]["is_visible"] + + def test_hide_multiindex(self): + # GH 14194 + df = DataFrame( + {"A": [1, 2], "B": [1, 2]}, + index=MultiIndex.from_arrays( + [["a", "a"], [0, 1]], names=["idx_level_0", "idx_level_1"] + ), + ) + ctx1 = df.style._translate(True, True) + # tests for 'a' and '0' + assert ctx1["body"][0][0]["is_visible"] + assert ctx1["body"][0][1]["is_visible"] + # check for blank header rows + assert len(ctx1["head"][0]) == 4 # two visible indexes and two data columns + + ctx2 = df.style.hide(axis="index")._translate(True, True) + # tests for 'a' and '0' + assert not ctx2["body"][0][0]["is_visible"] + assert not ctx2["body"][0][1]["is_visible"] + # check for blank header rows + assert len(ctx2["head"][0]) == 3 # one hidden (col name) and two data columns + assert not ctx2["head"][0][0]["is_visible"] + + def test_hide_columns_single_level(self, df): + # GH 14194 + # test hiding single column + ctx = df.style._translate(True, True) + assert ctx["head"][0][1]["is_visible"] + assert ctx["head"][0][1]["display_value"] == "A" + assert ctx["head"][0][2]["is_visible"] + assert ctx["head"][0][2]["display_value"] == "B" + assert ctx["body"][0][1]["is_visible"] # col A, row 1 + assert ctx["body"][1][2]["is_visible"] # col B, row 1 + + ctx = df.style.hide("A", axis="columns")._translate(True, True) + assert not ctx["head"][0][1]["is_visible"] + assert not ctx["body"][0][1]["is_visible"] # col A, row 1 + assert ctx["body"][1][2]["is_visible"] # col B, row 1 + + # test hiding multiple columns + ctx = df.style.hide(["A", "B"], axis="columns")._translate(True, True) + assert not ctx["head"][0][1]["is_visible"] + assert not ctx["head"][0][2]["is_visible"] + assert not ctx["body"][0][1]["is_visible"] # col A, row 1 + assert not ctx["body"][1][2]["is_visible"] # col B, row 1 + + def test_hide_columns_index_mult_levels(self): + # GH 14194 + # setup dataframe with multiple column levels and indices + i1 = MultiIndex.from_arrays( + [["a", "a"], [0, 1]], names=["idx_level_0", "idx_level_1"] + ) + i2 = MultiIndex.from_arrays( + [["b", "b"], [0, 1]], names=["col_level_0", "col_level_1"] + ) + df = DataFrame([[1, 2], [3, 4]], index=i1, columns=i2) + ctx = df.style._translate(True, True) + # column headers + assert ctx["head"][0][2]["is_visible"] + assert ctx["head"][1][2]["is_visible"] + assert ctx["head"][1][3]["display_value"] == "1" + # indices + assert ctx["body"][0][0]["is_visible"] + # data + assert ctx["body"][1][2]["is_visible"] + assert ctx["body"][1][2]["display_value"] == "3" + assert ctx["body"][1][3]["is_visible"] + assert ctx["body"][1][3]["display_value"] == "4" + + # hide top column level, which hides both columns + ctx = df.style.hide("b", axis="columns")._translate(True, True) + assert not ctx["head"][0][2]["is_visible"] # b + assert not ctx["head"][1][2]["is_visible"] # 0 + assert not ctx["body"][1][2]["is_visible"] # 3 + assert ctx["body"][0][0]["is_visible"] # index + + # hide first column only + ctx = df.style.hide([("b", 0)], axis="columns")._translate(True, True) + assert not ctx["head"][0][2]["is_visible"] # b + assert ctx["head"][0][3]["is_visible"] # b + assert not ctx["head"][1][2]["is_visible"] # 0 + assert not ctx["body"][1][2]["is_visible"] # 3 + assert ctx["body"][1][3]["is_visible"] + assert ctx["body"][1][3]["display_value"] == "4" + + # hide second column and index + ctx = df.style.hide([("b", 1)], axis=1).hide(axis=0)._translate(True, True) + assert not ctx["body"][0][0]["is_visible"] # index + assert len(ctx["head"][0]) == 3 + assert ctx["head"][0][1]["is_visible"] # b + assert ctx["head"][1][1]["is_visible"] # 0 + assert not ctx["head"][1][2]["is_visible"] # 1 + assert not ctx["body"][1][3]["is_visible"] # 4 + assert ctx["body"][1][2]["is_visible"] + assert ctx["body"][1][2]["display_value"] == "3" + + # hide top row level, which hides both rows so body empty + ctx = df.style.hide("a", axis="index")._translate(True, True) + assert ctx["body"] == [] + + # hide first row only + ctx = df.style.hide(("a", 0), axis="index")._translate(True, True) + for i in [0, 1, 2, 3]: + assert "row1" in ctx["body"][0][i]["class"] # row0 not included in body + assert ctx["body"][0][i]["is_visible"] + + def test_pipe(self, df): + def set_caption_from_template(styler, a, b): + return styler.set_caption(f"Dataframe with a = {a} and b = {b}") + + styler = df.style.pipe(set_caption_from_template, "A", b="B") + assert "Dataframe with a = A and b = B" in styler.to_html() + + # Test with an argument that is a (callable, keyword_name) pair. + def f(a, b, styler): + return (a, b, styler) + + styler = df.style + result = styler.pipe((f, "styler"), a=1, b=2) + assert result == (1, 2, styler) + + def test_no_cell_ids(self): + # GH 35588 + # GH 35663 + df = DataFrame(data=[[0]]) + styler = Styler(df, uuid="_", cell_ids=False) + styler.to_html() + s = styler.to_html() # render twice to ensure ctx is not updated + assert s.find('') != -1 + + @pytest.mark.parametrize( + "classes", + [ + DataFrame( + data=[["", "test-class"], [np.nan, None]], + columns=["A", "B"], + index=["a", "b"], + ), + DataFrame(data=[["test-class"]], columns=["B"], index=["a"]), + DataFrame(data=[["test-class", "unused"]], columns=["B", "C"], index=["a"]), + ], + ) + def test_set_data_classes(self, classes): + # GH 36159 + df = DataFrame(data=[[0, 1], [2, 3]], columns=["A", "B"], index=["a", "b"]) + s = Styler(df, uuid_len=0, cell_ids=False).set_td_classes(classes).to_html() + assert '0' in s + assert '1' in s + assert '2' in s + assert '3' in s + # GH 39317 + s = Styler(df, uuid_len=0, cell_ids=True).set_td_classes(classes).to_html() + assert '0' in s + assert '1' in s + assert '2' in s + assert '3' in s + + def test_set_data_classes_reindex(self): + # GH 39317 + df = DataFrame( + data=[[0, 1, 2], [3, 4, 5], [6, 7, 8]], columns=[0, 1, 2], index=[0, 1, 2] + ) + classes = DataFrame( + data=[["mi", "ma"], ["mu", "mo"]], + columns=[0, 2], + index=[0, 2], + ) + s = Styler(df, uuid_len=0).set_td_classes(classes).to_html() + assert '0' in s + assert '2' in s + assert '4' in s + assert '6' in s + assert '8' in s + + def test_chaining_table_styles(self): + # GH 35607 + df = DataFrame(data=[[0, 1], [1, 2]], columns=["A", "B"]) + styler = df.style.set_table_styles( + [{"selector": "", "props": [("background-color", "yellow")]}] + ).set_table_styles( + [{"selector": ".col0", "props": [("background-color", "blue")]}], + overwrite=False, + ) + assert len(styler.table_styles) == 2 + + def test_column_and_row_styling(self): + # GH 35607 + df = DataFrame(data=[[0, 1], [1, 2]], columns=["A", "B"]) + s = Styler(df, uuid_len=0) + s = s.set_table_styles({"A": [{"selector": "", "props": [("color", "blue")]}]}) + assert "#T_ .col0 {\n color: blue;\n}" in s.to_html() + s = s.set_table_styles( + {0: [{"selector": "", "props": [("color", "blue")]}]}, axis=1 + ) + assert "#T_ .row0 {\n color: blue;\n}" in s.to_html() + + @pytest.mark.parametrize("len_", [1, 5, 32, 33, 100]) + def test_uuid_len(self, len_): + # GH 36345 + df = DataFrame(data=[["A"]]) + s = Styler(df, uuid_len=len_, cell_ids=False).to_html() + strt = s.find('id="T_') + end = s[strt + 6 :].find('"') + if len_ > 32: + assert end == 32 + else: + assert end == len_ + + @pytest.mark.parametrize("len_", [-2, "bad", None]) + def test_uuid_len_raises(self, len_): + # GH 36345 + df = DataFrame(data=[["A"]]) + msg = "``uuid_len`` must be an integer in range \\[0, 32\\]." + with pytest.raises(TypeError, match=msg): + Styler(df, uuid_len=len_, cell_ids=False).to_html() + + @pytest.mark.parametrize( + "slc", + [ + IndexSlice[:, :], + IndexSlice[:, 1], + IndexSlice[1, :], + IndexSlice[[1], [1]], + IndexSlice[1, [1]], + IndexSlice[[1], 1], + IndexSlice[1], + IndexSlice[1, 1], + slice(None, None, None), + [0, 1], + np.array([0, 1]), + Series([0, 1]), + ], + ) + def test_non_reducing_slice(self, slc): + df = DataFrame([[0, 1], [2, 3]]) + + tslice_ = non_reducing_slice(slc) + assert isinstance(df.loc[tslice_], DataFrame) + + @pytest.mark.parametrize("box", [list, Series, np.array]) + def test_list_slice(self, box): + # like dataframe getitem + subset = box(["A"]) + + df = DataFrame({"A": [1, 2], "B": [3, 4]}, index=["A", "B"]) + expected = IndexSlice[:, ["A"]] + + result = non_reducing_slice(subset) + tm.assert_frame_equal(df.loc[result], df.loc[expected]) + + def test_non_reducing_slice_on_multiindex(self): + # GH 19861 + dic = { + ("a", "d"): [1, 4], + ("a", "c"): [2, 3], + ("b", "c"): [3, 2], + ("b", "d"): [4, 1], + } + df = DataFrame(dic, index=[0, 1]) + idx = IndexSlice + slice_ = idx[:, idx["b", "d"]] + tslice_ = non_reducing_slice(slice_) + + result = df.loc[tslice_] + expected = DataFrame({("b", "d"): [4, 1]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "slice_", + [ + IndexSlice[:, :], + # check cols + IndexSlice[:, IndexSlice[["a"]]], # inferred deeper need list + IndexSlice[:, IndexSlice[["a"], ["c"]]], # inferred deeper need list + IndexSlice[:, IndexSlice["a", "c", :]], + IndexSlice[:, IndexSlice["a", :, "e"]], + IndexSlice[:, IndexSlice[:, "c", "e"]], + IndexSlice[:, IndexSlice["a", ["c", "d"], :]], # check list + IndexSlice[:, IndexSlice["a", ["c", "d", "-"], :]], # don't allow missing + IndexSlice[:, IndexSlice["a", ["c", "d", "-"], "e"]], # no slice + # check rows + IndexSlice[IndexSlice[["U"]], :], # inferred deeper need list + IndexSlice[IndexSlice[["U"], ["W"]], :], # inferred deeper need list + IndexSlice[IndexSlice["U", "W", :], :], + IndexSlice[IndexSlice["U", :, "Y"], :], + IndexSlice[IndexSlice[:, "W", "Y"], :], + IndexSlice[IndexSlice[:, "W", ["Y", "Z"]], :], # check list + IndexSlice[IndexSlice[:, "W", ["Y", "Z", "-"]], :], # don't allow missing + IndexSlice[IndexSlice["U", "W", ["Y", "Z", "-"]], :], # no slice + # check simultaneous + IndexSlice[IndexSlice[:, "W", "Y"], IndexSlice["a", "c", :]], + ], + ) + def test_non_reducing_multi_slice_on_multiindex(self, slice_): + # GH 33562 + cols = MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]) + idxs = MultiIndex.from_product([["U", "V"], ["W", "X"], ["Y", "Z"]]) + df = DataFrame(np.arange(64).reshape(8, 8), columns=cols, index=idxs) + + for lvl in [0, 1]: + key = slice_[lvl] + if isinstance(key, tuple): + for subkey in key: + if isinstance(subkey, list) and "-" in subkey: + # not present in the index level, raises KeyError since 2.0 + with pytest.raises(KeyError, match="-"): + df.loc[slice_] + return + + expected = df.loc[slice_] + result = df.loc[non_reducing_slice(slice_)] + tm.assert_frame_equal(result, expected) + + +def test_hidden_index_names(mi_df): + mi_df.index.names = ["Lev0", "Lev1"] + mi_styler = mi_df.style + ctx = mi_styler._translate(True, True) + assert len(ctx["head"]) == 3 # 2 column index levels + 1 index names row + + mi_styler.hide(axis="index", names=True) + ctx = mi_styler._translate(True, True) + assert len(ctx["head"]) == 2 # index names row is unparsed + for i in range(4): + assert ctx["body"][0][i]["is_visible"] # 2 index levels + 2 data values visible + + mi_styler.hide(axis="index", level=1) + ctx = mi_styler._translate(True, True) + assert len(ctx["head"]) == 2 # index names row is still hidden + assert ctx["body"][0][0]["is_visible"] is True + assert ctx["body"][0][1]["is_visible"] is False + + +def test_hidden_column_names(mi_df): + mi_df.columns.names = ["Lev0", "Lev1"] + mi_styler = mi_df.style + ctx = mi_styler._translate(True, True) + assert ctx["head"][0][1]["display_value"] == "Lev0" + assert ctx["head"][1][1]["display_value"] == "Lev1" + + mi_styler.hide(names=True, axis="columns") + ctx = mi_styler._translate(True, True) + assert ctx["head"][0][1]["display_value"] == " " + assert ctx["head"][1][1]["display_value"] == " " + + mi_styler.hide(level=0, axis="columns") + ctx = mi_styler._translate(True, True) + assert len(ctx["head"]) == 1 # no index names and only one visible column headers + assert ctx["head"][0][1]["display_value"] == " " + + +@pytest.mark.parametrize("caption", [1, ("a", "b", "c"), (1, "s")]) +def test_caption_raises(mi_styler, caption): + msg = "`caption` must be either a string or 2-tuple of strings." + with pytest.raises(ValueError, match=msg): + mi_styler.set_caption(caption) + + +def test_hiding_headers_over_index_no_sparsify(): + # GH 43464 + midx = MultiIndex.from_product([[1, 2], ["a", "a", "b"]]) + df = DataFrame(9, index=midx, columns=[0]) + ctx = df.style._translate(False, False) + assert len(ctx["body"]) == 6 + ctx = df.style.hide((1, "a"), axis=0)._translate(False, False) + assert len(ctx["body"]) == 4 + assert "row2" in ctx["body"][0][0]["class"] + + +def test_hiding_headers_over_columns_no_sparsify(): + # GH 43464 + midx = MultiIndex.from_product([[1, 2], ["a", "a", "b"]]) + df = DataFrame(9, columns=midx, index=[0]) + ctx = df.style._translate(False, False) + for ix in [(0, 1), (0, 2), (1, 1), (1, 2)]: + assert ctx["head"][ix[0]][ix[1]]["is_visible"] is True + ctx = df.style.hide((1, "a"), axis="columns")._translate(False, False) + for ix in [(0, 1), (0, 2), (1, 1), (1, 2)]: + assert ctx["head"][ix[0]][ix[1]]["is_visible"] is False + + +def test_get_level_lengths_mi_hidden(): + # GH 43464 + index = MultiIndex.from_arrays([[1, 1, 1, 2, 2, 2], ["a", "a", "b", "a", "a", "b"]]) + expected = { + (0, 2): 1, + (0, 3): 1, + (0, 4): 1, + (0, 5): 1, + (1, 2): 1, + (1, 3): 1, + (1, 4): 1, + (1, 5): 1, + } + result = _get_level_lengths( + index, + sparsify=False, + max_index=100, + hidden_elements=[0, 1, 0, 1], # hidden element can repeat if duplicated index + ) + tm.assert_dict_equal(result, expected) + + +def test_row_trimming_hide_index(): + # gh 43703 + df = DataFrame([[1], [2], [3], [4], [5]]) + with option_context("styler.render.max_rows", 2): + ctx = df.style.hide([0, 1], axis="index")._translate(True, True) + assert len(ctx["body"]) == 3 + for r, val in enumerate(["3", "4", "..."]): + assert ctx["body"][r][1]["display_value"] == val + + +def test_row_trimming_hide_index_mi(): + # gh 44247 + df = DataFrame([[1], [2], [3], [4], [5]]) + df.index = MultiIndex.from_product([[0], [0, 1, 2, 3, 4]]) + with option_context("styler.render.max_rows", 2): + ctx = df.style.hide([(0, 0), (0, 1)], axis="index")._translate(True, True) + assert len(ctx["body"]) == 3 + + # level 0 index headers (sparsified) + assert {"value": 0, "attributes": 'rowspan="2"', "is_visible": True}.items() <= ctx[ + "body" + ][0][0].items() + assert {"value": 0, "attributes": "", "is_visible": False}.items() <= ctx["body"][ + 1 + ][0].items() + assert {"value": "...", "is_visible": True}.items() <= ctx["body"][2][0].items() + + for r, val in enumerate(["2", "3", "..."]): + assert ctx["body"][r][1]["display_value"] == val # level 1 index headers + for r, val in enumerate(["3", "4", "..."]): + assert ctx["body"][r][2]["display_value"] == val # data values + + +def test_col_trimming_hide_columns(): + # gh 44272 + df = DataFrame([[1, 2, 3, 4, 5]]) + with option_context("styler.render.max_columns", 2): + ctx = df.style.hide([0, 1], axis="columns")._translate(True, True) + + assert len(ctx["head"][0]) == 6 # blank, [0, 1 (hidden)], [2 ,3 (visible)], + trim + for c, vals in enumerate([(1, False), (2, True), (3, True), ("...", True)]): + assert ctx["head"][0][c + 2]["value"] == vals[0] + assert ctx["head"][0][c + 2]["is_visible"] == vals[1] + + assert len(ctx["body"][0]) == 6 # index + 2 hidden + 2 visible + trimming col + + +def test_no_empty_apply(mi_styler): + # 45313 + mi_styler.apply(lambda s: ["a:v;"] * 2, subset=[False, False]) + mi_styler._compute() + + +@pytest.mark.parametrize("format", ["html", "latex", "string"]) +def test_output_buffer(mi_styler, format): + # gh 47053 + with tm.ensure_clean(f"delete_me.{format}") as f: + getattr(mi_styler, f"to_{format}")(f) diff --git a/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_to_latex.py b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_to_latex.py new file mode 100644 index 0000000000000000000000000000000000000000..7f1443c3ee66be040f668f546682924207cfd31e --- /dev/null +++ b/py311/lib/python3.11/site-packages/pandas/tests/io/formats/style/test_to_latex.py @@ -0,0 +1,1090 @@ +from textwrap import dedent + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, + Series, + option_context, +) + +pytest.importorskip("jinja2") +from pandas.io.formats.style import Styler +from pandas.io.formats.style_render import ( + _parse_latex_cell_styles, + _parse_latex_css_conversion, + _parse_latex_header_span, + _parse_latex_table_styles, + _parse_latex_table_wrapping, +) + + +@pytest.fixture +def df(): + return DataFrame( + {"A": [0, 1], "B": [-0.61, -1.22], "C": Series(["ab", "cd"], dtype=object)} + ) + + +@pytest.fixture +def df_ext(): + return DataFrame( + {"A": [0, 1, 2], "B": [-0.61, -1.22, -2.22], "C": ["ab", "cd", "de"]} + ) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0, precision=2) + + +def test_minimal_latex_tabular(styler): + expected = dedent( + """\ + \\begin{tabular}{lrrl} + & A & B & C \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{tabular} + """ + ) + assert styler.to_latex() == expected + + +def test_tabular_hrules(styler): + expected = dedent( + """\ + \\begin{tabular}{lrrl} + \\toprule + & A & B & C \\\\ + \\midrule + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\bottomrule + \\end{tabular} + """ + ) + assert styler.to_latex(hrules=True) == expected + + +def test_tabular_custom_hrules(styler): + styler.set_table_styles( + [ + {"selector": "toprule", "props": ":hline"}, + {"selector": "bottomrule", "props": ":otherline"}, + ] + ) # no midrule + expected = dedent( + """\ + \\begin{tabular}{lrrl} + \\hline + & A & B & C \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\otherline + \\end{tabular} + """ + ) + assert styler.to_latex() == expected + + +def test_column_format(styler): + # default setting is already tested in `test_latex_minimal_tabular` + styler.set_table_styles([{"selector": "column_format", "props": ":cccc"}]) + + assert "\\begin{tabular}{rrrr}" in styler.to_latex(column_format="rrrr") + styler.set_table_styles([{"selector": "column_format", "props": ":r|r|cc"}]) + assert "\\begin{tabular}{r|r|cc}" in styler.to_latex() + + +def test_siunitx_cols(styler): + expected = dedent( + """\ + \\begin{tabular}{lSSl} + {} & {A} & {B} & {C} \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{tabular} + """ + ) + assert styler.to_latex(siunitx=True) == expected + + +def test_position(styler): + assert "\\begin{table}[h!]" in styler.to_latex(position="h!") + assert "\\end{table}" in styler.to_latex(position="h!") + styler.set_table_styles([{"selector": "position", "props": ":b!"}]) + assert "\\begin{table}[b!]" in styler.to_latex() + assert "\\end{table}" in styler.to_latex() + + +@pytest.mark.parametrize("env", [None, "longtable"]) +def test_label(styler, env): + assert "\n\\label{text}" in styler.to_latex(label="text", environment=env) + styler.set_table_styles([{"selector": "label", "props": ":{more §text}"}]) + assert "\n\\label{more :text}" in styler.to_latex(environment=env) + + +def test_position_float_raises(styler): + msg = "`position_float` should be one of 'raggedright', 'raggedleft', 'centering'," + with pytest.raises(ValueError, match=msg): + styler.to_latex(position_float="bad_string") + + msg = "`position_float` cannot be used in 'longtable' `environment`" + with pytest.raises(ValueError, match=msg): + styler.to_latex(position_float="centering", environment="longtable") + + +@pytest.mark.parametrize("label", [(None, ""), ("text", "\\label{text}")]) +@pytest.mark.parametrize("position", [(None, ""), ("h!", "{table}[h!]")]) +@pytest.mark.parametrize("caption", [(None, ""), ("text", "\\caption{text}")]) +@pytest.mark.parametrize("column_format", [(None, ""), ("rcrl", "{tabular}{rcrl}")]) +@pytest.mark.parametrize("position_float", [(None, ""), ("centering", "\\centering")]) +def test_kwargs_combinations( + styler, label, position, caption, column_format, position_float +): + result = styler.to_latex( + label=label[0], + position=position[0], + caption=caption[0], + column_format=column_format[0], + position_float=position_float[0], + ) + assert label[1] in result + assert position[1] in result + assert caption[1] in result + assert column_format[1] in result + assert position_float[1] in result + + +def test_custom_table_styles(styler): + styler.set_table_styles( + [ + {"selector": "mycommand", "props": ":{myoptions}"}, + {"selector": "mycommand2", "props": ":{myoptions2}"}, + ] + ) + expected = dedent( + """\ + \\begin{table} + \\mycommand{myoptions} + \\mycommand2{myoptions2} + """ + ) + assert expected in styler.to_latex() + + +def test_cell_styling(styler): + styler.highlight_max(props="itshape:;Huge:--wrap;") + expected = dedent( + """\ + \\begin{tabular}{lrrl} + & A & B & C \\\\ + 0 & 0 & \\itshape {\\Huge -0.61} & ab \\\\ + 1 & \\itshape {\\Huge 1} & -1.22 & \\itshape {\\Huge cd} \\\\ + \\end{tabular} + """ + ) + assert expected == styler.to_latex() + + +def test_multiindex_columns(df): + cidx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df.columns = cidx + expected = dedent( + """\ + \\begin{tabular}{lrrl} + & \\multicolumn{2}{r}{A} & B \\\\ + & a & b & c \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{tabular} + """ + ) + s = df.style.format(precision=2) + assert expected == s.to_latex() + + # non-sparse + expected = dedent( + """\ + \\begin{tabular}{lrrl} + & A & A & B \\\\ + & a & b & c \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{tabular} + """ + ) + s = df.style.format(precision=2) + assert expected == s.to_latex(sparse_columns=False) + + +def test_multiindex_row(df_ext): + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index = ridx + expected = dedent( + """\ + \\begin{tabular}{llrrl} + & & A & B & C \\\\ + \\multirow[c]{2}{*}{A} & a & 0 & -0.61 & ab \\\\ + & b & 1 & -1.22 & cd \\\\ + B & c & 2 & -2.22 & de \\\\ + \\end{tabular} + """ + ) + styler = df_ext.style.format(precision=2) + result = styler.to_latex() + assert expected == result + + # non-sparse + expected = dedent( + """\ + \\begin{tabular}{llrrl} + & & A & B & C \\\\ + A & a & 0 & -0.61 & ab \\\\ + A & b & 1 & -1.22 & cd \\\\ + B & c & 2 & -2.22 & de \\\\ + \\end{tabular} + """ + ) + result = styler.to_latex(sparse_index=False) + assert expected == result + + +def test_multirow_naive(df_ext): + ridx = MultiIndex.from_tuples([("X", "x"), ("X", "y"), ("Y", "z")]) + df_ext.index = ridx + expected = dedent( + """\ + \\begin{tabular}{llrrl} + & & A & B & C \\\\ + X & x & 0 & -0.61 & ab \\\\ + & y & 1 & -1.22 & cd \\\\ + Y & z & 2 & -2.22 & de \\\\ + \\end{tabular} + """ + ) + styler = df_ext.style.format(precision=2) + result = styler.to_latex(multirow_align="naive") + assert expected == result + + +def test_multiindex_row_and_col(df_ext): + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index, df_ext.columns = ridx, cidx + expected = dedent( + """\ + \\begin{tabular}{llrrl} + & & \\multicolumn{2}{l}{Z} & Y \\\\ + & & a & b & c \\\\ + \\multirow[b]{2}{*}{A} & a & 0 & -0.61 & ab \\\\ + & b & 1 & -1.22 & cd \\\\ + B & c & 2 & -2.22 & de \\\\ + \\end{tabular} + """ + ) + styler = df_ext.style.format(precision=2) + result = styler.to_latex(multirow_align="b", multicol_align="l") + assert result == expected + + # non-sparse + expected = dedent( + """\ + \\begin{tabular}{llrrl} + & & Z & Z & Y \\\\ + & & a & b & c \\\\ + A & a & 0 & -0.61 & ab \\\\ + A & b & 1 & -1.22 & cd \\\\ + B & c & 2 & -2.22 & de \\\\ + \\end{tabular} + """ + ) + result = styler.to_latex(sparse_index=False, sparse_columns=False) + assert result == expected + + +@pytest.mark.parametrize( + "multicol_align, siunitx, header", + [ + ("naive-l", False, " & A & &"), + ("naive-r", False, " & & & A"), + ("naive-l", True, "{} & {A} & {} & {}"), + ("naive-r", True, "{} & {} & {} & {A}"), + ], +) +def test_multicol_naive(df, multicol_align, siunitx, header): + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("A", "c")]) + df.columns = ridx + level1 = " & a & b & c" if not siunitx else "{} & {a} & {b} & {c}" + col_format = "lrrl" if not siunitx else "lSSl" + expected = dedent( + f"""\ + \\begin{{tabular}}{{{col_format}}} + {header} \\\\ + {level1} \\\\ + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{{tabular}} + """ + ) + styler = df.style.format(precision=2) + result = styler.to_latex(multicol_align=multicol_align, siunitx=siunitx) + assert expected == result + + +def test_multi_options(df_ext): + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index, df_ext.columns = ridx, cidx + styler = df_ext.style.format(precision=2) + + expected = dedent( + """\ + & & \\multicolumn{2}{r}{Z} & Y \\\\ + & & a & b & c \\\\ + \\multirow[c]{2}{*}{A} & a & 0 & -0.61 & ab \\\\ + """ + ) + result = styler.to_latex() + assert expected in result + + with option_context("styler.latex.multicol_align", "l"): + assert " & & \\multicolumn{2}{l}{Z} & Y \\\\" in styler.to_latex() + + with option_context("styler.latex.multirow_align", "b"): + assert "\\multirow[b]{2}{*}{A} & a & 0 & -0.61 & ab \\\\" in styler.to_latex() + + +def test_multiindex_columns_hidden(): + df = DataFrame([[1, 2, 3, 4]]) + df.columns = MultiIndex.from_tuples([("A", 1), ("A", 2), ("A", 3), ("B", 1)]) + s = df.style + assert "{tabular}{lrrrr}" in s.to_latex() + s.set_table_styles([]) # reset the position command + s.hide([("A", 2)], axis="columns") + assert "{tabular}{lrrr}" in s.to_latex() + + +@pytest.mark.parametrize( + "option, value", + [ + ("styler.sparse.index", True), + ("styler.sparse.index", False), + ("styler.sparse.columns", True), + ("styler.sparse.columns", False), + ], +) +def test_sparse_options(df_ext, option, value): + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index, df_ext.columns = ridx, cidx + styler = df_ext.style + + latex1 = styler.to_latex() + with option_context(option, value): + latex2 = styler.to_latex() + assert (latex1 == latex2) is value + + +def test_hidden_index(styler): + styler.hide(axis="index") + expected = dedent( + """\ + \\begin{tabular}{rrl} + A & B & C \\\\ + 0 & -0.61 & ab \\\\ + 1 & -1.22 & cd \\\\ + \\end{tabular} + """ + ) + assert styler.to_latex() == expected + + +@pytest.mark.parametrize("environment", ["table", "figure*", None]) +def test_comprehensive(df_ext, environment): + # test as many low level features simultaneously as possible + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index, df_ext.columns = ridx, cidx + stlr = df_ext.style + stlr.set_caption("mycap") + stlr.set_table_styles( + [ + {"selector": "label", "props": ":{fig§item}"}, + {"selector": "position", "props": ":h!"}, + {"selector": "position_float", "props": ":centering"}, + {"selector": "column_format", "props": ":rlrlr"}, + {"selector": "toprule", "props": ":toprule"}, + {"selector": "midrule", "props": ":midrule"}, + {"selector": "bottomrule", "props": ":bottomrule"}, + {"selector": "rowcolors", "props": ":{3}{pink}{}"}, # custom command + ] + ) + stlr.highlight_max(axis=0, props="textbf:--rwrap;cellcolor:[rgb]{1,1,0.6}--rwrap") + stlr.highlight_max(axis=None, props="Huge:--wrap;", subset=[("Z", "a"), ("Z", "b")]) + + expected = ( + """\ +\\begin{table}[h!] +\\centering +\\caption{mycap} +\\label{fig:item} +\\rowcolors{3}{pink}{} +\\begin{tabular}{rlrlr} +\\toprule + & & \\multicolumn{2}{r}{Z} & Y \\\\ + & & a & b & c \\\\ +\\midrule +\\multirow[c]{2}{*}{A} & a & 0 & \\textbf{\\cellcolor[rgb]{1,1,0.6}{-0.61}} & ab \\\\ + & b & 1 & -1.22 & cd \\\\ +B & c & \\textbf{\\cellcolor[rgb]{1,1,0.6}{{\\Huge 2}}} & -2.22 & """ + """\ +\\textbf{\\cellcolor[rgb]{1,1,0.6}{de}} \\\\ +\\bottomrule +\\end{tabular} +\\end{table} +""" + ).replace("table", environment if environment else "table") + result = stlr.format(precision=2).to_latex(environment=environment) + assert result == expected + + +def test_environment_option(styler): + with option_context("styler.latex.environment", "bar-env"): + assert "\\begin{bar-env}" in styler.to_latex() + assert "\\begin{foo-env}" in styler.to_latex(environment="foo-env") + + +def test_parse_latex_table_styles(styler): + styler.set_table_styles( + [ + {"selector": "foo", "props": [("attr", "value")]}, + {"selector": "bar", "props": [("attr", "overwritten")]}, + {"selector": "bar", "props": [("attr", "baz"), ("attr2", "ignored")]}, + {"selector": "label", "props": [("", "{fig§item}")]}, + ] + ) + assert _parse_latex_table_styles(styler.table_styles, "bar") == "baz" + + # test '§' replaced by ':' [for CSS compatibility] + assert _parse_latex_table_styles(styler.table_styles, "label") == "{fig:item}" + + +def test_parse_latex_cell_styles_basic(): # test nesting + cell_style = [("itshape", "--rwrap"), ("cellcolor", "[rgb]{0,1,1}--rwrap")] + expected = "\\itshape{\\cellcolor[rgb]{0,1,1}{text}}" + assert _parse_latex_cell_styles(cell_style, "text") == expected + + +@pytest.mark.parametrize( + "wrap_arg, expected", + [ # test wrapping + ("", "\\ "), + ("--wrap", "{\\ }"), + ("--nowrap", "\\ "), + ("--lwrap", "{\\} "), + ("--dwrap", "{\\}{}"), + ("--rwrap", "\\{}"), + ], +) +def test_parse_latex_cell_styles_braces(wrap_arg, expected): + cell_style = [("", f"{wrap_arg}")] + assert _parse_latex_cell_styles(cell_style, "") == expected + + +def test_parse_latex_header_span(): + cell = {"attributes": 'colspan="3"', "display_value": "text", "cellstyle": []} + expected = "\\multicolumn{3}{Y}{text}" + assert _parse_latex_header_span(cell, "X", "Y") == expected + + cell = {"attributes": 'rowspan="5"', "display_value": "text", "cellstyle": []} + expected = "\\multirow[X]{5}{*}{text}" + assert _parse_latex_header_span(cell, "X", "Y") == expected + + cell = {"display_value": "text", "cellstyle": []} + assert _parse_latex_header_span(cell, "X", "Y") == "text" + + cell = {"display_value": "text", "cellstyle": [("bfseries", "--rwrap")]} + assert _parse_latex_header_span(cell, "X", "Y") == "\\bfseries{text}" + + +def test_parse_latex_table_wrapping(styler): + styler.set_table_styles( + [ + {"selector": "toprule", "props": ":value"}, + {"selector": "bottomrule", "props": ":value"}, + {"selector": "midrule", "props": ":value"}, + {"selector": "column_format", "props": ":value"}, + ] + ) + assert _parse_latex_table_wrapping(styler.table_styles, styler.caption) is False + assert _parse_latex_table_wrapping(styler.table_styles, "some caption") is True + styler.set_table_styles( + [ + {"selector": "not-ignored", "props": ":value"}, + ], + overwrite=False, + ) + assert _parse_latex_table_wrapping(styler.table_styles, None) is True + + +def test_short_caption(styler): + result = styler.to_latex(caption=("full cap", "short cap")) + assert "\\caption[short cap]{full cap}" in result + + +@pytest.mark.parametrize( + "css, expected", + [ + ([("color", "red")], [("color", "{red}")]), # test color and input format types + ( + [("color", "rgb(128, 128, 128 )")], + [("color", "[rgb]{0.502, 0.502, 0.502}")], + ), + ( + [("color", "rgb(128, 50%, 25% )")], + [("color", "[rgb]{0.502, 0.500, 0.250}")], + ), + ( + [("color", "rgba(128,128,128,1)")], + [("color", "[rgb]{0.502, 0.502, 0.502}")], + ), + ([("color", "#FF00FF")], [("color", "[HTML]{FF00FF}")]), + ([("color", "#F0F")], [("color", "[HTML]{FF00FF}")]), + ([("font-weight", "bold")], [("bfseries", "")]), # test font-weight and types + ([("font-weight", "bolder")], [("bfseries", "")]), + ([("font-weight", "normal")], []), + ([("background-color", "red")], [("cellcolor", "{red}--lwrap")]), + ( + [("background-color", "#FF00FF")], # test background-color command and wrap + [("cellcolor", "[HTML]{FF00FF}--lwrap")], + ), + ([("font-style", "italic")], [("itshape", "")]), # test font-style and types + ([("font-style", "oblique")], [("slshape", "")]), + ([("font-style", "normal")], []), + ([("color", "red /*--dwrap*/")], [("color", "{red}--dwrap")]), # css comments + ([("background-color", "red /* --dwrap */")], [("cellcolor", "{red}--dwrap")]), + ], +) +def test_parse_latex_css_conversion(css, expected): + result = _parse_latex_css_conversion(css) + assert result == expected + + +@pytest.mark.parametrize( + "env, inner_env", + [ + (None, "tabular"), + ("table", "tabular"), + ("longtable", "longtable"), + ], +) +@pytest.mark.parametrize( + "convert, exp", [(True, "bfseries"), (False, "font-weightbold")] +) +def test_parse_latex_css_convert_minimal(styler, env, inner_env, convert, exp): + # parameters ensure longtable template is also tested + styler.highlight_max(props="font-weight:bold;") + result = styler.to_latex(convert_css=convert, environment=env) + expected = dedent( + f"""\ + 0 & 0 & \\{exp} -0.61 & ab \\\\ + 1 & \\{exp} 1 & -1.22 & \\{exp} cd \\\\ + \\end{{{inner_env}}} + """ + ) + assert expected in result + + +def test_parse_latex_css_conversion_option(): + css = [("command", "option--latex--wrap")] + expected = [("command", "option--wrap")] + result = _parse_latex_css_conversion(css) + assert result == expected + + +def test_styler_object_after_render(styler): + # GH 42320 + pre_render = styler._copy(deepcopy=True) + styler.to_latex( + column_format="rllr", + position="h", + position_float="centering", + hrules=True, + label="my lab", + caption="my cap", + ) + + assert pre_render.table_styles == styler.table_styles + assert pre_render.caption == styler.caption + + +def test_longtable_comprehensive(styler): + result = styler.to_latex( + environment="longtable", hrules=True, label="fig:A", caption=("full", "short") + ) + expected = dedent( + """\ + \\begin{longtable}{lrrl} + \\caption[short]{full} \\label{fig:A} \\\\ + \\toprule + & A & B & C \\\\ + \\midrule + \\endfirsthead + \\caption[]{full} \\\\ + \\toprule + & A & B & C \\\\ + \\midrule + \\endhead + \\midrule + \\multicolumn{4}{r}{Continued on next page} \\\\ + \\midrule + \\endfoot + \\bottomrule + \\endlastfoot + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{longtable} + """ + ) + assert result == expected + + +def test_longtable_minimal(styler): + result = styler.to_latex(environment="longtable") + expected = dedent( + """\ + \\begin{longtable}{lrrl} + & A & B & C \\\\ + \\endfirsthead + & A & B & C \\\\ + \\endhead + \\multicolumn{4}{r}{Continued on next page} \\\\ + \\endfoot + \\endlastfoot + 0 & 0 & -0.61 & ab \\\\ + 1 & 1 & -1.22 & cd \\\\ + \\end{longtable} + """ + ) + assert result == expected + + +@pytest.mark.parametrize( + "sparse, exp, siunitx", + [ + (True, "{} & \\multicolumn{2}{r}{A} & {B}", True), + (False, "{} & {A} & {A} & {B}", True), + (True, " & \\multicolumn{2}{r}{A} & B", False), + (False, " & A & A & B", False), + ], +) +def test_longtable_multiindex_columns(df, sparse, exp, siunitx): + cidx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df.columns = cidx + with_si = "{} & {a} & {b} & {c} \\\\" + without_si = " & a & b & c \\\\" + expected = dedent( + f"""\ + \\begin{{longtable}}{{l{"SS" if siunitx else "rr"}l}} + {exp} \\\\ + {with_si if siunitx else without_si} + \\endfirsthead + {exp} \\\\ + {with_si if siunitx else without_si} + \\endhead + """ + ) + result = df.style.to_latex( + environment="longtable", sparse_columns=sparse, siunitx=siunitx + ) + assert expected in result + + +@pytest.mark.parametrize( + "caption, cap_exp", + [ + ("full", ("{full}", "")), + (("full", "short"), ("{full}", "[short]")), + ], +) +@pytest.mark.parametrize("label, lab_exp", [(None, ""), ("tab:A", " \\label{tab:A}")]) +def test_longtable_caption_label(styler, caption, cap_exp, label, lab_exp): + cap_exp1 = f"\\caption{cap_exp[1]}{cap_exp[0]}" + cap_exp2 = f"\\caption[]{cap_exp[0]}" + + expected = dedent( + f"""\ + {cap_exp1}{lab_exp} \\\\ + & A & B & C \\\\ + \\endfirsthead + {cap_exp2} \\\\ + """ + ) + assert expected in styler.to_latex( + environment="longtable", caption=caption, label=label + ) + + +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize( + "columns, siunitx", + [ + (True, True), + (True, False), + (False, False), + ], +) +def test_apply_map_header_render_mi(df_ext, index, columns, siunitx): + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df_ext.index, df_ext.columns = ridx, cidx + styler = df_ext.style + + func = lambda v: "bfseries: --rwrap" if "A" in v or "Z" in v or "c" in v else None + + if index: + styler.map_index(func, axis="index") + if columns: + styler.map_index(func, axis="columns") + + result = styler.to_latex(siunitx=siunitx) + + expected_index = dedent( + """\ + \\multirow[c]{2}{*}{\\bfseries{A}} & a & 0 & -0.610000 & ab \\\\ + \\bfseries{} & b & 1 & -1.220000 & cd \\\\ + B & \\bfseries{c} & 2 & -2.220000 & de \\\\ + """ + ) + assert (expected_index in result) is index + + exp_cols_si = dedent( + """\ + {} & {} & \\multicolumn{2}{r}{\\bfseries{Z}} & {Y} \\\\ + {} & {} & {a} & {b} & {\\bfseries{c}} \\\\ + """ + ) + exp_cols_no_si = """\ + & & \\multicolumn{2}{r}{\\bfseries{Z}} & Y \\\\ + & & a & b & \\bfseries{c} \\\\ +""" + assert ((exp_cols_si if siunitx else exp_cols_no_si) in result) is columns + + +def test_repr_option(styler): + assert "