Add files using upload-large-folder tool
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/__init__.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/_typing.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/_version.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/_version_meson.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/conftest.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/testing.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/__init__.py +639 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/_hypothesis.py +93 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/_io.py +170 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/_warnings.py +232 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/asserters.py +1435 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/compat.py +29 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/contexts.py +257 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/arrays/__init__.py +53 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/errors/__init__.py +850 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/feather_format.py +143 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/gbq.py +255 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/html.py +1259 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/pickle.py +210 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/pytables.py +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/sql.py +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/__init__.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_aggregation.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_common.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_downstream.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_errors.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_expressions.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_flags.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_multilevel.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_nanops.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_optional_dependency.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_register_accessor.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_sorting.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_take.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/__init__.py +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/masked_shared.py +154 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_array.py +478 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_datetimelike.py +1344 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_datetimes.py +840 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_ndarray_backed.py +75 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_period.py +184 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_timedeltas.py +313 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/__init__.py +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/common.py +9 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_constructors.py +179 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_conversion.py +562 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_fillna.py +60 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_misc.py +191 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_transpose.py +56 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_unique.py +124 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (7.71 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/_typing.cpython-312.pyc
ADDED
|
Binary file (14.7 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/_version.cpython-312.pyc
ADDED
|
Binary file (22 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/_version_meson.cpython-312.pyc
ADDED
|
Binary file (312 Bytes). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/conftest.cpython-312.pyc
ADDED
|
Binary file (67.4 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/testing.cpython-312.pyc
ADDED
|
Binary file (478 Bytes). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/__init__.py
ADDED
|
@@ -0,0 +1,639 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from decimal import Decimal
|
| 4 |
+
import operator
|
| 5 |
+
import os
|
| 6 |
+
from sys import byteorder
|
| 7 |
+
from typing import (
|
| 8 |
+
TYPE_CHECKING,
|
| 9 |
+
Callable,
|
| 10 |
+
ContextManager,
|
| 11 |
+
cast,
|
| 12 |
+
)
|
| 13 |
+
import warnings
|
| 14 |
+
|
| 15 |
+
import numpy as np
|
| 16 |
+
|
| 17 |
+
from pandas._config.localization import (
|
| 18 |
+
can_set_locale,
|
| 19 |
+
get_locales,
|
| 20 |
+
set_locale,
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
from pandas.compat import pa_version_under10p1
|
| 24 |
+
|
| 25 |
+
from pandas.core.dtypes.common import is_string_dtype
|
| 26 |
+
|
| 27 |
+
import pandas as pd
|
| 28 |
+
from pandas import (
|
| 29 |
+
ArrowDtype,
|
| 30 |
+
DataFrame,
|
| 31 |
+
Index,
|
| 32 |
+
MultiIndex,
|
| 33 |
+
RangeIndex,
|
| 34 |
+
Series,
|
| 35 |
+
)
|
| 36 |
+
from pandas._testing._io import (
|
| 37 |
+
round_trip_localpath,
|
| 38 |
+
round_trip_pathlib,
|
| 39 |
+
round_trip_pickle,
|
| 40 |
+
write_to_compressed,
|
| 41 |
+
)
|
| 42 |
+
from pandas._testing._warnings import (
|
| 43 |
+
assert_produces_warning,
|
| 44 |
+
maybe_produces_warning,
|
| 45 |
+
)
|
| 46 |
+
from pandas._testing.asserters import (
|
| 47 |
+
assert_almost_equal,
|
| 48 |
+
assert_attr_equal,
|
| 49 |
+
assert_categorical_equal,
|
| 50 |
+
assert_class_equal,
|
| 51 |
+
assert_contains_all,
|
| 52 |
+
assert_copy,
|
| 53 |
+
assert_datetime_array_equal,
|
| 54 |
+
assert_dict_equal,
|
| 55 |
+
assert_equal,
|
| 56 |
+
assert_extension_array_equal,
|
| 57 |
+
assert_frame_equal,
|
| 58 |
+
assert_index_equal,
|
| 59 |
+
assert_indexing_slices_equivalent,
|
| 60 |
+
assert_interval_array_equal,
|
| 61 |
+
assert_is_sorted,
|
| 62 |
+
assert_is_valid_plot_return_object,
|
| 63 |
+
assert_metadata_equivalent,
|
| 64 |
+
assert_numpy_array_equal,
|
| 65 |
+
assert_period_array_equal,
|
| 66 |
+
assert_series_equal,
|
| 67 |
+
assert_sp_array_equal,
|
| 68 |
+
assert_timedelta_array_equal,
|
| 69 |
+
raise_assert_detail,
|
| 70 |
+
)
|
| 71 |
+
from pandas._testing.compat import (
|
| 72 |
+
get_dtype,
|
| 73 |
+
get_obj,
|
| 74 |
+
)
|
| 75 |
+
from pandas._testing.contexts import (
|
| 76 |
+
assert_cow_warning,
|
| 77 |
+
decompress_file,
|
| 78 |
+
ensure_clean,
|
| 79 |
+
raises_chained_assignment_error,
|
| 80 |
+
set_timezone,
|
| 81 |
+
use_numexpr,
|
| 82 |
+
with_csv_dialect,
|
| 83 |
+
)
|
| 84 |
+
from pandas.core.arrays import (
|
| 85 |
+
BaseMaskedArray,
|
| 86 |
+
ExtensionArray,
|
| 87 |
+
NumpyExtensionArray,
|
| 88 |
+
)
|
| 89 |
+
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
|
| 90 |
+
from pandas.core.construction import extract_array
|
| 91 |
+
|
| 92 |
+
if TYPE_CHECKING:
|
| 93 |
+
from pandas._typing import (
|
| 94 |
+
Dtype,
|
| 95 |
+
NpDtype,
|
| 96 |
+
)
|
| 97 |
+
|
| 98 |
+
from pandas.core.arrays import ArrowExtensionArray
|
| 99 |
+
|
| 100 |
+
UNSIGNED_INT_NUMPY_DTYPES: list[NpDtype] = ["uint8", "uint16", "uint32", "uint64"]
|
| 101 |
+
UNSIGNED_INT_EA_DTYPES: list[Dtype] = ["UInt8", "UInt16", "UInt32", "UInt64"]
|
| 102 |
+
SIGNED_INT_NUMPY_DTYPES: list[NpDtype] = [int, "int8", "int16", "int32", "int64"]
|
| 103 |
+
SIGNED_INT_EA_DTYPES: list[Dtype] = ["Int8", "Int16", "Int32", "Int64"]
|
| 104 |
+
ALL_INT_NUMPY_DTYPES = UNSIGNED_INT_NUMPY_DTYPES + SIGNED_INT_NUMPY_DTYPES
|
| 105 |
+
ALL_INT_EA_DTYPES = UNSIGNED_INT_EA_DTYPES + SIGNED_INT_EA_DTYPES
|
| 106 |
+
ALL_INT_DTYPES: list[Dtype] = [*ALL_INT_NUMPY_DTYPES, *ALL_INT_EA_DTYPES]
|
| 107 |
+
|
| 108 |
+
FLOAT_NUMPY_DTYPES: list[NpDtype] = [float, "float32", "float64"]
|
| 109 |
+
FLOAT_EA_DTYPES: list[Dtype] = ["Float32", "Float64"]
|
| 110 |
+
ALL_FLOAT_DTYPES: list[Dtype] = [*FLOAT_NUMPY_DTYPES, *FLOAT_EA_DTYPES]
|
| 111 |
+
|
| 112 |
+
COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"]
|
| 113 |
+
STRING_DTYPES: list[Dtype] = [str, "str", "U"]
|
| 114 |
+
COMPLEX_FLOAT_DTYPES: list[Dtype] = [*COMPLEX_DTYPES, *FLOAT_NUMPY_DTYPES]
|
| 115 |
+
|
| 116 |
+
DATETIME64_DTYPES: list[Dtype] = ["datetime64[ns]", "M8[ns]"]
|
| 117 |
+
TIMEDELTA64_DTYPES: list[Dtype] = ["timedelta64[ns]", "m8[ns]"]
|
| 118 |
+
|
| 119 |
+
BOOL_DTYPES: list[Dtype] = [bool, "bool"]
|
| 120 |
+
BYTES_DTYPES: list[Dtype] = [bytes, "bytes"]
|
| 121 |
+
OBJECT_DTYPES: list[Dtype] = [object, "object"]
|
| 122 |
+
|
| 123 |
+
ALL_REAL_NUMPY_DTYPES = FLOAT_NUMPY_DTYPES + ALL_INT_NUMPY_DTYPES
|
| 124 |
+
ALL_REAL_EXTENSION_DTYPES = FLOAT_EA_DTYPES + ALL_INT_EA_DTYPES
|
| 125 |
+
ALL_REAL_DTYPES: list[Dtype] = [*ALL_REAL_NUMPY_DTYPES, *ALL_REAL_EXTENSION_DTYPES]
|
| 126 |
+
ALL_NUMERIC_DTYPES: list[Dtype] = [*ALL_REAL_DTYPES, *COMPLEX_DTYPES]
|
| 127 |
+
|
| 128 |
+
ALL_NUMPY_DTYPES = (
|
| 129 |
+
ALL_REAL_NUMPY_DTYPES
|
| 130 |
+
+ COMPLEX_DTYPES
|
| 131 |
+
+ STRING_DTYPES
|
| 132 |
+
+ DATETIME64_DTYPES
|
| 133 |
+
+ TIMEDELTA64_DTYPES
|
| 134 |
+
+ BOOL_DTYPES
|
| 135 |
+
+ OBJECT_DTYPES
|
| 136 |
+
+ BYTES_DTYPES
|
| 137 |
+
)
|
| 138 |
+
|
| 139 |
+
NARROW_NP_DTYPES = [
|
| 140 |
+
np.float16,
|
| 141 |
+
np.float32,
|
| 142 |
+
np.int8,
|
| 143 |
+
np.int16,
|
| 144 |
+
np.int32,
|
| 145 |
+
np.uint8,
|
| 146 |
+
np.uint16,
|
| 147 |
+
np.uint32,
|
| 148 |
+
]
|
| 149 |
+
|
| 150 |
+
PYTHON_DATA_TYPES = [
|
| 151 |
+
str,
|
| 152 |
+
int,
|
| 153 |
+
float,
|
| 154 |
+
complex,
|
| 155 |
+
list,
|
| 156 |
+
tuple,
|
| 157 |
+
range,
|
| 158 |
+
dict,
|
| 159 |
+
set,
|
| 160 |
+
frozenset,
|
| 161 |
+
bool,
|
| 162 |
+
bytes,
|
| 163 |
+
bytearray,
|
| 164 |
+
memoryview,
|
| 165 |
+
]
|
| 166 |
+
|
| 167 |
+
ENDIAN = {"little": "<", "big": ">"}[byteorder]
|
| 168 |
+
|
| 169 |
+
NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")]
|
| 170 |
+
NP_NAT_OBJECTS = [
|
| 171 |
+
cls("NaT", unit)
|
| 172 |
+
for cls in [np.datetime64, np.timedelta64]
|
| 173 |
+
for unit in [
|
| 174 |
+
"Y",
|
| 175 |
+
"M",
|
| 176 |
+
"W",
|
| 177 |
+
"D",
|
| 178 |
+
"h",
|
| 179 |
+
"m",
|
| 180 |
+
"s",
|
| 181 |
+
"ms",
|
| 182 |
+
"us",
|
| 183 |
+
"ns",
|
| 184 |
+
"ps",
|
| 185 |
+
"fs",
|
| 186 |
+
"as",
|
| 187 |
+
]
|
| 188 |
+
]
|
| 189 |
+
|
| 190 |
+
if not pa_version_under10p1:
|
| 191 |
+
import pyarrow as pa
|
| 192 |
+
|
| 193 |
+
UNSIGNED_INT_PYARROW_DTYPES = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()]
|
| 194 |
+
SIGNED_INT_PYARROW_DTYPES = [pa.int8(), pa.int16(), pa.int32(), pa.int64()]
|
| 195 |
+
ALL_INT_PYARROW_DTYPES = UNSIGNED_INT_PYARROW_DTYPES + SIGNED_INT_PYARROW_DTYPES
|
| 196 |
+
ALL_INT_PYARROW_DTYPES_STR_REPR = [
|
| 197 |
+
str(ArrowDtype(typ)) for typ in ALL_INT_PYARROW_DTYPES
|
| 198 |
+
]
|
| 199 |
+
|
| 200 |
+
# pa.float16 doesn't seem supported
|
| 201 |
+
# https://github.com/apache/arrow/blob/master/python/pyarrow/src/arrow/python/helpers.cc#L86
|
| 202 |
+
FLOAT_PYARROW_DTYPES = [pa.float32(), pa.float64()]
|
| 203 |
+
FLOAT_PYARROW_DTYPES_STR_REPR = [
|
| 204 |
+
str(ArrowDtype(typ)) for typ in FLOAT_PYARROW_DTYPES
|
| 205 |
+
]
|
| 206 |
+
DECIMAL_PYARROW_DTYPES = [pa.decimal128(7, 3)]
|
| 207 |
+
STRING_PYARROW_DTYPES = [pa.string()]
|
| 208 |
+
BINARY_PYARROW_DTYPES = [pa.binary()]
|
| 209 |
+
|
| 210 |
+
TIME_PYARROW_DTYPES = [
|
| 211 |
+
pa.time32("s"),
|
| 212 |
+
pa.time32("ms"),
|
| 213 |
+
pa.time64("us"),
|
| 214 |
+
pa.time64("ns"),
|
| 215 |
+
]
|
| 216 |
+
DATE_PYARROW_DTYPES = [pa.date32(), pa.date64()]
|
| 217 |
+
DATETIME_PYARROW_DTYPES = [
|
| 218 |
+
pa.timestamp(unit=unit, tz=tz)
|
| 219 |
+
for unit in ["s", "ms", "us", "ns"]
|
| 220 |
+
for tz in [None, "UTC", "US/Pacific", "US/Eastern"]
|
| 221 |
+
]
|
| 222 |
+
TIMEDELTA_PYARROW_DTYPES = [pa.duration(unit) for unit in ["s", "ms", "us", "ns"]]
|
| 223 |
+
|
| 224 |
+
BOOL_PYARROW_DTYPES = [pa.bool_()]
|
| 225 |
+
|
| 226 |
+
# TODO: Add container like pyarrow types:
|
| 227 |
+
# https://arrow.apache.org/docs/python/api/datatypes.html#factory-functions
|
| 228 |
+
ALL_PYARROW_DTYPES = (
|
| 229 |
+
ALL_INT_PYARROW_DTYPES
|
| 230 |
+
+ FLOAT_PYARROW_DTYPES
|
| 231 |
+
+ DECIMAL_PYARROW_DTYPES
|
| 232 |
+
+ STRING_PYARROW_DTYPES
|
| 233 |
+
+ BINARY_PYARROW_DTYPES
|
| 234 |
+
+ TIME_PYARROW_DTYPES
|
| 235 |
+
+ DATE_PYARROW_DTYPES
|
| 236 |
+
+ DATETIME_PYARROW_DTYPES
|
| 237 |
+
+ TIMEDELTA_PYARROW_DTYPES
|
| 238 |
+
+ BOOL_PYARROW_DTYPES
|
| 239 |
+
)
|
| 240 |
+
ALL_REAL_PYARROW_DTYPES_STR_REPR = (
|
| 241 |
+
ALL_INT_PYARROW_DTYPES_STR_REPR + FLOAT_PYARROW_DTYPES_STR_REPR
|
| 242 |
+
)
|
| 243 |
+
else:
|
| 244 |
+
FLOAT_PYARROW_DTYPES_STR_REPR = []
|
| 245 |
+
ALL_INT_PYARROW_DTYPES_STR_REPR = []
|
| 246 |
+
ALL_PYARROW_DTYPES = []
|
| 247 |
+
ALL_REAL_PYARROW_DTYPES_STR_REPR = []
|
| 248 |
+
|
| 249 |
+
ALL_REAL_NULLABLE_DTYPES = (
|
| 250 |
+
FLOAT_NUMPY_DTYPES + ALL_REAL_EXTENSION_DTYPES + ALL_REAL_PYARROW_DTYPES_STR_REPR
|
| 251 |
+
)
|
| 252 |
+
|
| 253 |
+
arithmetic_dunder_methods = [
|
| 254 |
+
"__add__",
|
| 255 |
+
"__radd__",
|
| 256 |
+
"__sub__",
|
| 257 |
+
"__rsub__",
|
| 258 |
+
"__mul__",
|
| 259 |
+
"__rmul__",
|
| 260 |
+
"__floordiv__",
|
| 261 |
+
"__rfloordiv__",
|
| 262 |
+
"__truediv__",
|
| 263 |
+
"__rtruediv__",
|
| 264 |
+
"__pow__",
|
| 265 |
+
"__rpow__",
|
| 266 |
+
"__mod__",
|
| 267 |
+
"__rmod__",
|
| 268 |
+
]
|
| 269 |
+
|
| 270 |
+
comparison_dunder_methods = ["__eq__", "__ne__", "__le__", "__lt__", "__ge__", "__gt__"]
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
# -----------------------------------------------------------------------------
|
| 274 |
+
# Comparators
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
def box_expected(expected, box_cls, transpose: bool = True):
|
| 278 |
+
"""
|
| 279 |
+
Helper function to wrap the expected output of a test in a given box_class.
|
| 280 |
+
|
| 281 |
+
Parameters
|
| 282 |
+
----------
|
| 283 |
+
expected : np.ndarray, Index, Series
|
| 284 |
+
box_cls : {Index, Series, DataFrame}
|
| 285 |
+
|
| 286 |
+
Returns
|
| 287 |
+
-------
|
| 288 |
+
subclass of box_cls
|
| 289 |
+
"""
|
| 290 |
+
if box_cls is pd.array:
|
| 291 |
+
if isinstance(expected, RangeIndex):
|
| 292 |
+
# pd.array would return an IntegerArray
|
| 293 |
+
expected = NumpyExtensionArray(np.asarray(expected._values))
|
| 294 |
+
else:
|
| 295 |
+
expected = pd.array(expected, copy=False)
|
| 296 |
+
elif box_cls is Index:
|
| 297 |
+
with warnings.catch_warnings():
|
| 298 |
+
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
|
| 299 |
+
expected = Index(expected)
|
| 300 |
+
elif box_cls is Series:
|
| 301 |
+
with warnings.catch_warnings():
|
| 302 |
+
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
|
| 303 |
+
expected = Series(expected)
|
| 304 |
+
elif box_cls is DataFrame:
|
| 305 |
+
with warnings.catch_warnings():
|
| 306 |
+
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
|
| 307 |
+
expected = Series(expected).to_frame()
|
| 308 |
+
if transpose:
|
| 309 |
+
# for vector operations, we need a DataFrame to be a single-row,
|
| 310 |
+
# not a single-column, in order to operate against non-DataFrame
|
| 311 |
+
# vectors of the same length. But convert to two rows to avoid
|
| 312 |
+
# single-row special cases in datetime arithmetic
|
| 313 |
+
expected = expected.T
|
| 314 |
+
expected = pd.concat([expected] * 2, ignore_index=True)
|
| 315 |
+
elif box_cls is np.ndarray or box_cls is np.array:
|
| 316 |
+
expected = np.array(expected)
|
| 317 |
+
elif box_cls is to_array:
|
| 318 |
+
expected = to_array(expected)
|
| 319 |
+
else:
|
| 320 |
+
raise NotImplementedError(box_cls)
|
| 321 |
+
return expected
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
def to_array(obj):
    """
    Similar to pd.array, but does not cast numpy dtypes to nullable dtypes.

    Objects without a ``dtype`` attribute (or whose ``dtype`` is None) are
    passed through ``np.asarray``; everything else is unwrapped with
    ``extract_array(..., extract_numpy=True)``.
    """
    # temporary implementation until we get pd.array in place
    if getattr(obj, "dtype", None) is None:
        return np.asarray(obj)
    return extract_array(obj, extract_numpy=True)
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
class SubclassedSeries(Series):
    """Series subclass used by tests that check subclass propagation."""

    _metadata = ["testattr", "name"]

    @property
    def _constructor(self):
        # For testing, those properties return a generic callable, and not
        # the actual class. In this case that is equivalent, but it is to
        # ensure we don't rely on the property returning a class
        # See https://github.com/pandas-dev/pandas/pull/46018 and
        # https://github.com/pandas-dev/pandas/issues/32638 and linked issues
        def make_series(*args, **kwargs):
            return SubclassedSeries(*args, **kwargs)

        return make_series

    @property
    def _constructor_expanddim(self):
        # Same generic-callable approach for the Series -> DataFrame direction.
        def make_frame(*args, **kwargs):
            return SubclassedDataFrame(*args, **kwargs)

        return make_frame
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
class SubclassedDataFrame(DataFrame):
    """DataFrame subclass used by tests that check subclass propagation."""

    _metadata = ["testattr"]

    @property
    def _constructor(self):
        # Deliberately a plain callable rather than the class itself.
        def make_frame(*args, **kwargs):
            return SubclassedDataFrame(*args, **kwargs)

        return make_frame

    @property
    def _constructor_sliced(self):
        # Deliberately a plain callable rather than the class itself.
        def make_series(*args, **kwargs):
            return SubclassedSeries(*args, **kwargs)

        return make_series
|
| 364 |
+
|
| 365 |
+
|
| 366 |
+
def convert_rows_list_to_csv_str(rows_list: list[str]) -> str:
    """
    Join CSV rows into one string using the current OS line separator.

    This method is used for creating expected value of to_csv() method.

    Parameters
    ----------
    rows_list : List[str]
        Each element represents the row of csv.

    Returns
    -------
    str
        Expected output of to_csv() in current OS. Every row, including
        the last one, is terminated by ``os.linesep``.
    """
    line_end = os.linesep
    joined_rows = line_end.join(rows_list)
    return joined_rows + line_end
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
def external_error_raised(expected_exception: type[Exception]) -> ContextManager:
    """
    Helper function to mark pytest.raises that have an external error message.

    Parameters
    ----------
    expected_exception : type[Exception]
        Exception class that the wrapped code is expected to raise.

    Returns
    -------
    ContextManager
        The object returned by ``pytest.raises(expected_exception, match=None)``,
        i.e. a context manager that does not check the error message.
    """
    # Imported lazily so that importing this module does not require pytest.
    import pytest

    return pytest.raises(expected_exception, match=None)
|
| 403 |
+
|
| 404 |
+
|
| 405 |
+
cython_table = pd.core.common._cython_table.items()
|
| 406 |
+
|
| 407 |
+
|
| 408 |
+
def get_cython_table_params(ndframe, func_names_and_expected):
    """
    Combine frame, functions from com._cython_table
    keys and expected result.

    Parameters
    ----------
    ndframe : DataFrame or Series
    func_names_and_expected : Sequence of two items
        The first item is a name of a NDFrame method ('sum', 'prod') etc.
        The second item is the expected return value.

    Returns
    -------
    list
        List of three items (DataFrame, function, expected result)
    """
    params = []
    for func_name, expected in func_names_and_expected:
        # First the method name itself ...
        params.append((ndframe, func_name, expected))
        # ... then every cython_table function registered under that name.
        for func, name in cython_table:
            if name == func_name:
                params.append((ndframe, func, expected))
    return params
|
| 434 |
+
|
| 435 |
+
|
| 436 |
+
def get_op_from_name(op_name: str) -> Callable:
    """
    The operator function for a given op name.

    Parameters
    ----------
    op_name : str
        The op name, in form of "add" or "__add__". Names that collide with
        Python keywords ("and", "or", ...) resolve to the underscore-suffixed
        functions of the ``operator`` module (``and_``, ``or_``, ...).

    Returns
    -------
    function
        A function performing the operation. For reflected names such as
        "__radd__" the returned function swaps its two arguments.

    Raises
    ------
    AttributeError
        If neither the name nor its un-reflected form exists in ``operator``.
    """

    def _lookup(name: str) -> Callable:
        # ``operator`` appends "_" to names that would otherwise be keywords.
        for candidate in (name, f"{name}_"):
            found = getattr(operator, candidate, None)
            if found is not None:
                return found
        raise AttributeError(f"module 'operator' has no attribute {name!r}")

    short_opname = op_name.strip("_")
    try:
        return _lookup(short_opname)
    except AttributeError:
        # Assume it is the reverse operator
        rop = _lookup(short_opname[1:])

    def reversed_op(x, y):
        return rop(y, x)

    return reversed_op
|
| 459 |
+
|
| 460 |
+
|
| 461 |
+
# -----------------------------------------------------------------------------
|
| 462 |
+
# Indexing test helpers
|
| 463 |
+
|
| 464 |
+
|
| 465 |
+
def getitem(x):
    """Return ``x`` unchanged, so plain ``obj[...]`` indexing is applied."""
    return x


def setitem(x):
    """Return ``x`` unchanged, so plain ``obj[...] = value`` is applied."""
    return x


def loc(x):
    """Return the ``.loc`` attribute of ``x``."""
    return x.loc


def iloc(x):
    """Return the ``.iloc`` attribute of ``x``."""
    return x.iloc


def at(x):
    """Return the ``.at`` attribute of ``x``."""
    return x.at


def iat(x):
    """Return the ``.iat`` attribute of ``x``."""
    return x.iat
|
| 487 |
+
|
| 488 |
+
|
| 489 |
+
# -----------------------------------------------------------------------------
|
| 490 |
+
|
| 491 |
+
# Supported units ordered from coarsest to finest resolution.
_UNITS = ["s", "ms", "us", "ns"]


def get_finest_unit(left: str, right: str) -> str:
    """
    Find the finer (higher-resolution) of two datetime64 units.

    Parameters
    ----------
    left, right : str
        Units taken from ``_UNITS`` ("s", "ms", "us", "ns").

    Returns
    -------
    str
        Whichever unit appears later in ``_UNITS``; ``left`` when both
        are the same.

    Raises
    ------
    ValueError
        If either unit is not listed in ``_UNITS``.
    """
    if _UNITS.index(left) >= _UNITS.index(right):
        return left
    return right
|
| 501 |
+
|
| 502 |
+
|
| 503 |
+
def _is_pyarrow_string_array(arr) -> bool:
    # True for ExtensionArrays with a string dtype stored via pyarrow.
    return (
        isinstance(arr, ExtensionArray)
        and is_string_dtype(arr.dtype)
        and arr.dtype.storage in ("pyarrow", "pyarrow_numpy")  # type: ignore[attr-defined]
    )


def shares_memory(left, right) -> bool:
    """
    Pandas-compat for np.shares_memory.

    ``left`` is unwrapped step by step (Index/Series -> values, extension
    array -> backing ndarray, ...) until two ndarrays can be handed to
    ``np.shares_memory``. Combinations that are not handled raise
    ``NotImplementedError``.
    """
    left_is_ndarray = isinstance(left, np.ndarray)
    if left_is_ndarray and isinstance(right, np.ndarray):
        return np.shares_memory(left, right)
    if left_is_ndarray:
        # Call with reversed args to get to unpacking logic below.
        return shares_memory(right, left)

    if isinstance(left, RangeIndex):
        return False
    if isinstance(left, MultiIndex):
        return shares_memory(left._codes, right)
    if isinstance(left, (Index, Series)):
        return shares_memory(left._values, right)

    if isinstance(left, NDArrayBackedExtensionArray):
        return shares_memory(left._ndarray, right)
    if isinstance(left, pd.core.arrays.SparseArray):
        return shares_memory(left.sp_values, right)
    if isinstance(left, pd.core.arrays.IntervalArray):
        return shares_memory(left._left, right) or shares_memory(left._right, right)

    if _is_pyarrow_string_array(left):
        # https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
        left = cast("ArrowExtensionArray", left)
        if _is_pyarrow_string_array(right):
            right = cast("ArrowExtensionArray", right)
            # Compare the second buffer of the first chunk on each side.
            left_buf1 = left._pa_array.chunk(0).buffers()[1]
            right_buf1 = right._pa_array.chunk(0).buffers()[1]
            return left_buf1 == right_buf1

    if isinstance(left, BaseMaskedArray) and isinstance(right, BaseMaskedArray):
        # By convention, we'll say these share memory if they share *either*
        #  the _data or the _mask
        shares_data = np.shares_memory(left._data, right._data)
        return shares_data or np.shares_memory(left._mask, right._mask)

    if isinstance(left, DataFrame) and len(left._mgr.arrays) == 1:
        only_array = left._mgr.arrays[0]
        return shares_memory(only_array, right)

    raise NotImplementedError(type(left), type(right))
|
| 558 |
+
|
| 559 |
+
|
| 560 |
+
__all__ = [
|
| 561 |
+
"ALL_INT_EA_DTYPES",
|
| 562 |
+
"ALL_INT_NUMPY_DTYPES",
|
| 563 |
+
"ALL_NUMPY_DTYPES",
|
| 564 |
+
"ALL_REAL_NUMPY_DTYPES",
|
| 565 |
+
"assert_almost_equal",
|
| 566 |
+
"assert_attr_equal",
|
| 567 |
+
"assert_categorical_equal",
|
| 568 |
+
"assert_class_equal",
|
| 569 |
+
"assert_contains_all",
|
| 570 |
+
"assert_copy",
|
| 571 |
+
"assert_datetime_array_equal",
|
| 572 |
+
"assert_dict_equal",
|
| 573 |
+
"assert_equal",
|
| 574 |
+
"assert_extension_array_equal",
|
| 575 |
+
"assert_frame_equal",
|
| 576 |
+
"assert_index_equal",
|
| 577 |
+
"assert_indexing_slices_equivalent",
|
| 578 |
+
"assert_interval_array_equal",
|
| 579 |
+
"assert_is_sorted",
|
| 580 |
+
"assert_is_valid_plot_return_object",
|
| 581 |
+
"assert_metadata_equivalent",
|
| 582 |
+
"assert_numpy_array_equal",
|
| 583 |
+
"assert_period_array_equal",
|
| 584 |
+
"assert_produces_warning",
|
| 585 |
+
"assert_series_equal",
|
| 586 |
+
"assert_sp_array_equal",
|
| 587 |
+
"assert_timedelta_array_equal",
|
| 588 |
+
"assert_cow_warning",
|
| 589 |
+
"at",
|
| 590 |
+
"BOOL_DTYPES",
|
| 591 |
+
"box_expected",
|
| 592 |
+
"BYTES_DTYPES",
|
| 593 |
+
"can_set_locale",
|
| 594 |
+
"COMPLEX_DTYPES",
|
| 595 |
+
"convert_rows_list_to_csv_str",
|
| 596 |
+
"DATETIME64_DTYPES",
|
| 597 |
+
"decompress_file",
|
| 598 |
+
"ENDIAN",
|
| 599 |
+
"ensure_clean",
|
| 600 |
+
"external_error_raised",
|
| 601 |
+
"FLOAT_EA_DTYPES",
|
| 602 |
+
"FLOAT_NUMPY_DTYPES",
|
| 603 |
+
"get_cython_table_params",
|
| 604 |
+
"get_dtype",
|
| 605 |
+
"getitem",
|
| 606 |
+
"get_locales",
|
| 607 |
+
"get_finest_unit",
|
| 608 |
+
"get_obj",
|
| 609 |
+
"get_op_from_name",
|
| 610 |
+
"iat",
|
| 611 |
+
"iloc",
|
| 612 |
+
"loc",
|
| 613 |
+
"maybe_produces_warning",
|
| 614 |
+
"NARROW_NP_DTYPES",
|
| 615 |
+
"NP_NAT_OBJECTS",
|
| 616 |
+
"NULL_OBJECTS",
|
| 617 |
+
"OBJECT_DTYPES",
|
| 618 |
+
"raise_assert_detail",
|
| 619 |
+
"raises_chained_assignment_error",
|
| 620 |
+
"round_trip_localpath",
|
| 621 |
+
"round_trip_pathlib",
|
| 622 |
+
"round_trip_pickle",
|
| 623 |
+
"setitem",
|
| 624 |
+
"set_locale",
|
| 625 |
+
"set_timezone",
|
| 626 |
+
"shares_memory",
|
| 627 |
+
"SIGNED_INT_EA_DTYPES",
|
| 628 |
+
"SIGNED_INT_NUMPY_DTYPES",
|
| 629 |
+
"STRING_DTYPES",
|
| 630 |
+
"SubclassedDataFrame",
|
| 631 |
+
"SubclassedSeries",
|
| 632 |
+
"TIMEDELTA64_DTYPES",
|
| 633 |
+
"to_array",
|
| 634 |
+
"UNSIGNED_INT_EA_DTYPES",
|
| 635 |
+
"UNSIGNED_INT_NUMPY_DTYPES",
|
| 636 |
+
"use_numexpr",
|
| 637 |
+
"with_csv_dialect",
|
| 638 |
+
"write_to_compressed",
|
| 639 |
+
]
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/_hypothesis.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Hypothesis data generator helpers.
|
| 3 |
+
"""
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
from hypothesis import strategies as st
|
| 7 |
+
from hypothesis.extra.dateutil import timezones as dateutil_timezones
|
| 8 |
+
from hypothesis.extra.pytz import timezones as pytz_timezones
|
| 9 |
+
|
| 10 |
+
from pandas.compat import is_platform_windows
|
| 11 |
+
|
| 12 |
+
import pandas as pd
|
| 13 |
+
|
| 14 |
+
from pandas.tseries.offsets import (
|
| 15 |
+
BMonthBegin,
|
| 16 |
+
BMonthEnd,
|
| 17 |
+
BQuarterBegin,
|
| 18 |
+
BQuarterEnd,
|
| 19 |
+
BYearBegin,
|
| 20 |
+
BYearEnd,
|
| 21 |
+
MonthBegin,
|
| 22 |
+
MonthEnd,
|
| 23 |
+
QuarterBegin,
|
| 24 |
+
QuarterEnd,
|
| 25 |
+
YearBegin,
|
| 26 |
+
YearEnd,
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
# Every OPTIONAL_* strategy below draws a list of 3 to 10 elements in which
# any element may be None.
OPTIONAL_INTS = st.lists(
    st.one_of(st.integers(), st.none()),
    min_size=3,
    max_size=10,
)

OPTIONAL_FLOATS = st.lists(
    st.one_of(st.floats(), st.none()),
    min_size=3,
    max_size=10,
)

OPTIONAL_TEXT = st.lists(
    st.one_of(st.none(), st.text()),
    min_size=3,
    max_size=10,
)

OPTIONAL_DICTS = st.lists(
    st.one_of(st.none(), st.dictionaries(st.text(), st.integers())),
    min_size=3,
    max_size=10,
)

OPTIONAL_LISTS = st.lists(
    st.one_of(st.none(), st.lists(st.text(), min_size=3, max_size=10)),
    min_size=3,
    max_size=10,
)

# Draws from any one of the five list strategies above.
OPTIONAL_ONE_OF_ALL = st.one_of(
    OPTIONAL_DICTS, OPTIONAL_FLOATS, OPTIONAL_INTS, OPTIONAL_LISTS, OPTIONAL_TEXT
)

# Naive datetimes; on Windows the lower bound is pinned to 1900-01-01.
DATETIME_NO_TZ = (
    st.datetimes(min_value=datetime(1900, 1, 1))
    if is_platform_windows()
    else st.datetimes()
)

# Both bounds are 1900-01-01, so only the (optional) timezone varies.
DATETIME_JAN_1_1900_OPTIONAL_TZ = st.datetimes(
    min_value=pd.Timestamp(
        1900, 1, 1
    ).to_pydatetime(),  # pyright: ignore[reportGeneralTypeIssues]
    max_value=pd.Timestamp(
        1900, 1, 1
    ).to_pydatetime(),  # pyright: ignore[reportGeneralTypeIssues]
    timezones=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()),
)

# Naive datetimes bounded by pd.Timestamp.min / pd.Timestamp.max.
DATETIME_IN_PD_TIMESTAMP_RANGE_NO_TZ = st.datetimes(
    min_value=pd.Timestamp.min.to_pydatetime(warn=False),
    max_value=pd.Timestamp.max.to_pydatetime(warn=False),
)

INT_NEG_999_TO_POS_999 = st.integers(-999, 999)

# The strategy for each type is registered in conftest.py, as they don't carry
# enough runtime information (e.g. type hints) to infer how to build them.
YQM_OFFSET = st.one_of(
    *(
        st.from_type(offset_cls)
        for offset_cls in (
            MonthBegin,
            MonthEnd,
            BMonthBegin,
            BMonthEnd,
            QuarterBegin,
            QuarterEnd,
            BQuarterBegin,
            BQuarterEnd,
            YearBegin,
            YearEnd,
            BYearBegin,
            BYearEnd,
        )
    )
)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/_io.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import gzip
|
| 4 |
+
import io
|
| 5 |
+
import pathlib
|
| 6 |
+
import tarfile
|
| 7 |
+
from typing import (
|
| 8 |
+
TYPE_CHECKING,
|
| 9 |
+
Any,
|
| 10 |
+
Callable,
|
| 11 |
+
)
|
| 12 |
+
import uuid
|
| 13 |
+
import zipfile
|
| 14 |
+
|
| 15 |
+
from pandas.compat import (
|
| 16 |
+
get_bz2_file,
|
| 17 |
+
get_lzma_file,
|
| 18 |
+
)
|
| 19 |
+
from pandas.compat._optional import import_optional_dependency
|
| 20 |
+
|
| 21 |
+
import pandas as pd
|
| 22 |
+
from pandas._testing.contexts import ensure_clean
|
| 23 |
+
|
| 24 |
+
if TYPE_CHECKING:
|
| 25 |
+
from pandas._typing import (
|
| 26 |
+
FilePath,
|
| 27 |
+
ReadPickleBuffer,
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
from pandas import (
|
| 31 |
+
DataFrame,
|
| 32 |
+
Series,
|
| 33 |
+
)
|
| 34 |
+
|
| 35 |
+
# ------------------------------------------------------------------
|
| 36 |
+
# File-IO
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def round_trip_pickle(
    obj: Any, path: FilePath | ReadPickleBuffer | None = None
) -> DataFrame | Series:
    """
    Serialize an object with pickle and immediately load it back.

    Parameters
    ----------
    obj : any object
        The object written with ``pd.to_pickle`` and then re-read.
    path : str, path object or file-like object, default None
        Handed to ``ensure_clean`` to obtain the temporary location. When
        None, a unique ``__<uuid4>__.pickle`` name is generated.

    Returns
    -------
    pandas object
        Whatever ``pd.read_pickle`` returns for the file just written.
    """
    target = f"__{uuid.uuid4()}__.pickle" if path is None else path
    with ensure_clean(target) as temp_path:
        pd.to_pickle(obj, temp_path)
        return pd.read_pickle(temp_path)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def round_trip_pathlib(writer, reader, path: str | None = None):
    """
    Write via ``writer`` to a ``pathlib.Path`` and read it back via ``reader``.

    Parameters
    ----------
    writer : callable bound to pandas object
        IO writing function (e.g. DataFrame.to_csv ); called with the Path.
    reader : callable
        IO reading function (e.g. pd.read_csv ); called with the Path.
    path : str, default None
        File name handed to ``ensure_clean``; ``"___pathlib___"`` when None.

    Returns
    -------
    pandas object
        The value returned by ``reader``.
    """
    filename = "___pathlib___" if path is None else path
    with ensure_clean(filename) as temp_name:
        writer(pathlib.Path(temp_name))
        return reader(pathlib.Path(temp_name))
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def round_trip_localpath(writer, reader, path: str | None = None):
    """
    Write via ``writer`` to a py.path LocalPath and read it back via ``reader``.

    Parameters
    ----------
    writer : callable bound to pandas object
        IO writing function (e.g. DataFrame.to_csv ); called with the LocalPath.
    reader : callable
        IO reading function (e.g. pd.read_csv ); called with the LocalPath.
    path : str, default None
        File name handed to ``ensure_clean``; ``"___localpath___"`` when None.

    Returns
    -------
    pandas object
        The value returned by ``reader``.
    """
    import pytest

    # importorskip skips the calling test when ``py.path`` is unavailable.
    local_path_cls = pytest.importorskip("py.path").local
    filename = "___localpath___" if path is None else path
    with ensure_clean(filename) as temp_name:
        writer(local_path_cls(temp_name))
        return reader(local_path_cls(temp_name))
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def write_to_compressed(compression, path, data, dest: str = "test") -> None:
|
| 122 |
+
"""
|
| 123 |
+
Write data to a compressed file.
|
| 124 |
+
|
| 125 |
+
Parameters
|
| 126 |
+
----------
|
| 127 |
+
compression : {'gzip', 'bz2', 'zip', 'xz', 'zstd'}
|
| 128 |
+
The compression type to use.
|
| 129 |
+
path : str
|
| 130 |
+
The file path to write the data.
|
| 131 |
+
data : str
|
| 132 |
+
The data to write.
|
| 133 |
+
dest : str, default "test"
|
| 134 |
+
The destination file (for ZIP only)
|
| 135 |
+
|
| 136 |
+
Raises
|
| 137 |
+
------
|
| 138 |
+
ValueError : An invalid compression value was passed in.
|
| 139 |
+
"""
|
| 140 |
+
args: tuple[Any, ...] = (data,)
|
| 141 |
+
mode = "wb"
|
| 142 |
+
method = "write"
|
| 143 |
+
compress_method: Callable
|
| 144 |
+
|
| 145 |
+
if compression == "zip":
|
| 146 |
+
compress_method = zipfile.ZipFile
|
| 147 |
+
mode = "w"
|
| 148 |
+
args = (dest, data)
|
| 149 |
+
method = "writestr"
|
| 150 |
+
elif compression == "tar":
|
| 151 |
+
compress_method = tarfile.TarFile
|
| 152 |
+
mode = "w"
|
| 153 |
+
file = tarfile.TarInfo(name=dest)
|
| 154 |
+
bytes = io.BytesIO(data)
|
| 155 |
+
file.size = len(data)
|
| 156 |
+
args = (file, bytes)
|
| 157 |
+
method = "addfile"
|
| 158 |
+
elif compression == "gzip":
|
| 159 |
+
compress_method = gzip.GzipFile
|
| 160 |
+
elif compression == "bz2":
|
| 161 |
+
compress_method = get_bz2_file()
|
| 162 |
+
elif compression == "zstd":
|
| 163 |
+
compress_method = import_optional_dependency("zstandard").open
|
| 164 |
+
elif compression == "xz":
|
| 165 |
+
compress_method = get_lzma_file()
|
| 166 |
+
else:
|
| 167 |
+
raise ValueError(f"Unrecognized compression type: {compression}")
|
| 168 |
+
|
| 169 |
+
with compress_method(path, mode=mode) as f:
|
| 170 |
+
getattr(f, method)(*args)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/_warnings.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from contextlib import (
|
| 4 |
+
contextmanager,
|
| 5 |
+
nullcontext,
|
| 6 |
+
)
|
| 7 |
+
import inspect
|
| 8 |
+
import re
|
| 9 |
+
import sys
|
| 10 |
+
from typing import (
|
| 11 |
+
TYPE_CHECKING,
|
| 12 |
+
Literal,
|
| 13 |
+
cast,
|
| 14 |
+
)
|
| 15 |
+
import warnings
|
| 16 |
+
|
| 17 |
+
from pandas.compat import PY311
|
| 18 |
+
|
| 19 |
+
if TYPE_CHECKING:
|
| 20 |
+
from collections.abc import (
|
| 21 |
+
Generator,
|
| 22 |
+
Sequence,
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@contextmanager
def assert_produces_warning(
    expected_warning: type[Warning] | bool | tuple[type[Warning], ...] | None = Warning,
    filter_level: Literal[
        "error", "ignore", "always", "default", "module", "once"
    ] = "always",
    check_stacklevel: bool = True,
    raise_on_extra_warnings: bool = True,
    match: str | None = None,
) -> Generator[list[warnings.WarningMessage], None, None]:
    """
    Context manager asserting which warnings the enclosed code emits.

    Wraps ``warnings.catch_warnings(record=True)``: the recorded warnings are
    yielded to the caller and, when the block exits, checked against
    ``expected_warning`` (and optionally against any extra warnings).

    Parameters
    ----------
    expected_warning : {Warning, False, tuple[Warning, ...], None}, default Warning
        The warning class expected from the block. ``Warning`` is the base
        class for all warnings. Pass a tuple to accept several classes.
        Pass ``False`` or ``None`` to assert that no warning is emitted.
    filter_level : str or None, default "always"
        Forwarded to ``warnings.simplefilter`` inside the recording context.
        Valid values are:

        * "error" - turns matching warnings into exceptions
        * "ignore" - discard the warning
        * "always" - always emit a warning
        * "default" - print the warning the first time it is generated
          from each location
        * "module" - print the warning the first time it is generated
          from each module
        * "once" - print the warning the first time it is generated

    check_stacklevel : bool, default True
        If True, additionally assert that each expected warning is attributed
        to the file that called the function emitting it, i.e. that the
        warning was raised with a correct ``stacklevel``.
    raise_on_extra_warnings : bool, default True
        Whether extra warnings not of the type `expected_warning` should
        cause the test to fail.
    match : str, optional
        Regular expression searched for in the warning message.

    Examples
    --------
    >>> import warnings
    >>> with assert_produces_warning():
    ...     warnings.warn(UserWarning())
    ...
    >>> with assert_produces_warning(False):
    ...     warnings.warn(RuntimeWarning())
    ...
    Traceback (most recent call last):
        ...
    AssertionError: Caused unexpected warning(s): ['RuntimeWarning'].
    >>> with assert_produces_warning(UserWarning):
    ...     warnings.warn(RuntimeWarning())
    Traceback (most recent call last):
        ...
    AssertionError: Did not see expected warning of class 'UserWarning'.

    .. warning:: This is *not* thread-safe.
    """
    __tracebackhide__ = True

    with warnings.catch_warnings(record=True) as recorded:
        warnings.simplefilter(filter_level)
        try:
            yield recorded
        finally:
            # Both checks are called directly from this frame: the stacklevel
            # check walks a fixed number of frames back from the helper.
            if expected_warning:
                _assert_caught_expected_warning(
                    caught_warnings=recorded,
                    expected_warning=cast(type[Warning], expected_warning),
                    match=match,
                    check_stacklevel=check_stacklevel,
                )
            if raise_on_extra_warnings:
                _assert_caught_no_extra_warnings(
                    caught_warnings=recorded,
                    expected_warning=expected_warning,
                )
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def maybe_produces_warning(warning: type[Warning], condition: bool, **kwargs):
    """
    Return ``assert_produces_warning(warning, **kwargs)`` when ``condition``
    is truthy; otherwise return a do-nothing ``nullcontext``.
    """
    if not condition:
        return nullcontext()
    return assert_produces_warning(warning, **kwargs)
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def _assert_caught_expected_warning(
|
| 126 |
+
*,
|
| 127 |
+
caught_warnings: Sequence[warnings.WarningMessage],
|
| 128 |
+
expected_warning: type[Warning],
|
| 129 |
+
match: str | None,
|
| 130 |
+
check_stacklevel: bool,
|
| 131 |
+
) -> None:
|
| 132 |
+
"""Assert that there was the expected warning among the caught warnings."""
|
| 133 |
+
saw_warning = False
|
| 134 |
+
matched_message = False
|
| 135 |
+
unmatched_messages = []
|
| 136 |
+
|
| 137 |
+
for actual_warning in caught_warnings:
|
| 138 |
+
if issubclass(actual_warning.category, expected_warning):
|
| 139 |
+
saw_warning = True
|
| 140 |
+
|
| 141 |
+
if check_stacklevel:
|
| 142 |
+
_assert_raised_with_correct_stacklevel(actual_warning)
|
| 143 |
+
|
| 144 |
+
if match is not None:
|
| 145 |
+
if re.search(match, str(actual_warning.message)):
|
| 146 |
+
matched_message = True
|
| 147 |
+
else:
|
| 148 |
+
unmatched_messages.append(actual_warning.message)
|
| 149 |
+
|
| 150 |
+
if not saw_warning:
|
| 151 |
+
raise AssertionError(
|
| 152 |
+
f"Did not see expected warning of class "
|
| 153 |
+
f"{repr(expected_warning.__name__)}"
|
| 154 |
+
)
|
| 155 |
+
|
| 156 |
+
if match and not matched_message:
|
| 157 |
+
raise AssertionError(
|
| 158 |
+
f"Did not see warning {repr(expected_warning.__name__)} "
|
| 159 |
+
f"matching '{match}'. The emitted warning messages are "
|
| 160 |
+
f"{unmatched_messages}"
|
| 161 |
+
)
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def _assert_caught_no_extra_warnings(
|
| 165 |
+
*,
|
| 166 |
+
caught_warnings: Sequence[warnings.WarningMessage],
|
| 167 |
+
expected_warning: type[Warning] | bool | tuple[type[Warning], ...] | None,
|
| 168 |
+
) -> None:
|
| 169 |
+
"""Assert that no extra warnings apart from the expected ones are caught."""
|
| 170 |
+
extra_warnings = []
|
| 171 |
+
|
| 172 |
+
for actual_warning in caught_warnings:
|
| 173 |
+
if _is_unexpected_warning(actual_warning, expected_warning):
|
| 174 |
+
# GH#38630 pytest.filterwarnings does not suppress these.
|
| 175 |
+
if actual_warning.category == ResourceWarning:
|
| 176 |
+
# GH 44732: Don't make the CI flaky by filtering SSL-related
|
| 177 |
+
# ResourceWarning from dependencies
|
| 178 |
+
if "unclosed <ssl.SSLSocket" in str(actual_warning.message):
|
| 179 |
+
continue
|
| 180 |
+
# GH 44844: Matplotlib leaves font files open during the entire process
|
| 181 |
+
# upon import. Don't make CI flaky if ResourceWarning raised
|
| 182 |
+
# due to these open files.
|
| 183 |
+
if any("matplotlib" in mod for mod in sys.modules):
|
| 184 |
+
continue
|
| 185 |
+
if PY311 and actual_warning.category == EncodingWarning:
|
| 186 |
+
# EncodingWarnings are checked in the CI
|
| 187 |
+
# pyproject.toml errors on EncodingWarnings in pandas
|
| 188 |
+
# Ignore EncodingWarnings from other libraries
|
| 189 |
+
continue
|
| 190 |
+
extra_warnings.append(
|
| 191 |
+
(
|
| 192 |
+
actual_warning.category.__name__,
|
| 193 |
+
actual_warning.message,
|
| 194 |
+
actual_warning.filename,
|
| 195 |
+
actual_warning.lineno,
|
| 196 |
+
)
|
| 197 |
+
)
|
| 198 |
+
|
| 199 |
+
if extra_warnings:
|
| 200 |
+
raise AssertionError(f"Caused unexpected warning(s): {repr(extra_warnings)}")
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
def _is_unexpected_warning(
|
| 204 |
+
actual_warning: warnings.WarningMessage,
|
| 205 |
+
expected_warning: type[Warning] | bool | tuple[type[Warning], ...] | None,
|
| 206 |
+
) -> bool:
|
| 207 |
+
"""Check if the actual warning issued is unexpected."""
|
| 208 |
+
if actual_warning and not expected_warning:
|
| 209 |
+
return True
|
| 210 |
+
expected_warning = cast(type[Warning], expected_warning)
|
| 211 |
+
return bool(not issubclass(actual_warning.category, expected_warning))
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
def _assert_raised_with_correct_stacklevel(
|
| 215 |
+
actual_warning: warnings.WarningMessage,
|
| 216 |
+
) -> None:
|
| 217 |
+
# https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow
|
| 218 |
+
frame = inspect.currentframe()
|
| 219 |
+
for _ in range(4):
|
| 220 |
+
frame = frame.f_back # type: ignore[union-attr]
|
| 221 |
+
try:
|
| 222 |
+
caller_filename = inspect.getfile(frame) # type: ignore[arg-type]
|
| 223 |
+
finally:
|
| 224 |
+
# See note in
|
| 225 |
+
# https://docs.python.org/3/library/inspect.html#inspect.Traceback
|
| 226 |
+
del frame
|
| 227 |
+
msg = (
|
| 228 |
+
"Warning not set with correct stacklevel. "
|
| 229 |
+
f"File where warning is raised: {actual_warning.filename} != "
|
| 230 |
+
f"{caller_filename}. Warning message: {actual_warning.message}"
|
| 231 |
+
)
|
| 232 |
+
assert actual_warning.filename == caller_filename, msg
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/asserters.py
ADDED
|
@@ -0,0 +1,1435 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import operator
|
| 4 |
+
from typing import (
|
| 5 |
+
TYPE_CHECKING,
|
| 6 |
+
Literal,
|
| 7 |
+
NoReturn,
|
| 8 |
+
cast,
|
| 9 |
+
)
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
|
| 13 |
+
from pandas._libs import lib
|
| 14 |
+
from pandas._libs.missing import is_matching_na
|
| 15 |
+
from pandas._libs.sparse import SparseIndex
|
| 16 |
+
import pandas._libs.testing as _testing
|
| 17 |
+
from pandas._libs.tslibs.np_datetime import compare_mismatched_resolutions
|
| 18 |
+
|
| 19 |
+
from pandas.core.dtypes.common import (
|
| 20 |
+
is_bool,
|
| 21 |
+
is_float_dtype,
|
| 22 |
+
is_integer_dtype,
|
| 23 |
+
is_number,
|
| 24 |
+
is_numeric_dtype,
|
| 25 |
+
needs_i8_conversion,
|
| 26 |
+
)
|
| 27 |
+
from pandas.core.dtypes.dtypes import (
|
| 28 |
+
CategoricalDtype,
|
| 29 |
+
DatetimeTZDtype,
|
| 30 |
+
ExtensionDtype,
|
| 31 |
+
NumpyEADtype,
|
| 32 |
+
)
|
| 33 |
+
from pandas.core.dtypes.missing import array_equivalent
|
| 34 |
+
|
| 35 |
+
import pandas as pd
|
| 36 |
+
from pandas import (
|
| 37 |
+
Categorical,
|
| 38 |
+
DataFrame,
|
| 39 |
+
DatetimeIndex,
|
| 40 |
+
Index,
|
| 41 |
+
IntervalDtype,
|
| 42 |
+
IntervalIndex,
|
| 43 |
+
MultiIndex,
|
| 44 |
+
PeriodIndex,
|
| 45 |
+
RangeIndex,
|
| 46 |
+
Series,
|
| 47 |
+
TimedeltaIndex,
|
| 48 |
+
)
|
| 49 |
+
from pandas.core.arrays import (
|
| 50 |
+
DatetimeArray,
|
| 51 |
+
ExtensionArray,
|
| 52 |
+
IntervalArray,
|
| 53 |
+
PeriodArray,
|
| 54 |
+
TimedeltaArray,
|
| 55 |
+
)
|
| 56 |
+
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
|
| 57 |
+
from pandas.core.arrays.string_ import StringDtype
|
| 58 |
+
from pandas.core.indexes.api import safe_sort_index
|
| 59 |
+
|
| 60 |
+
from pandas.io.formats.printing import pprint_thing
|
| 61 |
+
|
| 62 |
+
if TYPE_CHECKING:
|
| 63 |
+
from pandas._typing import DtypeObj
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def assert_almost_equal(
    left,
    right,
    check_dtype: bool | Literal["equiv"] = "equiv",
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    **kwargs,
) -> None:
    """
    Assert that ``left`` and ``right`` agree up to a numeric tolerance.

    Index, Series and DataFrame inputs are routed to the matching
    ``assert_*_equal`` helper with ``check_exact=False``; everything else is
    handed to the compiled ``_testing.assert_almost_equal`` routine.

    Parameters
    ----------
    left : object
    right : object
    check_dtype : bool or {'equiv'}, default 'equiv'
        Check dtype if both a and b are the same type. If 'equiv' is passed in,
        then `RangeIndex` and `Index` with int64 dtype are also considered
        equivalent when doing type checking.
    rtol : float, default 1e-5
        Relative tolerance.
    atol : float, default 1e-8
        Absolute tolerance.
    **kwargs
        Forwarded unchanged to whichever comparison routine is selected.
    """
    if isinstance(left, Index):
        # For an Index the dtype strictness is expressed through ``exact``.
        assert_index_equal(
            left,
            right,
            check_exact=False,
            exact=check_dtype,
            rtol=rtol,
            atol=atol,
            **kwargs,
        )
        return

    if isinstance(left, Series):
        assert_series_equal(
            left,
            right,
            check_exact=False,
            check_dtype=check_dtype,
            rtol=rtol,
            atol=atol,
            **kwargs,
        )
        return

    if isinstance(left, DataFrame):
        assert_frame_equal(
            left,
            right,
            check_exact=False,
            check_dtype=check_dtype,
            rtol=rtol,
            atol=atol,
            **kwargs,
        )
        return

    # Other sequences and scalars.
    if check_dtype:
        # Numeric classes (e.g. np.float64 vs float) and bool classes
        # (e.g. np.bool_ vs bool) are deliberately not compared by class.
        scalar_pair = (is_number(left) and is_number(right)) or (
            is_bool(left) and is_bool(right)
        )
        if not scalar_pair:
            involves_ndarray = isinstance(left, np.ndarray) or isinstance(
                right, np.ndarray
            )
            label = "numpy array" if involves_ndarray else "Input"
            assert_class_equal(left, right, obj=label)

    # if we have "equiv", this becomes True
    _testing.assert_almost_equal(
        left, right, check_dtype=bool(check_dtype), rtol=rtol, atol=atol, **kwargs
    )
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def _check_isinstance(left, right, cls) -> None:
    """
    Verify that both operands are instances of ``cls`` before comparing them.

    Shared guard used by the ``assert_*`` helpers. ``left`` is checked first,
    then ``right``; the first operand that fails triggers the error.

    Parameters
    ----------
    left : The first object being compared.
    right : The second object being compared.
    cls : The class type to check against.

    Raises
    ------
    AssertionError : Either `left` or `right` is not an instance of `cls`.
    """
    cls_name = cls.__name__

    for operand in (left, right):
        if isinstance(operand, cls):
            continue
        raise AssertionError(
            f"{cls_name} Expected type {cls}, found {type(operand)} instead"
        )
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
def assert_dict_equal(left, right, compare_keys: bool = True) -> None:
    """
    Assert that two dicts are equal.

    Both operands must be ``dict`` instances; the comparison itself is
    delegated to the compiled ``_testing.assert_dict_equal`` routine, with
    ``compare_keys`` forwarded unchanged.
    """
    _check_isinstance(left, right, dict)
    _testing.assert_dict_equal(left, right, compare_keys=compare_keys)
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
def assert_index_equal(
    left: Index,
    right: Index,
    exact: bool | str = "equiv",
    check_names: bool = True,
    check_exact: bool = True,
    check_categorical: bool = True,
    check_order: bool = True,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    obj: str = "Index",
) -> None:
    """
    Assert that two Index objects are equal.

    The checks run in this order: instance type, class/dtype, number of
    levels, length, values (per level for a MultiIndex), then metadata.

    Parameters
    ----------
    left : Index
    right : Index
    exact : bool or {'equiv'}, default 'equiv'
        Whether to check the Index class, dtype and inferred_type
        are identical. If 'equiv', then RangeIndex can be substituted for
        Index with an int64 dtype as well.
    check_names : bool, default True
        Whether to check the names attribute.
    check_exact : bool, default True
        Whether to compare number exactly.
    check_categorical : bool, default True
        Whether to compare internal Categorical exactly.
    check_order : bool, default True
        Whether to compare the order of index entries as well as their values.
        If True, both indexes must contain the same elements, in the same order.
        If False, both indexes must contain the same elements, but in any order.
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.
    obj : str, default 'Index'
        Specify object name being compared, internally used to show appropriate
        assertion message.

    Examples
    --------
    >>> from pandas import testing as tm
    >>> a = pd.Index([1, 2, 3])
    >>> b = pd.Index([1, 2, 3])
    >>> tm.assert_index_equal(a, b)
    """
    __tracebackhide__ = True

    def _check_types(left, right, obj: str = "Index") -> None:
        # Class / inferred_type / dtype agreement; a no-op when ``exact`` is falsy.
        if not exact:
            return

        assert_class_equal(left, right, exact=exact, obj=obj)
        assert_attr_equal("inferred_type", left, right, obj=obj)

        both_categorical = isinstance(left.dtype, CategoricalDtype) and isinstance(
            right.dtype, CategoricalDtype
        )
        if not both_categorical:
            assert_attr_equal("dtype", left, right, obj=obj)
            return

        # Skip exact dtype checking when `check_categorical` is False
        if check_categorical:
            assert_attr_equal("dtype", left, right, obj=obj)
            assert_index_equal(left.categories, right.categories, exact=exact)

    # instance validation
    _check_isinstance(left, right, Index)

    # class / dtype comparison
    _check_types(left, right, obj=obj)

    # level comparison
    if left.nlevels != right.nlevels:
        raise_assert_detail(
            obj,
            f"{obj} levels are different",
            f"{left.nlevels}, {left}",
            f"{right.nlevels}, {right}",
        )

    # length comparison
    if len(left) != len(right):
        raise_assert_detail(
            obj,
            f"{obj} length are different",
            f"{len(left)}, {left}",
            f"{len(right)}, {right}",
        )

    # If order doesn't matter then sort the index entries
    if not check_order:
        left = safe_sort_index(left)
        right = safe_sort_index(right)

    # MultiIndex special comparison for little-friendly error messages
    if isinstance(left, MultiIndex):
        right = cast(MultiIndex, right)

        for level in range(left.nlevels):
            # Options shared by both per-level comparisons below.
            level_opts = {
                "exact": exact,
                "check_names": check_names,
                "check_exact": check_exact,
                "check_categorical": check_categorical,
                "rtol": rtol,
                "atol": atol,
                "obj": f"MultiIndex level [{level}]",
            }
            try:
                # try comparison on levels/codes to avoid densifying MultiIndex
                assert_index_equal(
                    left.levels[level], right.levels[level], **level_opts
                )
                assert_numpy_array_equal(left.codes[level], right.codes[level])
            except AssertionError:
                # Levels/codes differ: fall back to the materialised values.
                assert_index_equal(
                    left.get_level_values(level),
                    right.get_level_values(level),
                    **level_opts,
                )
            # get_level_values may change dtype
            _check_types(left.levels[level], right.levels[level], obj=obj)

    # skip exact index checking when `check_categorical` is False
    elif check_exact and check_categorical:
        if not left.equals(right):
            mismatch = left._values != right._values

            if not isinstance(mismatch, np.ndarray):
                # Extension-array result: count missing entries as mismatches.
                mismatch = cast("ExtensionArray", mismatch).fillna(True)

            diff = np.sum(mismatch.astype(int)) * 100.0 / len(left)
            raise_assert_detail(
                obj, f"{obj} values are different ({np.round(diff, 5)} %)", left, right
            )
    else:
        # if we have "equiv", this becomes True
        _testing.assert_almost_equal(
            left.values,
            right.values,
            rtol=rtol,
            atol=atol,
            check_dtype=bool(exact),
            obj=obj,
            lobj=left,
            robj=right,
        )

    # metadata comparison
    if check_names:
        assert_attr_equal("names", left, right, obj=obj)
    if isinstance(left, PeriodIndex) or isinstance(right, PeriodIndex):
        assert_attr_equal("dtype", left, right, obj=obj)
    if isinstance(left, IntervalIndex) or isinstance(right, IntervalIndex):
        assert_interval_array_equal(left._values, right._values)

    if check_categorical and (
        isinstance(left.dtype, CategoricalDtype)
        or isinstance(right.dtype, CategoricalDtype)
    ):
        assert_categorical_equal(left._values, right._values, obj=f"{obj} category")
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
def assert_class_equal(
    left, right, exact: bool | str = True, obj: str = "Input"
) -> None:
    """
    Assert that ``left`` and ``right`` have the same class.

    With ``exact="equiv"``, a plain ``Index`` and a ``RangeIndex`` (or any
    subclass of it) are treated as interchangeable. ``obj`` is the label used
    in the failure message.
    """
    __tracebackhide__ = True

    if type(left) == type(right):
        return

    def _is_index_or_range(idx: Index) -> bool:
        # Only class equivalence is decided here; whether the dtype is
        # int64 is checked separately by callers.
        return type(idx) is Index or isinstance(idx, RangeIndex)

    if exact == "equiv" and _is_index_or_range(left) and _is_index_or_range(right):
        return

    def _describe(x):
        # An Index is passed through whole so its values show up in the
        # error message; anything else is reduced to its class name.
        return x if isinstance(x, Index) else type(x).__name__

    raise_assert_detail(
        obj, f"{obj} classes are different", _describe(left), _describe(right)
    )
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
def assert_attr_equal(attr: str, left, right, obj: str = "Attributes") -> None:
    """
    Assert that the attribute ``attr`` has equal values on both objects.

    Both objects must have the attribute.

    Parameters
    ----------
    attr : str
        Attribute name being compared.
    left : object
    right : object
    obj : str, default 'Attributes'
        Specify object name being compared, internally used to show appropriate
        assertion message
    """
    __tracebackhide__ = True

    lhs = getattr(left, attr)
    rhs = getattr(right, attr)

    if lhs is rhs or is_matching_na(lhs, rhs):
        # e.g. both np.nan, both NaT, both pd.NA, ...
        return None

    try:
        matches = lhs == rhs
    except TypeError:
        # datetimetz on rhs may raise TypeError
        matches = False

    if (lhs is pd.NA) != (rhs is pd.NA):
        # Exactly one side is pd.NA: never equal.
        matches = False
    elif not isinstance(matches, bool):
        # Array-like comparison result: every element has to match.
        matches = matches.all()

    if matches:
        return None

    raise_assert_detail(obj, f'Attribute "{attr}" are different', lhs, rhs)
|
| 423 |
+
|
| 424 |
+
|
| 425 |
+
def assert_is_valid_plot_return_object(objs) -> None:
    """
    Assert that ``objs`` is an acceptable return value of a plotting call.

    A Series or ndarray is accepted when every element (after flattening) is
    a matplotlib ``Axes`` or a ``dict``. Any other input is accepted when it
    is itself a matplotlib ``Artist``, a ``tuple`` or a ``dict``.

    Parameters
    ----------
    objs : object
        The value returned by the plotting call.

    Raises
    ------
    AssertionError
        If ``objs`` (or one of its elements) has an unexpected type.
    """
    # matplotlib is imported lazily so it is only needed when this helper runs.
    from matplotlib.artist import Artist
    from matplotlib.axes import Axes

    if isinstance(objs, (Series, np.ndarray)):
        if isinstance(objs, Series):
            objs = objs._values
        for el in objs.ravel():
            # Raise explicitly instead of using ``assert`` so the check is
            # still performed when Python runs with -O.
            if not isinstance(el, (Axes, dict)):
                raise AssertionError(
                    "one of 'objs' is not a matplotlib Axes instance, "
                    f"type encountered {repr(type(el).__name__)}"
                )
    elif not isinstance(objs, (Artist, tuple, dict)):
        raise AssertionError(
            "objs is neither an ndarray of Artist instances nor a single "
            "Artist instance, tuple, or dict, 'objs' is a "
            f"{repr(type(objs).__name__)}"
        )
|
| 445 |
+
|
| 446 |
+
|
| 447 |
+
def assert_is_sorted(seq) -> None:
    """
    Assert that the sequence is sorted.

    An Index or Series is unwrapped to its ``.values`` first. ndarrays are
    compared against ``np.sort`` of a copy; anything else is compared, as an
    extension array, against itself reordered by its own ``argsort``.
    """
    values = seq.values if isinstance(seq, (Index, Series)) else seq

    if not isinstance(values, np.ndarray):
        assert_extension_array_equal(values, values[values.argsort()])
        return

    # sorting does not change precisions
    assert_numpy_array_equal(values, np.sort(np.array(values)))
|
| 456 |
+
|
| 457 |
+
|
| 458 |
+
def assert_categorical_equal(
    left,
    right,
    check_dtype: bool = True,
    check_category_order: bool = True,
    obj: str = "Categorical",
) -> None:
    """
    Assert that two Categoricals are equivalent.

    Parameters
    ----------
    left : Categorical
    right : Categorical
    check_dtype : bool, default True
        Check that integer dtype of the codes are the same.
    check_category_order : bool, default True
        Whether the order of the categories should be compared, which
        implies identical integer codes. If False, only the resulting
        values are compared. The ordered attribute is
        checked regardless.
    obj : str, default 'Categorical'
        Specify object name being compared, internally used to show appropriate
        assertion message.
    """
    _check_isinstance(left, right, Categorical)

    left_cats = left.categories
    right_cats = right.categories

    # A RangeIndex on either side relaxes the class check to "equiv";
    # otherwise we still want to require exact matches for Index.
    exact: bool | str = (
        "equiv"
        if isinstance(left_cats, RangeIndex) or isinstance(right_cats, RangeIndex)
        else True
    )

    if check_category_order:
        assert_index_equal(left_cats, right_cats, obj=f"{obj}.categories", exact=exact)
        assert_numpy_array_equal(
            left.codes, right.codes, check_dtype=check_dtype, obj=f"{obj}.codes"
        )
    else:
        try:
            sorted_left = left_cats.sort_values()
            sorted_right = right_cats.sort_values()
        except TypeError:
            # e.g. '<' not supported between instances of 'int' and 'str'
            sorted_left, sorted_right = left_cats, right_cats
        assert_index_equal(
            sorted_left, sorted_right, obj=f"{obj}.categories", exact=exact
        )
        # Compare the decoded values rather than the raw codes.
        assert_index_equal(
            left_cats.take(left.codes),
            right_cats.take(right.codes),
            obj=f"{obj}.values",
            exact=exact,
        )

    assert_attr_equal("ordered", left, right, obj=obj)
|
| 517 |
+
|
| 518 |
+
|
| 519 |
+
def assert_interval_array_equal(
    left, right, exact: bool | Literal["equiv"] = "equiv", obj: str = "IntervalArray"
) -> None:
    """
    Assert that two IntervalArrays are equivalent.

    The left bounds, the right bounds and the ``closed`` attribute are
    compared in that order.

    Parameters
    ----------
    left, right : IntervalArray
        The IntervalArrays to compare.
    exact : bool or {'equiv'}, default 'equiv'
        Whether to check the Index class, dtype and inferred_type
        are identical. If 'equiv', then RangeIndex can be substituted for
        Index with an int64 dtype as well.
        NOTE(review): this body never reads ``exact``; it is kept in the
        signature for backward compatibility.
    obj : str, default 'IntervalArray'
        Specify object name being compared, internally used to show appropriate
        assertion message

    Raises
    ------
    AssertionError
        If either input is not an IntervalArray, or the bounds or ``closed``
        differ.
    """
    _check_isinstance(left, right, IntervalArray)

    kwargs = {}
    if left._left.dtype.kind in "mM":
        # We have a DatetimeArray or TimedeltaArray
        kwargs["check_freq"] = False

    assert_equal(left._left, right._left, obj=f"{obj}.left", **kwargs)
    # Label the right-bound comparison as ".right" so a failure message
    # points at the side that actually differs.
    assert_equal(left._right, right._right, obj=f"{obj}.right", **kwargs)

    assert_attr_equal("closed", left, right, obj=obj)
|
| 548 |
+
|
| 549 |
+
|
| 550 |
+
def assert_period_array_equal(left, right, obj: str = "PeriodArray") -> None:
    """
    Assert that two PeriodArrays are equal.

    Compares the backing ``_ndarray`` data first and then the ``dtype``
    attribute. ``obj`` is the label used in failure messages.
    """
    _check_isinstance(left, right, PeriodArray)

    data_label = f"{obj}._ndarray"
    assert_numpy_array_equal(left._ndarray, right._ndarray, obj=data_label)
    assert_attr_equal("dtype", left, right, obj=obj)
|
| 555 |
+
|
| 556 |
+
|
| 557 |
+
def assert_datetime_array_equal(
    left, right, obj: str = "DatetimeArray", check_freq: bool = True
) -> None:
    """
    Assert that two DatetimeArrays are equal.

    Compares the backing ``_ndarray`` data, then ``freq`` (only when
    ``check_freq`` is True), then ``tz``. ``obj`` is the label used in
    failure messages.
    """
    __tracebackhide__ = True
    _check_isinstance(left, right, DatetimeArray)

    assert_numpy_array_equal(left._ndarray, right._ndarray, obj=f"{obj}._ndarray")

    # "tz" is always compared; "freq" is compared first when requested.
    attrs_to_check = ("freq", "tz") if check_freq else ("tz",)
    for attr_name in attrs_to_check:
        assert_attr_equal(attr_name, left, right, obj=obj)
|
| 567 |
+
|
| 568 |
+
|
| 569 |
+
def assert_timedelta_array_equal(
    left, right, obj: str = "TimedeltaArray", check_freq: bool = True
) -> None:
    """
    Assert that two TimedeltaArrays are equal.

    Compares the backing ``_ndarray`` data and, when ``check_freq`` is True,
    the ``freq`` attribute. ``obj`` is the label used in failure messages.
    """
    __tracebackhide__ = True
    _check_isinstance(left, right, TimedeltaArray)

    data_label = f"{obj}._ndarray"
    assert_numpy_array_equal(left._ndarray, right._ndarray, obj=data_label)

    if not check_freq:
        return
    assert_attr_equal("freq", left, right, obj=obj)
|
| 577 |
+
|
| 578 |
+
|
| 579 |
+
def raise_assert_detail(
|
| 580 |
+
obj, message, left, right, diff=None, first_diff=None, index_values=None
|
| 581 |
+
) -> NoReturn:
|
| 582 |
+
__tracebackhide__ = True
|
| 583 |
+
|
| 584 |
+
msg = f"""{obj} are different
|
| 585 |
+
|
| 586 |
+
{message}"""
|
| 587 |
+
|
| 588 |
+
if isinstance(index_values, Index):
|
| 589 |
+
index_values = np.asarray(index_values)
|
| 590 |
+
|
| 591 |
+
if isinstance(index_values, np.ndarray):
|
| 592 |
+
msg += f"\n[index]: {pprint_thing(index_values)}"
|
| 593 |
+
|
| 594 |
+
if isinstance(left, np.ndarray):
|
| 595 |
+
left = pprint_thing(left)
|
| 596 |
+
elif isinstance(left, (CategoricalDtype, NumpyEADtype, StringDtype)):
|
| 597 |
+
left = repr(left)
|
| 598 |
+
|
| 599 |
+
if isinstance(right, np.ndarray):
|
| 600 |
+
right = pprint_thing(right)
|
| 601 |
+
elif isinstance(right, (CategoricalDtype, NumpyEADtype, StringDtype)):
|
| 602 |
+
right = repr(right)
|
| 603 |
+
|
| 604 |
+
msg += f"""
|
| 605 |
+
[left]: {left}
|
| 606 |
+
[right]: {right}"""
|
| 607 |
+
|
| 608 |
+
if diff is not None:
|
| 609 |
+
msg += f"\n[diff]: {diff}"
|
| 610 |
+
|
| 611 |
+
if first_diff is not None:
|
| 612 |
+
msg += f"\n{first_diff}"
|
| 613 |
+
|
| 614 |
+
raise AssertionError(msg)
|
| 615 |
+
|
| 616 |
+
|
| 617 |
+
def assert_numpy_array_equal(
    left,
    right,
    strict_nan: bool = False,
    check_dtype: bool | Literal["equiv"] = True,
    err_msg=None,
    check_same=None,
    obj: str = "numpy array",
    index_values=None,
) -> None:
    """
    Assert that two ``np.ndarray`` objects are equivalent.

    Parameters
    ----------
    left, right : numpy.ndarray or iterable
        The two arrays to be compared.
    strict_nan : bool, default False
        If True, consider NaN and None to be different.
    check_dtype : bool, default True
        Check dtype if both a and b are np.ndarray.
    err_msg : str, default None
        If provided, used as assertion message.
    check_same : None|'copy'|'same', default None
        Ensure left and right refer/do not refer to the same memory area.
    obj : str, default 'numpy array'
        Specify object name being compared, internally used to show appropriate
        assertion message.
    index_values : Index | numpy.ndarray, default None
        optional index (shared by both left and right), used in output.
    """
    __tracebackhide__ = True

    # instance validation
    # Show a detailed error message when classes are different
    assert_class_equal(left, right, obj=obj)
    # both classes must be an np.ndarray
    _check_isinstance(left, right, np.ndarray)

    def _memory_owner(arr):
        # The array's ``base`` when it has one, otherwise the array itself.
        return arr.base if getattr(arr, "base", None) is not None else arr

    left_base = _memory_owner(left)
    right_base = _memory_owner(right)
    same_owner = left_base is right_base

    if check_same == "same" and not same_owner:
        raise AssertionError(f"{repr(left_base)} is not {repr(right_base)}")
    if check_same == "copy" and same_owner:
        raise AssertionError(f"{repr(left_base)} is {repr(right_base)}")

    def _fail(lhs, rhs, custom_msg) -> NoReturn:
        # A caller-supplied message replaces the detailed report entirely.
        if custom_msg is not None:
            raise AssertionError(custom_msg)

        if lhs.shape != rhs.shape:
            raise_assert_detail(
                obj, f"{obj} shapes are different", lhs.shape, rhs.shape
            )

        # count up differences along the first axis
        n_different = sum(
            not array_equivalent(l_item, r_item, strict_nan=strict_nan)
            for l_item, r_item in zip(lhs, rhs)
        )
        percent = n_different * 100.0 / lhs.size
        raise_assert_detail(
            obj,
            f"{obj} values are different ({np.round(percent, 5)} %)",
            lhs,
            rhs,
            index_values=index_values,
        )

    # compare shape and values
    if not array_equivalent(left, right, strict_nan=strict_nan):
        _fail(left, right, err_msg)

    if (
        check_dtype
        and isinstance(left, np.ndarray)
        and isinstance(right, np.ndarray)
    ):
        assert_attr_equal("dtype", left, right, obj=obj)
|
| 695 |
+
|
| 696 |
+
|
| 697 |
+
def assert_extension_array_equal(
    left,
    right,
    check_dtype: bool | Literal["equiv"] = True,
    index_values=None,
    check_exact: bool | lib.NoDefault = lib.no_default,
    rtol: float | lib.NoDefault = lib.no_default,
    atol: float | lib.NoDefault = lib.no_default,
    obj: str = "ExtensionArray",
) -> None:
    """
    Assert that two ExtensionArrays are equal.

    Parameters
    ----------
    left, right : ExtensionArray
        The two arrays to compare.
    check_dtype : bool, default True
        Whether to check if the ExtensionArray dtypes are identical.
    index_values : Index | numpy.ndarray, default None
        Optional index (shared by both left and right), used in output.
    check_exact : bool, default False
        Whether to compare number exactly.

        .. versionchanged:: 2.2.0

            Defaults to True for integer dtypes if none of
            ``check_exact``, ``rtol`` and ``atol`` are specified.
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.
    obj : str, default 'ExtensionArray'
        Specify object name being compared, internally used to show appropriate
        assertion message.

        .. versionadded:: 2.0.0

    Notes
    -----
    Missing values are checked separately from valid values.
    A mask of missing values is computed for each and checked to match.
    The remaining all-valid values are cast to object dtype and checked.

    Examples
    --------
    >>> from pandas import testing as tm
    >>> a = pd.Series([1, 2, 3, 4])
    >>> b, c = a.array, a.array
    >>> tm.assert_extension_array_equal(b, c)
    """

    def _numeric_but_not_float(dtype) -> bool:
        return is_numeric_dtype(dtype) and not is_float_dtype(dtype)

    def _resolution_unit(dtype):
        # Unit string of a datetime-like dtype, numpy-native or tz-aware.
        if isinstance(dtype, np.dtype):
            return np.datetime_data(dtype)[0]
        return cast(DatetimeTZDtype, dtype).unit

    nothing_specified = (
        check_exact is lib.no_default
        and rtol is lib.no_default
        and atol is lib.no_default
    )
    if nothing_specified:
        # With no explicit choice, non-float numeric data is compared exactly.
        check_exact = _numeric_but_not_float(left.dtype) or _numeric_but_not_float(
            right.dtype
        )
    elif check_exact is lib.no_default:
        check_exact = False

    if rtol is lib.no_default:
        rtol = 1.0e-5
    if atol is lib.no_default:
        atol = 1.0e-8

    assert isinstance(left, ExtensionArray), "left is not an ExtensionArray"
    assert isinstance(right, ExtensionArray), "right is not an ExtensionArray"
    if check_dtype:
        assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")

    same_datetimelike_class = (
        isinstance(left, DatetimeLikeArrayMixin)
        and isinstance(right, DatetimeLikeArrayMixin)
        and type(right) == type(left)
    )
    if same_datetimelike_class:
        # GH 52449
        if not check_dtype and left.dtype.kind in "mM":
            l_unit = _resolution_unit(left.dtype)
            r_unit = _resolution_unit(right.dtype)
            if (
                l_unit != r_unit
                and compare_mismatched_resolutions(
                    left._ndarray, right._ndarray, operator.eq
                ).all()
            ):
                # Same instants stored at different resolutions: equal.
                return
        # Avoid slow object-dtype comparisons
        # np.asarray for case where we have a np.MaskedArray
        assert_numpy_array_equal(
            np.asarray(left.asi8),
            np.asarray(right.asi8),
            index_values=index_values,
            obj=obj,
        )
        return

    # The missing-value masks must agree before the valid values are compared.
    left_na = np.asarray(left.isna())
    right_na = np.asarray(right.isna())
    assert_numpy_array_equal(
        left_na, right_na, obj=f"{obj} NA mask", index_values=index_values
    )

    left_valid = left[~left_na].to_numpy(dtype=object)
    right_valid = right[~right_na].to_numpy(dtype=object)

    if not check_exact:
        _testing.assert_almost_equal(
            left_valid,
            right_valid,
            check_dtype=bool(check_dtype),
            rtol=rtol,
            atol=atol,
            obj=obj,
            index_values=index_values,
        )
        return

    assert_numpy_array_equal(
        left_valid, right_valid, obj=obj, index_values=index_values
    )
|
| 824 |
+
|
| 825 |
+
|
| 826 |
+
# This could be refactored to use the NDFrame.equals method
|
| 827 |
+
def assert_series_equal(
    left,
    right,
    check_dtype: bool | Literal["equiv"] = True,
    check_index_type: bool | Literal["equiv"] = "equiv",
    check_series_type: bool = True,
    check_names: bool = True,
    check_exact: bool | lib.NoDefault = lib.no_default,
    check_datetimelike_compat: bool = False,
    check_categorical: bool = True,
    check_category_order: bool = True,
    check_freq: bool = True,
    check_flags: bool = True,
    rtol: float | lib.NoDefault = lib.no_default,
    atol: float | lib.NoDefault = lib.no_default,
    obj: str = "Series",
    *,
    check_index: bool = True,
    check_like: bool = False,
) -> None:
    """
    Check that left and right Series are equal.

    Parameters
    ----------
    left : Series
    right : Series
    check_dtype : bool, default True
        Whether to check the Series dtype is identical.
    check_index_type : bool or {'equiv'}, default 'equiv'
        Whether to check the Index class, dtype and inferred_type
        are identical.
    check_series_type : bool, default True
         Whether to check the Series class is identical.
    check_names : bool, default True
        Whether to check the Series and Index names attribute.
    check_exact : bool, default False
        Whether to compare number exactly. The index is only compared
        exactly when this argument is passed explicitly.

        .. versionchanged:: 2.2.0

            Defaults to True for integer dtypes if none of
            ``check_exact``, ``rtol`` and ``atol`` are specified.
    check_datetimelike_compat : bool, default False
        Compare datetime-like which is comparable ignoring dtype.
    check_categorical : bool, default True
        Whether to compare internal Categorical exactly.
    check_category_order : bool, default True
        Whether to compare category order of internal Categoricals.
    check_freq : bool, default True
        Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex.
    check_flags : bool, default True
        Whether to check the `flags` attribute.
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.
    obj : str, default 'Series'
        Specify object name being compared, internally used to show appropriate
        assertion message.
    check_index : bool, default True
        Whether to check index equivalence. If False, then compare only values.

        .. versionadded:: 1.3.0
    check_like : bool, default False
        If True, ignore the order of the index. Must be False if check_index is False.
        Note: same labels must be with the same data.

        .. versionadded:: 1.5.0

    Raises
    ------
    ValueError
        If ``check_like`` is True while ``check_index`` is False.
    AssertionError
        If any of the enabled comparisons fails.

    Examples
    --------
    >>> from pandas import testing as tm
    >>> a = pd.Series([1, 2, 3, 4])
    >>> b = pd.Series([1, 2, 3, 4])
    >>> tm.assert_series_equal(a, b)
    """
    __tracebackhide__ = True
    # The index is compared exactly only on explicit request; the dtype-based
    # default computed below applies to the values, not to the index.
    check_exact_index = False if check_exact is lib.no_default else check_exact
    if (
        check_exact is lib.no_default
        and rtol is lib.no_default
        and atol is lib.no_default
    ):
        # Nothing was specified: compare exactly when either side is numeric
        # but not floating point.
        check_exact = (
            is_numeric_dtype(left.dtype)
            and not is_float_dtype(left.dtype)
            or is_numeric_dtype(right.dtype)
            and not is_float_dtype(right.dtype)
        )
    elif check_exact is lib.no_default:
        # A tolerance was given without check_exact: use approximate comparison.
        check_exact = False

    # Fill in the documented default tolerances.
    rtol = rtol if rtol is not lib.no_default else 1.0e-5
    atol = atol if atol is not lib.no_default else 1.0e-8

    if not check_index and check_like:
        raise ValueError("check_like must be False if check_index is False")

    # instance validation
    _check_isinstance(left, right, Series)

    if check_series_type:
        assert_class_equal(left, right, obj=obj)

    # length comparison
    if len(left) != len(right):
        msg1 = f"{len(left)}, {left.index}"
        msg2 = f"{len(right)}, {right.index}"
        raise_assert_detail(obj, "Series length are different", msg1, msg2)

    if check_flags:
        assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}"

    if check_index:
        # GH #38183
        assert_index_equal(
            left.index,
            right.index,
            exact=check_index_type,
            check_names=check_names,
            check_exact=check_exact_index,
            check_categorical=check_categorical,
            check_order=not check_like,
            rtol=rtol,
            atol=atol,
            obj=f"{obj}.index",
        )

    if check_like:
        # Align left to right's index order so the value checks below
        # compare matching labels.
        left = left.reindex_like(right)

    if check_freq and isinstance(left.index, (DatetimeIndex, TimedeltaIndex)):
        # NOTE(review): only left.index's type is tested here; ridx.freq is read
        # unconditionally. Presumably safe because the index check above ran,
        # but confirm for check_index=False callers.
        lidx = left.index
        ridx = right.index
        assert lidx.freq == ridx.freq, (lidx.freq, ridx.freq)

    if check_dtype:
        # We want to skip exact dtype checking when `check_categorical`
        # is False. We'll still raise if only one is a `Categorical`,
        # regardless of `check_categorical`
        if (
            isinstance(left.dtype, CategoricalDtype)
            and isinstance(right.dtype, CategoricalDtype)
            and not check_categorical
        ):
            pass
        else:
            assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")

    # Value comparison: exactly one of the branches below runs, and the
    # order of the branches decides which comparison helper is used.
    if check_exact:
        left_values = left._values
        right_values = right._values
        # Only check exact if dtype is numeric
        if isinstance(left_values, ExtensionArray) and isinstance(
            right_values, ExtensionArray
        ):
            assert_extension_array_equal(
                left_values,
                right_values,
                check_dtype=check_dtype,
                index_values=left.index,
                obj=str(obj),
            )
        else:
            # convert both to NumPy if not, check_dtype would raise earlier
            lv, rv = left_values, right_values
            if isinstance(left_values, ExtensionArray):
                lv = left_values.to_numpy()
            if isinstance(right_values, ExtensionArray):
                rv = right_values.to_numpy()
            assert_numpy_array_equal(
                lv,
                rv,
                check_dtype=check_dtype,
                obj=str(obj),
                index_values=left.index,
            )
    elif check_datetimelike_compat and (
        needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype)
    ):
        # we want to check only if we have compat dtypes
        # e.g. integer and M|m are NOT compat, but we can simply check
        # the values in that case

        # datetimelike may have different objects (e.g. datetime.datetime
        # vs Timestamp) but will compare equal
        if not Index(left._values).equals(Index(right._values)):
            msg = (
                f"[datetimelike_compat=True] {left._values} "
                f"is not equal to {right._values}."
            )
            raise AssertionError(msg)
    elif isinstance(left.dtype, IntervalDtype) and isinstance(
        right.dtype, IntervalDtype
    ):
        # Both sides are interval-typed; `obj` is not forwarded here.
        assert_interval_array_equal(left.array, right.array)
    elif isinstance(left.dtype, CategoricalDtype) or isinstance(
        right.dtype, CategoricalDtype
    ):
        # At least one side is categorical: approximate comparison of values.
        _testing.assert_almost_equal(
            left._values,
            right._values,
            rtol=rtol,
            atol=atol,
            check_dtype=bool(check_dtype),
            obj=str(obj),
            index_values=left.index,
        )
    elif isinstance(left.dtype, ExtensionDtype) and isinstance(
        right.dtype, ExtensionDtype
    ):
        # Both sides are extension-typed: tolerances are forwarded.
        assert_extension_array_equal(
            left._values,
            right._values,
            rtol=rtol,
            atol=atol,
            check_dtype=check_dtype,
            index_values=left.index,
            obj=str(obj),
        )
    elif is_extension_array_dtype_and_needs_i8_conversion(
        left.dtype, right.dtype
    ) or is_extension_array_dtype_and_needs_i8_conversion(right.dtype, left.dtype):
        # One side is an extension dtype and the other needs i8 conversion.
        assert_extension_array_equal(
            left._values,
            right._values,
            check_dtype=check_dtype,
            index_values=left.index,
            obj=str(obj),
        )
    elif needs_i8_conversion(left.dtype) and needs_i8_conversion(right.dtype):
        # DatetimeArray or TimedeltaArray
        assert_extension_array_equal(
            left._values,
            right._values,
            check_dtype=check_dtype,
            index_values=left.index,
            obj=str(obj),
        )
    else:
        # Fallback: approximate comparison with the resolved tolerances.
        _testing.assert_almost_equal(
            left._values,
            right._values,
            rtol=rtol,
            atol=atol,
            check_dtype=bool(check_dtype),
            obj=str(obj),
            index_values=left.index,
        )

    # metadata comparison
    if check_names:
        assert_attr_equal("name", left, right, obj=obj)

    if check_categorical:
        # Runs in addition to the value comparison above whenever either
        # side is categorical.
        if isinstance(left.dtype, CategoricalDtype) or isinstance(
            right.dtype, CategoricalDtype
        ):
            assert_categorical_equal(
                left._values,
                right._values,
                obj=f"{obj} category",
                check_category_order=check_category_order,
            )
|
| 1091 |
+
|
| 1092 |
+
|
| 1093 |
+
# This could be refactored to use the NDFrame.equals method
|
| 1094 |
+
def assert_frame_equal(
    left,
    right,
    check_dtype: bool | Literal["equiv"] = True,
    check_index_type: bool | Literal["equiv"] = "equiv",
    check_column_type: bool | Literal["equiv"] = "equiv",
    check_frame_type: bool = True,
    check_names: bool = True,
    by_blocks: bool = False,
    check_exact: bool | lib.NoDefault = lib.no_default,
    check_datetimelike_compat: bool = False,
    check_categorical: bool = True,
    check_like: bool = False,
    check_freq: bool = True,
    check_flags: bool = True,
    rtol: float | lib.NoDefault = lib.no_default,
    atol: float | lib.NoDefault = lib.no_default,
    obj: str = "DataFrame",
) -> None:
    """
    Assert that two DataFrames are equal, reporting the first difference.

    Mainly meant for unit tests. The frame type, shape, flags, index and
    columns are compared first, then the data either column by column or,
    with ``by_blocks=True``, dtype block by dtype block. The keyword
    arguments relax or tighten the individual checks.

    Parameters
    ----------
    left : DataFrame
        First DataFrame to compare.
    right : DataFrame
        Second DataFrame to compare.
    check_dtype : bool, default True
        Whether the dtypes must be identical.
    check_index_type : bool or {'equiv'}, default 'equiv'
        Whether the Index class, dtype and inferred_type must be identical.
    check_column_type : bool or {'equiv'}, default 'equiv'
        Same as ``check_index_type`` but for the columns; forwarded as the
        ``exact`` argument of :func:`assert_index_equal`.
    check_frame_type : bool, default True
        Whether the DataFrame class must be identical.
    check_names : bool, default True
        Whether the `names` attribute of both the `index` and the
        `column` attributes must be identical.
    by_blocks : bool, default False
        If False, compare the data by columns. If True, compare by blocks.
    check_exact : bool, default False
        Whether to compare numbers exactly.

        .. versionchanged:: 2.2.0

            Defaults to True for integer dtypes if none of
            ``check_exact``, ``rtol`` and ``atol`` are specified.
    check_datetimelike_compat : bool, default False
        Compare datetime-like values that are comparable ignoring dtype.
    check_categorical : bool, default True
        Whether to compare internal Categorical exactly.
    check_like : bool, default False
        If True, ignore the order of index & columns.
        Note: index labels must match their respective rows
        (same as in columns) - same labels must be with the same data.
    check_freq : bool, default True
        Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex.
    check_flags : bool, default True
        Whether to check the `flags` attribute.
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.
    obj : str, default 'DataFrame'
        Name of the object being compared, used in assertion messages.

    See Also
    --------
    assert_series_equal : Equivalent method for asserting Series equality.
    DataFrame.equals : Check DataFrame equality.

    Examples
    --------
    Two DataFrames holding equal values in columns of differing dtypes.

    >>> from pandas.testing import assert_frame_equal
    >>> df1 = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
    >>> df2 = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})

    df1 equals itself.

    >>> assert_frame_equal(df1, df1)

    df1 differs from df2 as column 'b' is of a different type.

    >>> assert_frame_equal(df1, df2)
    Traceback (most recent call last):
    ...
    AssertionError: Attributes of DataFrame.iloc[:, 1] (column name="b") are different

    Attribute "dtype" are different
    [left]:  int64
    [right]: float64

    Ignore differing dtypes in columns with check_dtype.

    >>> assert_frame_equal(df1, df2, check_dtype=False)
    """
    __tracebackhide__ = True

    # Resolved settings for the axis comparisons only; the per-column
    # comparison receives the caller's raw check_exact/rtol/atol.
    axis_rtol = 1.0e-5 if rtol is lib.no_default else rtol
    axis_atol = 1.0e-8 if atol is lib.no_default else atol
    axis_exact = False if check_exact is lib.no_default else check_exact

    # instance validation
    _check_isinstance(left, right, DataFrame)

    if check_frame_type:
        assert isinstance(left, type(right))

    # shape comparison
    if left.shape != right.shape:
        raise_assert_detail(
            obj, f"{obj} shape mismatch", repr(left.shape), repr(right.shape)
        )

    if check_flags:
        assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}"

    # Index first, then columns, sharing every option except `exact`.
    axis_checks = (
        ("index", left.index, right.index, check_index_type),
        ("columns", left.columns, right.columns, check_column_type),
    )
    for axis_name, left_axis, right_axis, exact in axis_checks:
        assert_index_equal(
            left_axis,
            right_axis,
            exact=exact,
            check_names=check_names,
            check_exact=axis_exact,
            check_categorical=check_categorical,
            check_order=not check_like,
            rtol=axis_rtol,
            atol=axis_atol,
            obj=f"{obj}.{axis_name}",
        )

    if check_like:
        left = left.reindex_like(right)

    if by_blocks:
        # Compare the per-dtype sub-frames; every dtype must occur on both sides.
        right_blocks = right._to_dict_of_blocks()
        left_blocks = left._to_dict_of_blocks()
        for dtype in set([*left_blocks.keys(), *right_blocks.keys()]):
            assert dtype in left_blocks
            assert dtype in right_blocks
            assert_frame_equal(
                left_blocks[dtype], right_blocks[dtype], check_dtype=check_dtype, obj=obj
            )
        return

    # Compare column by column. The columns were already checked to match,
    # so fast location-based lookups can be used.
    for pos, label in enumerate(left.columns):
        left_col = left._ixs(pos, axis=1)
        right_col = right._ixs(pos, axis=1)

        # GH #38183
        # check_index=False: the index was already compared once for the
        # whole frame, so it is not re-checked for every column.
        assert_series_equal(
            left_col,
            right_col,
            check_dtype=check_dtype,
            check_index_type=check_index_type,
            check_names=check_names,
            check_exact=check_exact,
            check_datetimelike_compat=check_datetimelike_compat,
            check_categorical=check_categorical,
            check_freq=check_freq,
            check_flags=False,
            rtol=rtol,
            atol=atol,
            obj=f'{obj}.iloc[:, {pos}] (column name="{label}")',
            check_index=False,
        )
|
| 1295 |
+
|
| 1296 |
+
|
| 1297 |
+
def assert_equal(left, right, **kwargs) -> None:
    """
    Dispatch to the tm.assert_*_equal function matching the type of ``left``.

    Parameters
    ----------
    left, right : Index, Series, DataFrame, ExtensionArray, or np.ndarray
        The two items to be compared.
    **kwargs
        Forwarded unchanged to the selected assert function. Must be empty
        when ``left`` is a str or any type not listed above.
    """
    __tracebackhide__ = True

    if isinstance(left, Index):
        assert_index_equal(left, right, **kwargs)
        # Datetime-like indexes additionally have to agree on freq.
        if isinstance(left, (DatetimeIndex, TimedeltaIndex)):
            assert left.freq == right.freq, (left.freq, right.freq)
        return

    # Order matters: the specific array types must be tried before the
    # generic ExtensionArray entry.
    checkers = (
        (Series, assert_series_equal),
        (DataFrame, assert_frame_equal),
        (IntervalArray, assert_interval_array_equal),
        (PeriodArray, assert_period_array_equal),
        (DatetimeArray, assert_datetime_array_equal),
        (TimedeltaArray, assert_timedelta_array_equal),
        (ExtensionArray, assert_extension_array_equal),
        (np.ndarray, assert_numpy_array_equal),
    )
    for klass, checker in checkers:
        if isinstance(left, klass):
            checker(left, right, **kwargs)
            return

    # Strings and everything else accept no options.
    assert kwargs == {}
    if isinstance(left, str):
        assert left == right
    else:
        assert_almost_equal(left, right)
|
| 1336 |
+
|
| 1337 |
+
|
| 1338 |
+
def assert_sp_array_equal(left, right) -> None:
    """
    Assert that two SparseArrays are equal.

    Compares, in order: the stored sparse values, the sparse index, the
    ``fill_value`` and ``dtype`` attributes, and the dense representation.

    Parameters
    ----------
    left : SparseArray
    right : SparseArray
    """
    _check_isinstance(left, right, pd.arrays.SparseArray)

    assert_numpy_array_equal(left.sp_values, right.sp_values)

    # SparseIndex comparison
    left_sp_index = left.sp_index
    right_sp_index = right.sp_index
    assert isinstance(left_sp_index, SparseIndex)
    assert isinstance(right_sp_index, SparseIndex)

    if not left_sp_index.equals(right_sp_index):
        raise_assert_detail(
            "SparseArray.index", "index are not equal", left_sp_index, right_sp_index
        )

    for attr in ("fill_value", "dtype"):
        assert_attr_equal(attr, left, right)
    assert_numpy_array_equal(left.to_dense(), right.to_dense())
|
| 1369 |
+
|
| 1370 |
+
|
| 1371 |
+
def assert_contains_all(iterable, dic) -> None:
    """Assert that every element of ``iterable`` is contained in ``dic``."""
    for item in iterable:
        found = item in dic
        assert found, f"Did not contain item: {repr(item)}"
|
| 1374 |
+
|
| 1375 |
+
|
| 1376 |
+
def assert_copy(iter1, iter2, **eql_kwargs) -> None:
    """
    Assert that paired elements are equal but are distinct objects.

    iter1, iter2: iterables that produce elements
    comparable with assert_almost_equal

    Elements are paired with ``zip``. Only the paired elements themselves
    are checked for identity, not the items nested inside them.
    """
    for first, second in zip(iter1, iter2):
        assert_almost_equal(first, second, **eql_kwargs)
        assert first is not second, (
            f"Expected object {repr(type(first))} and object {repr(type(second))} to be "
            "different objects, but they were the same object."
        )
|
| 1392 |
+
|
| 1393 |
+
|
| 1394 |
+
def is_extension_array_dtype_and_needs_i8_conversion(
    left_dtype: DtypeObj, right_dtype: DtypeObj
) -> bool:
    """
    Report whether ``left_dtype`` is an ExtensionDtype while ``right_dtype``
    is a dtype that needs i8 (int64) conversion.

    The check is not symmetric; callers wanting either ordering call it twice.

    Returns
    -------
    bool

    Related to issue #37609
    """
    if not isinstance(left_dtype, ExtensionDtype):
        return False
    return needs_i8_conversion(right_dtype)
|
| 1408 |
+
|
| 1409 |
+
|
| 1410 |
+
def assert_indexing_slices_equivalent(ser: Series, l_slc: slice, i_slc: slice) -> None:
    """
    Assert that label slice ``l_slc`` selects the same data as positional
    slice ``i_slc``.

    ``ser.loc[l_slc]`` is always compared with ``ser.iloc[i_slc]``;
    ``ser[l_slc]`` is compared as well unless the index has an integer dtype.
    """
    by_position = ser.iloc[i_slc]

    assert_series_equal(ser.loc[l_slc], by_position)

    if is_integer_dtype(ser.index):
        # Plain getitem slicing is skipped for integer indexes; presumably
        # because it is position-based there and would not match .loc.
        return
    assert_series_equal(ser[l_slc], by_position)
|
| 1422 |
+
|
| 1423 |
+
|
| 1424 |
+
def assert_metadata_equivalent(
    left: DataFrame | Series, right: DataFrame | Series | None = None
) -> None:
    """
    Assert that the attributes named in ``left._metadata`` agree.

    With ``right`` given, each attribute must compare equal on both objects
    (a missing attribute counts as None). Without ``right``, each attribute
    on ``left`` must be None.
    """
    for name in left._metadata:
        left_value = getattr(left, name, None)
        if right is not None:
            assert left_value == getattr(right, name, None)
        else:
            assert left_value is None
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/compat.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Helpers for sharing tests between DataFrame/Series
|
| 3 |
+
"""
|
| 4 |
+
from __future__ import annotations
|
| 5 |
+
|
| 6 |
+
from typing import TYPE_CHECKING
|
| 7 |
+
|
| 8 |
+
from pandas import DataFrame
|
| 9 |
+
|
| 10 |
+
if TYPE_CHECKING:
|
| 11 |
+
from pandas._typing import DtypeObj
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def get_dtype(obj) -> DtypeObj:
    """
    Return the dtype of ``obj``; for a DataFrame, the dtype of its first column.
    """
    # Note: a DataFrame is assumed to hold only one column
    return obj.dtypes.iat[0] if isinstance(obj, DataFrame) else obj.dtype
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def get_obj(df: DataFrame, klass):
    """
    For tests shared via frame_or_series: return ``df`` itself when ``klass``
    is DataFrame, otherwise return its first column as a Series.
    """
    if klass is not DataFrame:
        return df._ixs(0, axis=1)
    return df
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/contexts.py
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from contextlib import contextmanager
|
| 4 |
+
import os
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
import tempfile
|
| 7 |
+
from typing import (
|
| 8 |
+
IO,
|
| 9 |
+
TYPE_CHECKING,
|
| 10 |
+
Any,
|
| 11 |
+
)
|
| 12 |
+
import uuid
|
| 13 |
+
|
| 14 |
+
from pandas._config import using_copy_on_write
|
| 15 |
+
|
| 16 |
+
from pandas.compat import PYPY
|
| 17 |
+
from pandas.errors import ChainedAssignmentError
|
| 18 |
+
|
| 19 |
+
from pandas import set_option
|
| 20 |
+
|
| 21 |
+
from pandas.io.common import get_handle
|
| 22 |
+
|
| 23 |
+
if TYPE_CHECKING:
|
| 24 |
+
from collections.abc import Generator
|
| 25 |
+
|
| 26 |
+
from pandas._typing import (
|
| 27 |
+
BaseBuffer,
|
| 28 |
+
CompressionOptions,
|
| 29 |
+
FilePath,
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
@contextmanager
def decompress_file(
    path: FilePath | BaseBuffer, compression: CompressionOptions
) -> Generator[IO[bytes], None, None]:
    """
    Open a compressed file for binary reading and yield the file object.

    Parameters
    ----------
    path : str
        The path where the file is read from.

    compression : {'gzip', 'bz2', 'zip', 'xz', 'zstd', None}
        Name of the decompression to use

    Returns
    -------
    file object
    """
    handles = get_handle(path, "rb", compression=compression, is_text=False)
    # Leaving the with-block releases whatever get_handle opened.
    with handles:
        yield handles.handle
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@contextmanager
def set_timezone(tz: str) -> Generator[None, None, None]:
    """
    Temporarily set the process timezone through the ``TZ`` environment
    variable, restoring the previous value (or its absence) on exit.

    Parameters
    ----------
    tz : str
        A string representing a valid timezone.

    Examples
    --------
    >>> from datetime import datetime
    >>> from dateutil.tz import tzlocal
    >>> tzlocal().tzname(datetime(2021, 1, 1))  # doctest: +SKIP
    'IST'

    >>> with set_timezone('US/Eastern'):
    ...     tzlocal().tzname(datetime(2021, 1, 1))
    ...
    'EST'
    """
    import time

    def _apply(zone) -> None:
        # None means "TZ was not set": remove the variable instead.
        if zone is not None:
            os.environ["TZ"] = zone
        else:
            os.environ.pop("TZ", None)
        time.tzset()

    previous = os.environ.get("TZ")
    _apply(tz)
    try:
        yield
    finally:
        _apply(previous)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
@contextmanager
def ensure_clean(
    filename=None, return_filelike: bool = False, **kwargs: Any
) -> Generator[Any, None, None]:
    """
    Gets a temporary path and agrees to remove on close.

    This implementation does not use tempfile.mkstemp to avoid having a file handle.
    If the code using the returned path wants to delete the file itself, windows
    requires that no program has a file handle to it.

    Parameters
    ----------
    filename : str (optional)
        suffix of the created file.
    return_filelike : bool (default False)
        if True, returns a file-like which is *always* cleaned. Necessary for
        savefig and other functions which want to append extensions.
    **kwargs
        Additional keywords are passed to open(). ``mode`` defaults to
        ``"w+b"``; ``encoding`` defaults to ``"utf-8"`` for text modes.

    Yields
    ------
    str or file object
        The path as a string, or the opened file when ``return_filelike``
        is True.
    """
    folder = Path(tempfile.gettempdir())

    if filename is None:
        filename = ""
    # A random UUID prefix keeps concurrent callers from colliding.
    filename = str(uuid.uuid4()) + filename
    path = folder / filename

    path.touch()

    handle_or_str: str | IO = str(path)
    encoding = kwargs.pop("encoding", None)

    try:
        if return_filelike:
            kwargs.setdefault("mode", "w+b")
            if encoding is None and "b" not in kwargs["mode"]:
                encoding = "utf-8"
            # Opened inside the try block so that the file created by
            # touch() is still removed if open() raises (e.g. invalid mode).
            handle_or_str = open(path, encoding=encoding, **kwargs)

        yield handle_or_str
    finally:
        if not isinstance(handle_or_str, str):
            handle_or_str.close()
        # The caller may already have deleted the file itself.
        if path.is_file():
            path.unlink()
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
@contextmanager
def with_csv_dialect(name: str, **kwargs) -> Generator[None, None, None]:
    """
    Register a CSV dialect for the duration of the ``with`` block.

    The dialect is registered on entry and unregistered on exit, whether or
    not the body raised.

    Parameters
    ----------
    name : str
        The name of the dialect.
    kwargs : mapping
        The parameters for the dialect.

    Raises
    ------
    ValueError : the name of the dialect conflicts with a builtin one.

    See Also
    --------
    csv : Python's CSV library.
    """
    import csv

    # Names shipped with the csv module; these must never be replaced.
    reserved = frozenset(("excel", "excel-tab", "unix"))
    if name in reserved:
        raise ValueError("Cannot override builtin dialect.")

    csv.register_dialect(name, **kwargs)
    try:
        yield
    finally:
        csv.unregister_dialect(name)
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
@contextmanager
def use_numexpr(use, min_elements=None) -> Generator[None, None, None]:
    """
    Temporarily set the ``compute.use_numexpr`` option and ``_MIN_ELEMENTS``.

    Parameters
    ----------
    use
        Value passed to ``set_option("compute.use_numexpr", ...)``.
    min_elements : optional
        Value assigned to ``expressions._MIN_ELEMENTS`` inside the block.
        When None, the current value is kept.

    Both settings are restored to their previous values on exit.
    """
    from pandas.core.computation import expressions as expr

    # Snapshot the current state before touching anything.
    saved_use, saved_min = expr.USE_NUMEXPR, expr._MIN_ELEMENTS
    target_min = saved_min if min_elements is None else min_elements

    set_option("compute.use_numexpr", use)
    expr._MIN_ELEMENTS = target_min
    try:
        yield
    finally:
        expr._MIN_ELEMENTS = saved_min
        set_option("compute.use_numexpr", saved_use)
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def raises_chained_assignment_error(warn=True, extra_warnings=(), extra_match=()):
    """
    Return a context manager asserting the chained-assignment warning.

    Parameters
    ----------
    warn : bool, default True
        When False, a no-op context manager is returned.
    extra_warnings : tuple of warning classes
        Additional warning classes that are also expected.
    extra_match : tuple of str
        Additional message patterns, joined with ``|`` into the match regex.

    On PyPy the chained-assignment warning itself is not expected; only
    ``extra_warnings`` (if any) are asserted.
    """
    from contextlib import nullcontext

    from pandas._testing import assert_produces_warning

    if not warn:
        return nullcontext()

    if PYPY:
        if not extra_warnings:
            return nullcontext()
        return assert_produces_warning(
            extra_warnings,
            match="|".join(extra_match),
        )

    if using_copy_on_write():
        expected = ChainedAssignmentError
        pattern = (
            "A value is trying to be set on a copy of a DataFrame or Series "
            "through chained assignment"
        )
    else:
        expected = FutureWarning  # type: ignore[assignment]
        # TODO update match
        pattern = "ChainedAssignmentError"

    if extra_warnings:
        expected = (expected, *extra_warnings)  # type: ignore[assignment]

    return assert_produces_warning(
        expected,
        match="|".join((pattern, *extra_match)),
    )
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
def assert_cow_warning(warn=True, match=None, **kwargs):
    """
    Assert that a warning is raised in the CoW warning mode.

    Parameters
    ----------
    warn : bool, default True
        By default, check that a warning is raised. Can be turned off by passing False.
    match : str
        The warning message to match against, if different from the default.
    kwargs
        Passed through to assert_produces_warning

    Returns
    -------
    context manager
        ``assert_produces_warning(FutureWarning, ...)`` when ``warn`` is truthy,
        otherwise a ``contextlib.nullcontext``.
    """
    from pandas._testing import assert_produces_warning

    if warn:
        # An empty/None match falls back to the default CoW message.
        pattern = match if match else "Setting a value on a view"
        return assert_produces_warning(FutureWarning, match=pattern, **kwargs)

    from contextlib import nullcontext

    return nullcontext()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/arrays/__init__.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
All of pandas' ExtensionArrays.
|
| 3 |
+
|
| 4 |
+
See :ref:`extending.extension-types` for more.
|
| 5 |
+
"""
|
| 6 |
+
from pandas.core.arrays import (
|
| 7 |
+
ArrowExtensionArray,
|
| 8 |
+
ArrowStringArray,
|
| 9 |
+
BooleanArray,
|
| 10 |
+
Categorical,
|
| 11 |
+
DatetimeArray,
|
| 12 |
+
FloatingArray,
|
| 13 |
+
IntegerArray,
|
| 14 |
+
IntervalArray,
|
| 15 |
+
NumpyExtensionArray,
|
| 16 |
+
PeriodArray,
|
| 17 |
+
SparseArray,
|
| 18 |
+
StringArray,
|
| 19 |
+
TimedeltaArray,
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
__all__ = [
|
| 23 |
+
"ArrowExtensionArray",
|
| 24 |
+
"ArrowStringArray",
|
| 25 |
+
"BooleanArray",
|
| 26 |
+
"Categorical",
|
| 27 |
+
"DatetimeArray",
|
| 28 |
+
"FloatingArray",
|
| 29 |
+
"IntegerArray",
|
| 30 |
+
"IntervalArray",
|
| 31 |
+
"NumpyExtensionArray",
|
| 32 |
+
"PeriodArray",
|
| 33 |
+
"SparseArray",
|
| 34 |
+
"StringArray",
|
| 35 |
+
"TimedeltaArray",
|
| 36 |
+
]
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def __getattr__(name: str) -> type[NumpyExtensionArray]:
|
| 40 |
+
if name == "PandasArray":
|
| 41 |
+
# GH#53694
|
| 42 |
+
import warnings
|
| 43 |
+
|
| 44 |
+
from pandas.util._exceptions import find_stack_level
|
| 45 |
+
|
| 46 |
+
warnings.warn(
|
| 47 |
+
"PandasArray has been renamed NumpyExtensionArray. Use that "
|
| 48 |
+
"instead. This alias will be removed in a future version.",
|
| 49 |
+
FutureWarning,
|
| 50 |
+
stacklevel=find_stack_level(),
|
| 51 |
+
)
|
| 52 |
+
return NumpyExtensionArray
|
| 53 |
+
raise AttributeError(f"module 'pandas.arrays' has no attribute '{name}'")
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/errors/__init__.py
ADDED
|
@@ -0,0 +1,850 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Expose public exceptions & warnings
|
| 3 |
+
"""
|
| 4 |
+
from __future__ import annotations
|
| 5 |
+
|
| 6 |
+
import ctypes
|
| 7 |
+
|
| 8 |
+
from pandas._config.config import OptionError
|
| 9 |
+
|
| 10 |
+
from pandas._libs.tslibs import (
|
| 11 |
+
OutOfBoundsDatetime,
|
| 12 |
+
OutOfBoundsTimedelta,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
from pandas.util.version import InvalidVersion
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class IntCastingNaNError(ValueError):
|
| 19 |
+
"""
|
| 20 |
+
Exception raised when converting (``astype``) an array with NaN to an integer type.
|
| 21 |
+
|
| 22 |
+
Examples
|
| 23 |
+
--------
|
| 24 |
+
>>> pd.DataFrame(np.array([[1, np.nan], [2, 3]]), dtype="i8")
|
| 25 |
+
Traceback (most recent call last):
|
| 26 |
+
IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class NullFrequencyError(ValueError):
|
| 31 |
+
"""
|
| 32 |
+
Exception raised when a ``freq`` cannot be null.
|
| 33 |
+
|
| 34 |
+
Particularly ``DatetimeIndex.shift``, ``TimedeltaIndex.shift``,
|
| 35 |
+
``PeriodIndex.shift``.
|
| 36 |
+
|
| 37 |
+
Examples
|
| 38 |
+
--------
|
| 39 |
+
>>> df = pd.DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None)
|
| 40 |
+
>>> df.shift(2)
|
| 41 |
+
Traceback (most recent call last):
|
| 42 |
+
NullFrequencyError: Cannot shift with no freq
|
| 43 |
+
"""
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class PerformanceWarning(Warning):
|
| 47 |
+
"""
|
| 48 |
+
Warning raised when there is a possible performance impact.
|
| 49 |
+
|
| 50 |
+
Examples
|
| 51 |
+
--------
|
| 52 |
+
>>> df = pd.DataFrame({"jim": [0, 0, 1, 1],
|
| 53 |
+
... "joe": ["x", "x", "z", "y"],
|
| 54 |
+
... "jolie": [1, 2, 3, 4]})
|
| 55 |
+
>>> df = df.set_index(["jim", "joe"])
|
| 56 |
+
>>> df
|
| 57 |
+
jolie
|
| 58 |
+
jim joe
|
| 59 |
+
0 x 1
|
| 60 |
+
x 2
|
| 61 |
+
1 z 3
|
| 62 |
+
y 4
|
| 63 |
+
>>> df.loc[(1, 'z')] # doctest: +SKIP
|
| 64 |
+
# PerformanceWarning: indexing past lexsort depth may impact performance.
|
| 65 |
+
df.loc[(1, 'z')]
|
| 66 |
+
jolie
|
| 67 |
+
jim joe
|
| 68 |
+
1 z 3
|
| 69 |
+
"""
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
class UnsupportedFunctionCall(ValueError):
|
| 73 |
+
"""
|
| 74 |
+
Exception raised when attempting to call an unsupported numpy function.
|
| 75 |
+
|
| 76 |
+
For example, ``np.cumsum(groupby_object)``.
|
| 77 |
+
|
| 78 |
+
Examples
|
| 79 |
+
--------
|
| 80 |
+
>>> df = pd.DataFrame({"A": [0, 0, 1, 1],
|
| 81 |
+
... "B": ["x", "x", "z", "y"],
|
| 82 |
+
... "C": [1, 2, 3, 4]}
|
| 83 |
+
... )
|
| 84 |
+
>>> np.cumsum(df.groupby(["A"]))
|
| 85 |
+
Traceback (most recent call last):
|
| 86 |
+
UnsupportedFunctionCall: numpy operations are not valid with groupby.
|
| 87 |
+
Use .groupby(...).cumsum() instead
|
| 88 |
+
"""
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
class UnsortedIndexError(KeyError):
|
| 92 |
+
"""
|
| 93 |
+
Error raised when slicing a MultiIndex which has not been lexsorted.
|
| 94 |
+
|
| 95 |
+
Subclass of `KeyError`.
|
| 96 |
+
|
| 97 |
+
Examples
|
| 98 |
+
--------
|
| 99 |
+
>>> df = pd.DataFrame({"cat": [0, 0, 1, 1],
|
| 100 |
+
... "color": ["white", "white", "brown", "black"],
|
| 101 |
+
... "lives": [4, 4, 3, 7]},
|
| 102 |
+
... )
|
| 103 |
+
>>> df = df.set_index(["cat", "color"])
|
| 104 |
+
>>> df
|
| 105 |
+
lives
|
| 106 |
+
cat color
|
| 107 |
+
0 white 4
|
| 108 |
+
white 4
|
| 109 |
+
1 brown 3
|
| 110 |
+
black 7
|
| 111 |
+
>>> df.loc[(0, "black"):(1, "white")]
|
| 112 |
+
Traceback (most recent call last):
|
| 113 |
+
UnsortedIndexError: 'Key length (2) was greater
|
| 114 |
+
than MultiIndex lexsort depth (1)'
|
| 115 |
+
"""
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
class ParserError(ValueError):
|
| 119 |
+
"""
|
| 120 |
+
Exception that is raised by an error encountered in parsing file contents.
|
| 121 |
+
|
| 122 |
+
This is a generic error raised for errors encountered when functions like
|
| 123 |
+
`read_csv` or `read_html` are parsing contents of a file.
|
| 124 |
+
|
| 125 |
+
See Also
|
| 126 |
+
--------
|
| 127 |
+
read_csv : Read CSV (comma-separated) file into a DataFrame.
|
| 128 |
+
read_html : Read HTML table into a DataFrame.
|
| 129 |
+
|
| 130 |
+
Examples
|
| 131 |
+
--------
|
| 132 |
+
>>> data = '''a,b,c
|
| 133 |
+
... cat,foo,bar
|
| 134 |
+
... dog,foo,"baz'''
|
| 135 |
+
>>> from io import StringIO
|
| 136 |
+
>>> pd.read_csv(StringIO(data), skipfooter=1, engine='python')
|
| 137 |
+
Traceback (most recent call last):
|
| 138 |
+
ParserError: ',' expected after '"'. Error could possibly be due
|
| 139 |
+
to parsing errors in the skipped footer rows
|
| 140 |
+
"""
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
class DtypeWarning(Warning):
|
| 144 |
+
"""
|
| 145 |
+
Warning raised when reading different dtypes in a column from a file.
|
| 146 |
+
|
| 147 |
+
Raised for a dtype incompatibility. This can happen whenever `read_csv`
|
| 148 |
+
or `read_table` encounter non-uniform dtypes in a column(s) of a given
|
| 149 |
+
CSV file.
|
| 150 |
+
|
| 151 |
+
See Also
|
| 152 |
+
--------
|
| 153 |
+
read_csv : Read CSV (comma-separated) file into a DataFrame.
|
| 154 |
+
read_table : Read general delimited file into a DataFrame.
|
| 155 |
+
|
| 156 |
+
Notes
|
| 157 |
+
-----
|
| 158 |
+
This warning is issued when dealing with larger files because the dtype
|
| 159 |
+
checking happens per chunk read.
|
| 160 |
+
|
| 161 |
+
Despite the warning, the CSV file is read with mixed types in a single
|
| 162 |
+
column which will be an object type. See the examples below to better
|
| 163 |
+
understand this issue.
|
| 164 |
+
|
| 165 |
+
Examples
|
| 166 |
+
--------
|
| 167 |
+
This example creates and reads a large CSV file with a column that contains
|
| 168 |
+
`int` and `str`.
|
| 169 |
+
|
| 170 |
+
>>> df = pd.DataFrame({'a': (['1'] * 100000 + ['X'] * 100000 +
|
| 171 |
+
... ['1'] * 100000),
|
| 172 |
+
... 'b': ['b'] * 300000}) # doctest: +SKIP
|
| 173 |
+
>>> df.to_csv('test.csv', index=False) # doctest: +SKIP
|
| 174 |
+
>>> df2 = pd.read_csv('test.csv') # doctest: +SKIP
|
| 175 |
+
... # DtypeWarning: Columns (0) have mixed types
|
| 176 |
+
|
| 177 |
+
Important to notice that ``df2`` will contain both `str` and `int` for the
|
| 178 |
+
same input, '1'.
|
| 179 |
+
|
| 180 |
+
>>> df2.iloc[262140, 0] # doctest: +SKIP
|
| 181 |
+
'1'
|
| 182 |
+
>>> type(df2.iloc[262140, 0]) # doctest: +SKIP
|
| 183 |
+
<class 'str'>
|
| 184 |
+
>>> df2.iloc[262150, 0] # doctest: +SKIP
|
| 185 |
+
1
|
| 186 |
+
>>> type(df2.iloc[262150, 0]) # doctest: +SKIP
|
| 187 |
+
<class 'int'>
|
| 188 |
+
|
| 189 |
+
One way to solve this issue is using the `dtype` parameter in the
|
| 190 |
+
`read_csv` and `read_table` functions to make the conversion explicit:
|
| 191 |
+
|
| 192 |
+
>>> df2 = pd.read_csv('test.csv', sep=',', dtype={'a': str}) # doctest: +SKIP
|
| 193 |
+
|
| 194 |
+
No warning was issued.
|
| 195 |
+
"""
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
class EmptyDataError(ValueError):
|
| 199 |
+
"""
|
| 200 |
+
Exception raised in ``pd.read_csv`` when empty data or header is encountered.
|
| 201 |
+
|
| 202 |
+
Examples
|
| 203 |
+
--------
|
| 204 |
+
>>> from io import StringIO
|
| 205 |
+
>>> empty = StringIO()
|
| 206 |
+
>>> pd.read_csv(empty)
|
| 207 |
+
Traceback (most recent call last):
|
| 208 |
+
EmptyDataError: No columns to parse from file
|
| 209 |
+
"""
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
class ParserWarning(Warning):
|
| 213 |
+
"""
|
| 214 |
+
Warning raised when reading a file that doesn't use the default 'c' parser.
|
| 215 |
+
|
| 216 |
+
Raised by `pd.read_csv` and `pd.read_table` when it is necessary to change
|
| 217 |
+
parsers, generally from the default 'c' parser to 'python'.
|
| 218 |
+
|
| 219 |
+
It happens due to a lack of support or functionality for parsing a
|
| 220 |
+
particular attribute of a CSV file with the requested engine.
|
| 221 |
+
|
| 222 |
+
Currently, 'c' unsupported options include the following parameters:
|
| 223 |
+
|
| 224 |
+
1. `sep` other than a single character (e.g. regex separators)
|
| 225 |
+
2. `skipfooter` higher than 0
|
| 226 |
+
3. `sep=None` with `delim_whitespace=False`
|
| 227 |
+
|
| 228 |
+
The warning can be avoided by adding `engine='python'` as a parameter in
|
| 229 |
+
`pd.read_csv` and `pd.read_table` methods.
|
| 230 |
+
|
| 231 |
+
See Also
|
| 232 |
+
--------
|
| 233 |
+
pd.read_csv : Read CSV (comma-separated) file into DataFrame.
|
| 234 |
+
pd.read_table : Read general delimited file into DataFrame.
|
| 235 |
+
|
| 236 |
+
Examples
|
| 237 |
+
--------
|
| 238 |
+
Using a `sep` in `pd.read_csv` other than a single character:
|
| 239 |
+
|
| 240 |
+
>>> import io
|
| 241 |
+
>>> csv = '''a;b;c
|
| 242 |
+
... 1;1,8
|
| 243 |
+
... 1;2,1'''
|
| 244 |
+
>>> df = pd.read_csv(io.StringIO(csv), sep='[;,]') # doctest: +SKIP
|
| 245 |
+
... # ParserWarning: Falling back to the 'python' engine...
|
| 246 |
+
|
| 247 |
+
Adding `engine='python'` to `pd.read_csv` removes the Warning:
|
| 248 |
+
|
| 249 |
+
>>> df = pd.read_csv(io.StringIO(csv), sep='[;,]', engine='python')
|
| 250 |
+
"""
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
class MergeError(ValueError):
|
| 254 |
+
"""
|
| 255 |
+
Exception raised when merging data.
|
| 256 |
+
|
| 257 |
+
Subclass of ``ValueError``.
|
| 258 |
+
|
| 259 |
+
Examples
|
| 260 |
+
--------
|
| 261 |
+
>>> left = pd.DataFrame({"a": ["a", "b", "b", "d"],
|
| 262 |
+
... "b": ["cat", "dog", "weasel", "horse"]},
|
| 263 |
+
... index=range(4))
|
| 264 |
+
>>> right = pd.DataFrame({"a": ["a", "b", "c", "d"],
|
| 265 |
+
... "c": ["meow", "bark", "chirp", "nay"]},
|
| 266 |
+
... index=range(4)).set_index("a")
|
| 267 |
+
>>> left.join(right, on="a", validate="one_to_one",)
|
| 268 |
+
Traceback (most recent call last):
|
| 269 |
+
MergeError: Merge keys are not unique in left dataset; not a one-to-one merge
|
| 270 |
+
"""
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
class AbstractMethodError(NotImplementedError):
    """
    Raise this error instead of NotImplementedError for abstract methods.

    Parameters
    ----------
    class_instance : object or type
        The instance whose method is abstract, or the class itself when
        ``methodtype`` is ``"classmethod"``.
    methodtype : str, default "method"
        One of ``"method"``, ``"classmethod"``, ``"staticmethod"`` or
        ``"property"``.

    Raises
    ------
    ValueError
        If ``methodtype`` is not one of the supported values.

    Examples
    --------
    >>> class Foo:
    ...     @classmethod
    ...     def classmethod(cls):
    ...         raise pd.errors.AbstractMethodError(cls, methodtype="classmethod")
    ...     def method(self):
    ...         raise pd.errors.AbstractMethodError(self)
    >>> test = Foo.classmethod()
    Traceback (most recent call last):
    AbstractMethodError: This classmethod must be defined in the concrete class Foo

    >>> test2 = Foo().method()
    Traceback (most recent call last):
    AbstractMethodError: This method must be defined in the concrete class Foo
    """

    def __init__(self, class_instance, methodtype: str = "method") -> None:
        types = {"method", "classmethod", "staticmethod", "property"}
        if methodtype not in types:
            # Report the allowed set first and the offending value second.
            raise ValueError(
                f"methodtype must be one of {types}, got {methodtype} instead."
            )
        self.methodtype = methodtype
        self.class_instance = class_instance

    def __str__(self) -> str:
        # For classmethods ``class_instance`` is already the class; otherwise
        # it is an instance and the class name comes from its type.
        if self.methodtype == "classmethod":
            name = self.class_instance.__name__
        else:
            name = type(self.class_instance).__name__
        return f"This {self.methodtype} must be defined in the concrete class {name}"
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
class NumbaUtilError(Exception):
|
| 312 |
+
"""
|
| 313 |
+
Error raised for unsupported Numba engine routines.
|
| 314 |
+
|
| 315 |
+
Examples
|
| 316 |
+
--------
|
| 317 |
+
>>> df = pd.DataFrame({"key": ["a", "a", "b", "b"], "data": [1, 2, 3, 4]},
|
| 318 |
+
... columns=["key", "data"])
|
| 319 |
+
>>> def incorrect_function(x):
|
| 320 |
+
... return sum(x) * 2.7
|
| 321 |
+
>>> df.groupby("key").agg(incorrect_function, engine="numba")
|
| 322 |
+
Traceback (most recent call last):
|
| 323 |
+
NumbaUtilError: The first 2 arguments to incorrect_function
|
| 324 |
+
must be ['values', 'index']
|
| 325 |
+
"""
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
class DuplicateLabelError(ValueError):
|
| 329 |
+
"""
|
| 330 |
+
Error raised when an operation would introduce duplicate labels.
|
| 331 |
+
|
| 332 |
+
Examples
|
| 333 |
+
--------
|
| 334 |
+
>>> s = pd.Series([0, 1, 2], index=['a', 'b', 'c']).set_flags(
|
| 335 |
+
... allows_duplicate_labels=False
|
| 336 |
+
... )
|
| 337 |
+
>>> s.reindex(['a', 'a', 'b'])
|
| 338 |
+
Traceback (most recent call last):
|
| 339 |
+
...
|
| 340 |
+
DuplicateLabelError: Index has duplicates.
|
| 341 |
+
positions
|
| 342 |
+
label
|
| 343 |
+
a [0, 1]
|
| 344 |
+
"""
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
class InvalidIndexError(Exception):
|
| 348 |
+
"""
|
| 349 |
+
Exception raised when attempting to use an invalid index key.
|
| 350 |
+
|
| 351 |
+
Examples
|
| 352 |
+
--------
|
| 353 |
+
>>> idx = pd.MultiIndex.from_product([["x", "y"], [0, 1]])
|
| 354 |
+
>>> df = pd.DataFrame([[1, 1, 2, 2],
|
| 355 |
+
... [3, 3, 4, 4]], columns=idx)
|
| 356 |
+
>>> df
|
| 357 |
+
x y
|
| 358 |
+
0 1 0 1
|
| 359 |
+
0 1 1 2 2
|
| 360 |
+
1 3 3 4 4
|
| 361 |
+
>>> df[:, 0]
|
| 362 |
+
Traceback (most recent call last):
|
| 363 |
+
InvalidIndexError: (slice(None, None, None), 0)
|
| 364 |
+
"""
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
class DataError(Exception):
|
| 368 |
+
"""
|
| 369 |
+
Exception raised when performing an operation on non-numerical data.
|
| 370 |
+
|
| 371 |
+
For example, calling ``ohlc`` on a non-numerical column or a function
|
| 372 |
+
on a rolling window.
|
| 373 |
+
|
| 374 |
+
Examples
|
| 375 |
+
--------
|
| 376 |
+
>>> ser = pd.Series(['a', 'b', 'c'])
|
| 377 |
+
>>> ser.rolling(2).sum()
|
| 378 |
+
Traceback (most recent call last):
|
| 379 |
+
DataError: No numeric types to aggregate
|
| 380 |
+
"""
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
class SpecificationError(Exception):
|
| 384 |
+
"""
|
| 385 |
+
Exception raised by ``agg`` when the functions are ill-specified.
|
| 386 |
+
|
| 387 |
+
The exception is raised in two scenarios.
|
| 388 |
+
|
| 389 |
+
The first way is calling ``agg`` on a
|
| 390 |
+
Dataframe or Series using a nested renamer (dict-of-dict).
|
| 391 |
+
|
| 392 |
+
The second way is calling ``agg`` on a Dataframe with duplicated function
|
| 393 |
+
names without assigning column name.
|
| 394 |
+
|
| 395 |
+
Examples
|
| 396 |
+
--------
|
| 397 |
+
>>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2],
|
| 398 |
+
... 'B': range(5),
|
| 399 |
+
... 'C': range(5)})
|
| 400 |
+
>>> df.groupby('A').B.agg({'foo': 'count'}) # doctest: +SKIP
|
| 401 |
+
... # SpecificationError: nested renamer is not supported
|
| 402 |
+
|
| 403 |
+
>>> df.groupby('A').agg({'B': {'foo': ['sum', 'max']}}) # doctest: +SKIP
|
| 404 |
+
... # SpecificationError: nested renamer is not supported
|
| 405 |
+
|
| 406 |
+
>>> df.groupby('A').agg(['min', 'min']) # doctest: +SKIP
|
| 407 |
+
... # SpecificationError: nested renamer is not supported
|
| 408 |
+
"""
|
| 409 |
+
|
| 410 |
+
|
| 411 |
+
class SettingWithCopyError(ValueError):
|
| 412 |
+
"""
|
| 413 |
+
Exception raised when trying to set on a copied slice from a ``DataFrame``.
|
| 414 |
+
|
| 415 |
+
The ``mode.chained_assignment`` option needs to be set to 'raise'. This can
|
| 416 |
+
happen unintentionally when chained indexing.
|
| 417 |
+
|
| 418 |
+
For more information on evaluation order,
|
| 419 |
+
see :ref:`the user guide<indexing.evaluation_order>`.
|
| 420 |
+
|
| 421 |
+
For more information on view vs. copy,
|
| 422 |
+
see :ref:`the user guide<indexing.view_versus_copy>`.
|
| 423 |
+
|
| 424 |
+
Examples
|
| 425 |
+
--------
|
| 426 |
+
>>> pd.options.mode.chained_assignment = 'raise'
|
| 427 |
+
>>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A'])
|
| 428 |
+
>>> df.loc[0:3]['A'] = 'a' # doctest: +SKIP
|
| 429 |
+
... # SettingWithCopyError: A value is trying to be set on a copy of a...
|
| 430 |
+
"""
|
| 431 |
+
|
| 432 |
+
|
| 433 |
+
class SettingWithCopyWarning(Warning):
|
| 434 |
+
"""
|
| 435 |
+
Warning raised when trying to set on a copied slice from a ``DataFrame``.
|
| 436 |
+
|
| 437 |
+
The ``mode.chained_assignment`` option needs to be set to 'warn'.
|
| 438 |
+
'Warn' is the default option. This can happen unintentionally when
|
| 439 |
+
chained indexing.
|
| 440 |
+
|
| 441 |
+
For more information on evaluation order,
|
| 442 |
+
see :ref:`the user guide<indexing.evaluation_order>`.
|
| 443 |
+
|
| 444 |
+
For more information on view vs. copy,
|
| 445 |
+
see :ref:`the user guide<indexing.view_versus_copy>`.
|
| 446 |
+
|
| 447 |
+
Examples
|
| 448 |
+
--------
|
| 449 |
+
>>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A'])
|
| 450 |
+
>>> df.loc[0:3]['A'] = 'a' # doctest: +SKIP
|
| 451 |
+
... # SettingWithCopyWarning: A value is trying to be set on a copy of a...
|
| 452 |
+
"""
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
class ChainedAssignmentError(Warning):
|
| 456 |
+
"""
|
| 457 |
+
Warning raised when trying to set using chained assignment.
|
| 458 |
+
|
| 459 |
+
When the ``mode.copy_on_write`` option is enabled, chained assignment can
|
| 460 |
+
never work. In such a situation, we are always setting into a temporary
|
| 461 |
+
object that is the result of an indexing operation (getitem), which under
|
| 462 |
+
Copy-on-Write always behaves as a copy. Thus, assigning through a chain
|
| 463 |
+
can never update the original Series or DataFrame.
|
| 464 |
+
|
| 465 |
+
For more information on view vs. copy,
|
| 466 |
+
see :ref:`the user guide<indexing.view_versus_copy>`.
|
| 467 |
+
|
| 468 |
+
Examples
|
| 469 |
+
--------
|
| 470 |
+
>>> pd.options.mode.copy_on_write = True
|
| 471 |
+
>>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A'])
|
| 472 |
+
>>> df["A"][0:3] = 10 # doctest: +SKIP
|
| 473 |
+
... # ChainedAssignmentError: ...
|
| 474 |
+
>>> pd.options.mode.copy_on_write = False
|
| 475 |
+
"""
|
| 476 |
+
|
| 477 |
+
|
| 478 |
+
_chained_assignment_msg = (
|
| 479 |
+
"A value is trying to be set on a copy of a DataFrame or Series "
|
| 480 |
+
"through chained assignment.\n"
|
| 481 |
+
"When using the Copy-on-Write mode, such chained assignment never works "
|
| 482 |
+
"to update the original DataFrame or Series, because the intermediate "
|
| 483 |
+
"object on which we are setting values always behaves as a copy.\n\n"
|
| 484 |
+
"Try using '.loc[row_indexer, col_indexer] = value' instead, to perform "
|
| 485 |
+
"the assignment in a single step.\n\n"
|
| 486 |
+
"See the caveats in the documentation: "
|
| 487 |
+
"https://pandas.pydata.org/pandas-docs/stable/user_guide/"
|
| 488 |
+
"indexing.html#returning-a-view-versus-a-copy"
|
| 489 |
+
)
|
| 490 |
+
|
| 491 |
+
|
| 492 |
+
_chained_assignment_method_msg = (
|
| 493 |
+
"A value is trying to be set on a copy of a DataFrame or Series "
|
| 494 |
+
"through chained assignment using an inplace method.\n"
|
| 495 |
+
"When using the Copy-on-Write mode, such inplace method never works "
|
| 496 |
+
"to update the original DataFrame or Series, because the intermediate "
|
| 497 |
+
"object on which we are setting values always behaves as a copy.\n\n"
|
| 498 |
+
"For example, when doing 'df[col].method(value, inplace=True)', try "
|
| 499 |
+
"using 'df.method({col: value}, inplace=True)' instead, to perform "
|
| 500 |
+
"the operation inplace on the original object.\n\n"
|
| 501 |
+
)
|
| 502 |
+
|
| 503 |
+
|
| 504 |
+
_chained_assignment_warning_msg = (
|
| 505 |
+
"ChainedAssignmentError: behaviour will change in pandas 3.0!\n"
|
| 506 |
+
"You are setting values through chained assignment. Currently this works "
|
| 507 |
+
"in certain cases, but when using Copy-on-Write (which will become the "
|
| 508 |
+
"default behaviour in pandas 3.0) this will never work to update the "
|
| 509 |
+
"original DataFrame or Series, because the intermediate object on which "
|
| 510 |
+
"we are setting values will behave as a copy.\n"
|
| 511 |
+
"A typical example is when you are setting values in a column of a "
|
| 512 |
+
"DataFrame, like:\n\n"
|
| 513 |
+
'df["col"][row_indexer] = value\n\n'
|
| 514 |
+
'Use `df.loc[row_indexer, "col"] = values` instead, to perform the '
|
| 515 |
+
"assignment in a single step and ensure this keeps updating the original `df`.\n\n"
|
| 516 |
+
"See the caveats in the documentation: "
|
| 517 |
+
"https://pandas.pydata.org/pandas-docs/stable/user_guide/"
|
| 518 |
+
"indexing.html#returning-a-view-versus-a-copy\n"
|
| 519 |
+
)
|
| 520 |
+
|
| 521 |
+
|
| 522 |
+
_chained_assignment_warning_method_msg = (
|
| 523 |
+
"A value is trying to be set on a copy of a DataFrame or Series "
|
| 524 |
+
"through chained assignment using an inplace method.\n"
|
| 525 |
+
"The behavior will change in pandas 3.0. This inplace method will "
|
| 526 |
+
"never work because the intermediate object on which we are setting "
|
| 527 |
+
"values always behaves as a copy.\n\n"
|
| 528 |
+
"For example, when doing 'df[col].method(value, inplace=True)', try "
|
| 529 |
+
"using 'df.method({col: value}, inplace=True)' or "
|
| 530 |
+
"df[col] = df[col].method(value) instead, to perform "
|
| 531 |
+
"the operation inplace on the original object.\n\n"
|
| 532 |
+
)
|
| 533 |
+
|
| 534 |
+
|
| 535 |
+
def _check_cacher(obj):
    """
    Tell whether ``obj`` is the exact object stored in its parent's item cache.

    ``obj._cacher`` is read as a pair ``(key, parent_ref)`` where calling
    ``parent_ref`` yields the parent, or ``None`` once the parent is gone.

    Returns
    -------
    bool
        True only if ``obj`` has a ``_cacher``, its parent is still alive,
        the parent has an ``_item_cache`` containing the key, and the cached
        entry is ``obj`` itself (identity, not equality).
    """
    # Selection paths that return a view set ``_cacher`` on the Series. Most of
    # them also populate ``_item_cache`` (adding one to the relevant reference
    # count), but iloc does not and hands back a new object, so identity with
    # the cached entry has to be verified explicitly.
    if not hasattr(obj, "_cacher"):
        return False

    owner = obj._cacher[1]()
    if owner is None:
        # The parent could already be dead.
        return False

    if not hasattr(owner, "_item_cache"):
        return False

    if obj._cacher[0] not in owner._item_cache:
        return False

    # iloc creates a new object, so membership of the key alone is not enough.
    return obj is owner._item_cache[obj._cacher[0]]
|
| 551 |
+
|
| 552 |
+
|
| 553 |
+
class NumExprClobberingError(NameError):
    """
    Exception raised when a variable name clashes with a built-in numexpr name.

    ``eval`` or ``query`` raise this error when the engine is set to
    'numexpr', which is the default engine for these methods whenever the
    numexpr package is installed.

    Examples
    --------
    >>> df = pd.DataFrame({'abs': [1, 1, 1]})
    >>> df.query("abs > 2") # doctest: +SKIP
    ... # NumExprClobberingError: Variables in expression "(abs) > (2)" overlap...
    >>> sin, a = 1, 2
    >>> pd.eval("sin + a", engine='numexpr') # doctest: +SKIP
    ... # NumExprClobberingError: Variables in expression "(sin) + (a)" overlap...
    """
|
| 570 |
+
|
| 571 |
+
|
| 572 |
+
class UndefinedVariableError(NameError):
    """
    Exception raised by ``query`` or ``eval`` for an undefined variable name.

    The message also states whether the missing variable was looked up as a
    local variable or as a plain name.

    Examples
    --------
    >>> df = pd.DataFrame({'A': [1, 1, 1]})
    >>> df.query("A > x") # doctest: +SKIP
    ... # UndefinedVariableError: name 'x' is not defined
    >>> df.query("A > @y") # doctest: +SKIP
    ... # UndefinedVariableError: local variable 'y' is not defined
    >>> pd.eval('x + 1') # doctest: +SKIP
    ... # UndefinedVariableError: name 'x' is not defined
    """

    def __init__(self, name: str, is_local: bool | None = None) -> None:
        # A truthy ``is_local`` selects the "local variable" wording; both
        # ``False`` and ``None`` fall back to the generic "name" wording.
        kind = "local variable" if is_local else "name"
        super().__init__(f"{kind} {name!r} is not defined")
|
| 596 |
+
|
| 597 |
+
|
| 598 |
+
class IndexingError(Exception):
    """
    Exception raised when an indexer does not match the object's dimensions.

    Examples
    --------
    >>> df = pd.DataFrame({'A': [1, 1, 1]})
    >>> df.loc[..., ..., 'A'] # doctest: +SKIP
    ... # IndexingError: indexer may only contain one '...' entry
    >>> df = pd.DataFrame({'A': [1, 1, 1]})
    >>> df.loc[1, ..., ...] # doctest: +SKIP
    ... # IndexingError: Too many indexers
    >>> df[pd.Series([True], dtype=bool)] # doctest: +SKIP
    ... # IndexingError: Unalignable boolean Series provided as indexer...
    >>> s = pd.Series(range(2),
    ...               index = pd.MultiIndex.from_product([["a", "b"], ["c"]]))
    >>> s.loc["a", "c", "d"] # doctest: +SKIP
    ... # IndexingError: Too many indexers
    """
|
| 617 |
+
|
| 618 |
+
|
| 619 |
+
class PyperclipException(RuntimeError):
    """
    Exception raised when clipboard functionality is not supported.

    It is raised by ``to_clipboard()`` and ``read_clipboard()``.
    """
|
| 625 |
+
|
| 626 |
+
|
| 627 |
+
class PyperclipWindowsException(PyperclipException):
    """
    Exception raised when clipboard functionality is unsupported by Windows.

    Access to the clipboard handle is denied because some other window
    process is accessing it.
    """

    def __init__(self, message: str) -> None:
        # ``ctypes.WinError`` only exists on Windows, so type checkers flag the
        # attribute on every other platform.
        win_error = ctypes.WinError()  # type: ignore[attr-defined]
        # Append the Windows error description in parentheses.
        super().__init__(message + f" ({win_error})")
|
| 639 |
+
|
| 640 |
+
|
| 641 |
+
class CSSWarning(UserWarning):
    """
    Warning raised when converting css styling fails.

    The cause is either a style that has no equivalent value or a style
    that is not properly formatted.

    Examples
    --------
    >>> df = pd.DataFrame({'A': [1, 1, 1]})
    >>> df.style.applymap(
    ...     lambda x: 'background-color: blueGreenRed;'
    ... ).to_excel('styled.xlsx') # doctest: +SKIP
    CSSWarning: Unhandled color format: 'blueGreenRed'
    >>> df.style.applymap(
    ...     lambda x: 'border: 1px solid red red;'
    ... ).to_excel('styled.xlsx') # doctest: +SKIP
    CSSWarning: Unhandled color format: 'blueGreenRed'
    """
|
| 660 |
+
|
| 661 |
+
|
| 662 |
+
class PossibleDataLossError(Exception):
    """
    Exception raised when opening an HDFStore file that is already open.

    Examples
    --------
    >>> store = pd.HDFStore('my-store', 'a') # doctest: +SKIP
    >>> store.open("w") # doctest: +SKIP
    ... # PossibleDataLossError: Re-opening the file [my-store] with mode [a]...
    """
|
| 672 |
+
|
| 673 |
+
|
| 674 |
+
class ClosedFileError(Exception):
    """
    Exception raised when operating on an HDFStore file that has been closed.

    Examples
    --------
    >>> store = pd.HDFStore('my-store', 'a') # doctest: +SKIP
    >>> store.close() # doctest: +SKIP
    >>> store.keys() # doctest: +SKIP
    ... # ClosedFileError: my-store file is not open!
    """
|
| 685 |
+
|
| 686 |
+
|
| 687 |
+
class IncompatibilityWarning(Warning):
    """
    Warning raised when where criteria are used on an incompatible HDF5 file.
    """
|
| 691 |
+
|
| 692 |
+
|
| 693 |
+
class AttributeConflictWarning(Warning):
    """
    Warning raised when index attributes conflict while using HDFStore.

    It occurs when the appended index has a different name, or a different
    frequency, than the index that already exists on the HDFStore.

    Examples
    --------
    >>> idx1 = pd.Index(['a', 'b'], name='name1')
    >>> df1 = pd.DataFrame([[1, 2], [3, 4]], index=idx1)
    >>> df1.to_hdf('file', 'data', 'w', append=True) # doctest: +SKIP
    >>> idx2 = pd.Index(['c', 'd'], name='name2')
    >>> df2 = pd.DataFrame([[5, 6], [7, 8]], index=idx2)
    >>> df2.to_hdf('file', 'data', 'a', append=True) # doctest: +SKIP
    AttributeConflictWarning: the [index_name] attribute of the existing index is
    [name1] which conflicts with the new [name2]...
    """
|
| 712 |
+
|
| 713 |
+
|
| 714 |
+
class DatabaseError(OSError):
    """
    Error raised when executing sql that has bad syntax or that throws an error.

    Examples
    --------
    >>> from sqlite3 import connect
    >>> conn = connect(':memory:')
    >>> pd.read_sql('select * test', conn) # doctest: +SKIP
    ... # DatabaseError: Execution failed on sql 'test': near "test": syntax error
    """
|
| 725 |
+
|
| 726 |
+
|
| 727 |
+
class PossiblePrecisionLoss(Warning):
    """
    Warning raised by to_stata on a column with a value outside or equal to int64.

    A column holding a value that is outside or equal to the int64 value is
    converted to a float64 dtype.

    Examples
    --------
    >>> df = pd.DataFrame({"s": pd.Series([1, 2**53], dtype=np.int64)})
    >>> df.to_stata('test') # doctest: +SKIP
    ... # PossiblePrecisionLoss: Column converted from int64 to float64...
    """
|
| 740 |
+
|
| 741 |
+
|
| 742 |
+
class ValueLabelTypeMismatch(Warning):
    """
    Warning raised by to_stata on a category column holding non-string values.

    Examples
    --------
    >>> df = pd.DataFrame({"categories": pd.Series(["a", 2], dtype="category")})
    >>> df.to_stata('test') # doctest: +SKIP
    ... # ValueLabelTypeMismatch: Stata value labels (pandas categories) must be str...
    """
|
| 752 |
+
|
| 753 |
+
|
| 754 |
+
class InvalidColumnName(Warning):
    """
    Warning raised by to_stata when a column has a non-valid Stata name.

    Since the column name is not a valid Stata variable name, it has to be
    converted.

    Examples
    --------
    >>> df = pd.DataFrame({"0categories": pd.Series([2, 2])})
    >>> df.to_stata('test') # doctest: +SKIP
    ... # InvalidColumnName: Not all pandas column names were valid Stata variable...
    """
|
| 767 |
+
|
| 768 |
+
|
| 769 |
+
class CategoricalConversionWarning(Warning):
    """
    Warning raised when reading a partially labeled Stata file with an iterator.

    Examples
    --------
    >>> from pandas.io.stata import StataReader
    >>> with StataReader('dta_file', chunksize=2) as reader: # doctest: +SKIP
    ...     for i, block in enumerate(reader):
    ...         print(i, block)
    ... # CategoricalConversionWarning: One or more series with value labels...
    """
|
| 781 |
+
|
| 782 |
+
|
| 783 |
+
class LossySetitemError(Exception):
    """
    Raised when a __setitem__ on an np.ndarray would not be lossless.

    Notes
    -----
    This is an internal error.
    """
|
| 791 |
+
|
| 792 |
+
|
| 793 |
+
class NoBufferPresent(Exception):
    """
    Exception raised in _get_data_buffer to signal the requested buffer is absent.
    """
|
| 797 |
+
|
| 798 |
+
|
| 799 |
+
class InvalidComparison(Exception):
    """
    Exception raised by _validate_comparison_value for an invalid comparison.

    Notes
    -----
    This is an internal error.
    """
|
| 807 |
+
|
| 808 |
+
|
| 809 |
+
# Names exported by a star-import of this module. Several entries are not
# defined in the visible part of the file.
# NOTE(review): presumably they are defined or imported earlier in the module -
# confirm before editing this list.
__all__ = [
    "AbstractMethodError",
    "AttributeConflictWarning",
    "CategoricalConversionWarning",
    "ClosedFileError",
    "CSSWarning",
    "DatabaseError",
    "DataError",
    "DtypeWarning",
    "DuplicateLabelError",
    "EmptyDataError",
    "IncompatibilityWarning",
    "IntCastingNaNError",
    "InvalidColumnName",
    "InvalidComparison",
    "InvalidIndexError",
    "InvalidVersion",
    "IndexingError",
    "LossySetitemError",
    "MergeError",
    "NoBufferPresent",
    "NullFrequencyError",
    "NumbaUtilError",
    "NumExprClobberingError",
    "OptionError",
    "OutOfBoundsDatetime",
    "OutOfBoundsTimedelta",
    "ParserError",
    "ParserWarning",
    "PerformanceWarning",
    "PossibleDataLossError",
    "PossiblePrecisionLoss",
    "PyperclipException",
    "PyperclipWindowsException",
    "SettingWithCopyError",
    "SettingWithCopyWarning",
    "SpecificationError",
    "UndefinedVariableError",
    "UnsortedIndexError",
    "UnsupportedFunctionCall",
    "ValueLabelTypeMismatch",
]
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/feather_format.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
""" feather-format compat """
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from typing import (
|
| 5 |
+
TYPE_CHECKING,
|
| 6 |
+
Any,
|
| 7 |
+
)
|
| 8 |
+
|
| 9 |
+
from pandas._config import using_pyarrow_string_dtype
|
| 10 |
+
|
| 11 |
+
from pandas._libs import lib
|
| 12 |
+
from pandas.compat._optional import import_optional_dependency
|
| 13 |
+
from pandas.util._decorators import doc
|
| 14 |
+
from pandas.util._validators import check_dtype_backend
|
| 15 |
+
|
| 16 |
+
import pandas as pd
|
| 17 |
+
from pandas.core.api import DataFrame
|
| 18 |
+
from pandas.core.shared_docs import _shared_docs
|
| 19 |
+
|
| 20 |
+
from pandas.io._util import arrow_string_types_mapper
|
| 21 |
+
from pandas.io.common import get_handle
|
| 22 |
+
|
| 23 |
+
if TYPE_CHECKING:
|
| 24 |
+
from collections.abc import (
|
| 25 |
+
Hashable,
|
| 26 |
+
Sequence,
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
from pandas._typing import (
|
| 30 |
+
DtypeBackend,
|
| 31 |
+
FilePath,
|
| 32 |
+
ReadBuffer,
|
| 33 |
+
StorageOptions,
|
| 34 |
+
WriteBuffer,
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
@doc(storage_options=_shared_docs["storage_options"])
def to_feather(
    df: DataFrame,
    path: FilePath | WriteBuffer[bytes],
    storage_options: StorageOptions | None = None,
    **kwargs: Any,
) -> None:
    """
    Write a DataFrame to the binary Feather format.

    Parameters
    ----------
    df : DataFrame
        The frame to serialize; any other type is rejected.
    path : str, path object, or file-like object
        Destination, opened in binary write mode.
    {storage_options}
    **kwargs :
        Additional keywords passed to `pyarrow.feather.write_feather`.

    Raises
    ------
    ValueError
        If ``df`` is not a DataFrame.
    """
    # pyarrow is optional: fail with the standard message when it is missing.
    import_optional_dependency("pyarrow")
    from pyarrow import feather

    if not isinstance(df, DataFrame):
        raise ValueError("feather only support IO with DataFrames")

    target = get_handle(path, "wb", storage_options=storage_options, is_text=False)
    with target as io_handles:
        feather.write_feather(df, io_handles.handle, **kwargs)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
@doc(storage_options=_shared_docs["storage_options"])
def read_feather(
    path: FilePath | ReadBuffer[bytes],
    columns: Sequence[Hashable] | None = None,
    use_threads: bool = True,
    storage_options: StorageOptions | None = None,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
) -> DataFrame:
    """
    Load a feather-format object from the file path.

    Parameters
    ----------
    path : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function. The string could be a URL.
        Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be: ``file://localhost/path/to/table.feather``.
    columns : sequence, default None
        If not provided, all columns are read.
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads.
    {storage_options}

    dtype_backend : {{'numpy_nullable', 'pyarrow'}}, optional
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). Behaviour is as follows:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`.
        * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
          DataFrame.

        When the argument is omitted, no dtype mapping is requested unless the
        pyarrow string dtype option is enabled.

        .. versionadded:: 2.0

    Returns
    -------
    type of object stored in file

    Examples
    --------
    >>> df = pd.read_feather("path/to/file.feather")  # doctest: +SKIP
    """
    # pyarrow is optional: fail with the standard message when it is missing.
    import_optional_dependency("pyarrow")
    from pyarrow import feather

    # import utils to register the pyarrow extension types
    import pandas.core.arrays.arrow.extension_types  # pyright: ignore[reportUnusedImport] # noqa: F401

    check_dtype_backend(dtype_backend)

    source = get_handle(path, "rb", storage_options=storage_options, is_text=False)
    with source as io_handles:
        # Fast path: no backend requested and no pyarrow-string option, so
        # pyarrow builds the DataFrame directly.
        if dtype_backend is lib.no_default and not using_pyarrow_string_dtype():
            return feather.read_feather(
                io_handles.handle, columns=columns, use_threads=bool(use_threads)
            )

        table = feather.read_table(
            io_handles.handle, columns=columns, use_threads=bool(use_threads)
        )

        # Choose the pyarrow -> pandas dtype mapper for the requested backend.
        if dtype_backend == "numpy_nullable":
            from pandas.io._util import _arrow_dtype_mapping

            mapper = _arrow_dtype_mapping().get
        elif dtype_backend == "pyarrow":
            mapper = pd.ArrowDtype
        elif using_pyarrow_string_dtype():
            mapper = arrow_string_types_mapper()
        else:
            raise NotImplementedError

        return table.to_pandas(types_mapper=mapper)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/gbq.py
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
""" Google BigQuery support """
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from typing import (
|
| 5 |
+
TYPE_CHECKING,
|
| 6 |
+
Any,
|
| 7 |
+
)
|
| 8 |
+
import warnings
|
| 9 |
+
|
| 10 |
+
from pandas.compat._optional import import_optional_dependency
|
| 11 |
+
from pandas.util._exceptions import find_stack_level
|
| 12 |
+
|
| 13 |
+
if TYPE_CHECKING:
|
| 14 |
+
from google.auth.credentials import Credentials
|
| 15 |
+
|
| 16 |
+
from pandas import DataFrame
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def _try_import():
    """
    Import and return the optional ``pandas_gbq`` module.

    The import is deferred until first use because pandas is itself a
    dependency of pandas-gbq. The extra text below is passed to
    ``import_optional_dependency`` as its ``extra`` argument.
    """
    return import_optional_dependency(
        "pandas_gbq",
        extra=(
            "pandas-gbq is required to load data from Google BigQuery. "
            "See the docs: https://pandas-gbq.readthedocs.io."
        ),
    )
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def read_gbq(
    query: str,
    project_id: str | None = None,
    index_col: str | None = None,
    col_order: list[str] | None = None,
    reauth: bool = False,
    auth_local_webserver: bool = True,
    dialect: str | None = None,
    location: str | None = None,
    configuration: dict[str, Any] | None = None,
    credentials: Credentials | None = None,
    use_bqstorage_api: bool | None = None,
    max_results: int | None = None,
    progress_bar_type: str | None = None,
) -> DataFrame:
    """
    Load data from Google BigQuery.

    .. deprecated:: 2.2.0

       Please use ``pandas_gbq.read_gbq`` instead.

    This function requires the `pandas-gbq package
    <https://pandas-gbq.readthedocs.io>`__.

    See the `How to authenticate with Google BigQuery
    <https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__
    guide for authentication instructions.

    Parameters
    ----------
    query : str
        SQL-Like Query to return data values.
    project_id : str, optional
        Google BigQuery Account project ID. Optional when available from
        the environment.
    index_col : str, optional
        Name of result column to use for index in results DataFrame.
    col_order : list(str), optional
        List of BigQuery column names in the desired order for results
        DataFrame.
    reauth : bool, default False
        Force Google BigQuery to re-authenticate the user. This is useful
        if multiple accounts are used.
    auth_local_webserver : bool, default True
        Use the `local webserver flow`_ instead of the `console flow`_
        when getting user credentials.

        .. _local webserver flow:
            https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
        .. _console flow:
            https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console

        *New in version 0.2.0 of pandas-gbq*.

        .. versionchanged:: 1.5.0
           Default value is changed to ``True``. Google has deprecated the
           ``auth_local_webserver = False`` `"out of band" (copy-paste)
           flow
           <https://developers.googleblog.com/2022/02/making-oauth-flows-safer.html?m=1#disallowed-oob>`_.
    dialect : str, default 'legacy'
        Note: The default value is changing to 'standard' in a future version.

        SQL syntax dialect to use. Value can be one of:

        ``'legacy'``
            Use BigQuery's legacy SQL dialect. For more information see
            `BigQuery Legacy SQL Reference
            <https://cloud.google.com/bigquery/docs/reference/legacy-sql>`__.
        ``'standard'``
            Use BigQuery's standard SQL, which is
            compliant with the SQL 2011 standard. For more information
            see `BigQuery Standard SQL Reference
            <https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__.
    location : str, optional
        Location where the query job should run. See the `BigQuery locations
        documentation
        <https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a
        list of available locations. The location must match that of any
        datasets used in the query.

        *New in version 0.5.0 of pandas-gbq*.
    configuration : dict, optional
        Query config parameters for job processing.
        For example:

            configuration = {'query': {'useQueryCache': False}}

        For more information see `BigQuery REST API Reference
        <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__.
    credentials : google.auth.credentials.Credentials, optional
        Credentials for accessing Google APIs. Use this parameter to override
        default credentials, such as to use Compute Engine
        :class:`google.auth.compute_engine.Credentials` or Service Account
        :class:`google.oauth2.service_account.Credentials` directly.

        *New in version 0.8.0 of pandas-gbq*.
    use_bqstorage_api : bool, optional
        Use the `BigQuery Storage API
        <https://cloud.google.com/bigquery/docs/reference/storage/>`__ to
        download query results quickly, but at an increased cost. To use this
        API, first `enable it in the Cloud Console
        <https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com>`__.
        You must also have the `bigquery.readsessions.create
        <https://cloud.google.com/bigquery/docs/access-control#roles>`__
        permission on the project you are billing queries to.

        This feature requires version 0.10.0 or later of the ``pandas-gbq``
        package. It also requires the ``google-cloud-bigquery-storage`` and
        ``fastavro`` packages.

        The argument is only forwarded to pandas-gbq when it is not ``None``.

    max_results : int, optional
        If set, limit the maximum number of rows to fetch from the query
        results. Only forwarded to pandas-gbq when it is not ``None``.

    progress_bar_type : str, optional
        If set, use the `tqdm <https://tqdm.github.io/>`__ library to
        display a progress bar while the data downloads. Install the
        ``tqdm`` package to use this feature.

        Possible values of ``progress_bar_type`` include:

        ``None``
            No progress bar.
        ``'tqdm'``
            Use the :func:`tqdm.tqdm` function to print a progress bar
            to :data:`sys.stderr`.
        ``'tqdm_notebook'``
            Use the :func:`tqdm.tqdm_notebook` function to display a
            progress bar as a Jupyter notebook widget.
        ``'tqdm_gui'``
            Use the :func:`tqdm.tqdm_gui` function to display a
            progress bar as a graphical dialog box.

    Returns
    -------
    df: DataFrame
        DataFrame representing results of query.

    See Also
    --------
    pandas_gbq.read_gbq : This function in the pandas-gbq library.
    DataFrame.to_gbq : Write a DataFrame to Google BigQuery.

    Examples
    --------
    Example taken from `Google BigQuery documentation
    <https://cloud.google.com/bigquery/docs/pandas-gbq-migration>`_

    >>> sql = "SELECT name FROM table_name WHERE state = 'TX' LIMIT 100;"
    >>> df = pd.read_gbq(sql, dialect="standard")  # doctest: +SKIP
    >>> project_id = "your-project-id"  # doctest: +SKIP
    >>> df = pd.read_gbq(sql,
    ...                  project_id=project_id,
    ...                  dialect="standard"
    ...                  )  # doctest: +SKIP
    """
    warnings.warn(
        "read_gbq is deprecated and will be removed in a future version. "
        "Please use pandas_gbq.read_gbq instead: "
        "https://pandas-gbq.readthedocs.io/en/latest/api.html#pandas_gbq.read_gbq",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    pandas_gbq = _try_import()

    # Newer keyword arguments are passed on only when explicitly set.
    newer_options = {
        "use_bqstorage_api": use_bqstorage_api,
        "max_results": max_results,
    }
    extra_kwargs: dict[str, str | bool | int | None] = {
        option: value for option, value in newer_options.items() if value is not None
    }
    # progress_bar_type is always forwarded, including when it is None.
    extra_kwargs["progress_bar_type"] = progress_bar_type

    return pandas_gbq.read_gbq(
        query,
        project_id=project_id,
        index_col=index_col,
        col_order=col_order,
        reauth=reauth,
        auth_local_webserver=auth_local_webserver,
        dialect=dialect,
        location=location,
        configuration=configuration,
        credentials=credentials,
        **extra_kwargs,
    )
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
def to_gbq(
    dataframe: DataFrame,
    destination_table: str,
    project_id: str | None = None,
    chunksize: int | None = None,
    reauth: bool = False,
    if_exists: str = "fail",
    auth_local_webserver: bool = True,
    table_schema: list[dict[str, str]] | None = None,
    location: str | None = None,
    progress_bar: bool = True,
    credentials: Credentials | None = None,
) -> None:
    """
    Forward a DataFrame write to ``pandas_gbq.to_gbq`` (deprecated wrapper).

    A ``FutureWarning`` pointing to ``pandas_gbq.to_gbq`` is emitted first.
    The ``pandas_gbq`` module is then imported on demand, and ``dataframe``,
    ``destination_table`` and every keyword argument are passed through
    unchanged. Nothing is returned.
    """
    warnings.warn(
        "to_gbq is deprecated and will be removed in a future version. "
        "Please use pandas_gbq.to_gbq instead: "
        "https://pandas-gbq.readthedocs.io/en/latest/api.html#pandas_gbq.to_gbq",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    pandas_gbq = _try_import()

    # Keyword arguments handed to pandas-gbq exactly as received.
    forwarded = {
        "project_id": project_id,
        "chunksize": chunksize,
        "reauth": reauth,
        "if_exists": if_exists,
        "auth_local_webserver": auth_local_webserver,
        "table_schema": table_schema,
        "location": location,
        "progress_bar": progress_bar,
        "credentials": credentials,
    }
    pandas_gbq.to_gbq(dataframe, destination_table, **forwarded)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/html.py
ADDED
|
@@ -0,0 +1,1259 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
:mod:`pandas.io.html` is a module containing functionality for dealing with
|
| 3 |
+
HTML IO.
|
| 4 |
+
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
from collections import abc
|
| 10 |
+
import numbers
|
| 11 |
+
import re
|
| 12 |
+
from re import Pattern
|
| 13 |
+
from typing import (
|
| 14 |
+
TYPE_CHECKING,
|
| 15 |
+
Literal,
|
| 16 |
+
cast,
|
| 17 |
+
)
|
| 18 |
+
import warnings
|
| 19 |
+
|
| 20 |
+
from pandas._libs import lib
|
| 21 |
+
from pandas.compat._optional import import_optional_dependency
|
| 22 |
+
from pandas.errors import (
|
| 23 |
+
AbstractMethodError,
|
| 24 |
+
EmptyDataError,
|
| 25 |
+
)
|
| 26 |
+
from pandas.util._decorators import doc
|
| 27 |
+
from pandas.util._exceptions import find_stack_level
|
| 28 |
+
from pandas.util._validators import check_dtype_backend
|
| 29 |
+
|
| 30 |
+
from pandas.core.dtypes.common import is_list_like
|
| 31 |
+
|
| 32 |
+
from pandas import isna
|
| 33 |
+
from pandas.core.indexes.base import Index
|
| 34 |
+
from pandas.core.indexes.multi import MultiIndex
|
| 35 |
+
from pandas.core.series import Series
|
| 36 |
+
from pandas.core.shared_docs import _shared_docs
|
| 37 |
+
|
| 38 |
+
from pandas.io.common import (
|
| 39 |
+
file_exists,
|
| 40 |
+
get_handle,
|
| 41 |
+
is_file_like,
|
| 42 |
+
is_fsspec_url,
|
| 43 |
+
is_url,
|
| 44 |
+
stringify_path,
|
| 45 |
+
validate_header_arg,
|
| 46 |
+
)
|
| 47 |
+
from pandas.io.formats.printing import pprint_thing
|
| 48 |
+
from pandas.io.parsers import TextParser
|
| 49 |
+
|
| 50 |
+
if TYPE_CHECKING:
|
| 51 |
+
from collections.abc import (
|
| 52 |
+
Iterable,
|
| 53 |
+
Sequence,
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
from pandas._typing import (
|
| 57 |
+
BaseBuffer,
|
| 58 |
+
DtypeBackend,
|
| 59 |
+
FilePath,
|
| 60 |
+
HTMLFlavors,
|
| 61 |
+
ReadBuffer,
|
| 62 |
+
StorageOptions,
|
| 63 |
+
)
|
| 64 |
+
|
| 65 |
+
from pandas import DataFrame
|
| 66 |
+
|
| 67 |
+
#############
|
| 68 |
+
# READ HTML #
|
| 69 |
+
#############
|
| 70 |
+
_RE_WHITESPACE = re.compile(r"[\r\n]+|\s{2,}")
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def _remove_whitespace(s: str, regex: Pattern = _RE_WHITESPACE) -> str:
|
| 74 |
+
"""
|
| 75 |
+
Replace extra whitespace inside of a string with a single space.
|
| 76 |
+
|
| 77 |
+
Parameters
|
| 78 |
+
----------
|
| 79 |
+
s : str or unicode
|
| 80 |
+
The string from which to remove extra whitespace.
|
| 81 |
+
regex : re.Pattern
|
| 82 |
+
The regular expression to use to remove extra whitespace.
|
| 83 |
+
|
| 84 |
+
Returns
|
| 85 |
+
-------
|
| 86 |
+
subd : str or unicode
|
| 87 |
+
`s` with all extra whitespace replaced with a single space.
|
| 88 |
+
"""
|
| 89 |
+
return regex.sub(" ", s.strip())
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def _get_skiprows(skiprows: int | Sequence[int] | slice | None) -> int | Sequence[int]:
|
| 93 |
+
"""
|
| 94 |
+
Get an iterator given an integer, slice or container.
|
| 95 |
+
|
| 96 |
+
Parameters
|
| 97 |
+
----------
|
| 98 |
+
skiprows : int, slice, container
|
| 99 |
+
The iterator to use to skip rows; can also be a slice.
|
| 100 |
+
|
| 101 |
+
Raises
|
| 102 |
+
------
|
| 103 |
+
TypeError
|
| 104 |
+
* If `skiprows` is not a slice, integer, or Container
|
| 105 |
+
|
| 106 |
+
Returns
|
| 107 |
+
-------
|
| 108 |
+
it : iterable
|
| 109 |
+
A proper iterator to use to skip rows of a DataFrame.
|
| 110 |
+
"""
|
| 111 |
+
if isinstance(skiprows, slice):
|
| 112 |
+
start, step = skiprows.start or 0, skiprows.step or 1
|
| 113 |
+
return list(range(start, skiprows.stop, step))
|
| 114 |
+
elif isinstance(skiprows, numbers.Integral) or is_list_like(skiprows):
|
| 115 |
+
return cast("int | Sequence[int]", skiprows)
|
| 116 |
+
elif skiprows is None:
|
| 117 |
+
return 0
|
| 118 |
+
raise TypeError(f"{type(skiprows).__name__} is not a valid type for skipping rows")
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def _read(
|
| 122 |
+
obj: FilePath | BaseBuffer,
|
| 123 |
+
encoding: str | None,
|
| 124 |
+
storage_options: StorageOptions | None,
|
| 125 |
+
) -> str | bytes:
|
| 126 |
+
"""
|
| 127 |
+
Try to read from a url, file or string.
|
| 128 |
+
|
| 129 |
+
Parameters
|
| 130 |
+
----------
|
| 131 |
+
obj : str, unicode, path object, or file-like object
|
| 132 |
+
|
| 133 |
+
Returns
|
| 134 |
+
-------
|
| 135 |
+
raw_text : str
|
| 136 |
+
"""
|
| 137 |
+
text: str | bytes
|
| 138 |
+
if (
|
| 139 |
+
is_url(obj)
|
| 140 |
+
or hasattr(obj, "read")
|
| 141 |
+
or (isinstance(obj, str) and file_exists(obj))
|
| 142 |
+
):
|
| 143 |
+
with get_handle(
|
| 144 |
+
obj, "r", encoding=encoding, storage_options=storage_options
|
| 145 |
+
) as handles:
|
| 146 |
+
text = handles.handle.read()
|
| 147 |
+
elif isinstance(obj, (str, bytes)):
|
| 148 |
+
text = obj
|
| 149 |
+
else:
|
| 150 |
+
raise TypeError(f"Cannot read object of type '{type(obj).__name__}'")
|
| 151 |
+
return text
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
class _HtmlFrameParser:
|
| 155 |
+
"""
|
| 156 |
+
Base class for parsers that parse HTML into DataFrames.
|
| 157 |
+
|
| 158 |
+
Parameters
|
| 159 |
+
----------
|
| 160 |
+
io : str or file-like
|
| 161 |
+
This can be either a string of raw HTML, a valid URL using the HTTP,
|
| 162 |
+
FTP, or FILE protocols or a file-like object.
|
| 163 |
+
|
| 164 |
+
match : str or regex
|
| 165 |
+
The text to match in the document.
|
| 166 |
+
|
| 167 |
+
attrs : dict
|
| 168 |
+
List of HTML <table> element attributes to match.
|
| 169 |
+
|
| 170 |
+
encoding : str
|
| 171 |
+
Encoding to be used by parser
|
| 172 |
+
|
| 173 |
+
displayed_only : bool
|
| 174 |
+
Whether or not items with "display:none" should be ignored
|
| 175 |
+
|
| 176 |
+
extract_links : {None, "all", "header", "body", "footer"}
|
| 177 |
+
Table elements in the specified section(s) with <a> tags will have their
|
| 178 |
+
href extracted.
|
| 179 |
+
|
| 180 |
+
.. versionadded:: 1.5.0
|
| 181 |
+
|
| 182 |
+
Attributes
|
| 183 |
+
----------
|
| 184 |
+
io : str or file-like
|
| 185 |
+
raw HTML, URL, or file-like object
|
| 186 |
+
|
| 187 |
+
match : regex
|
| 188 |
+
The text to match in the raw HTML
|
| 189 |
+
|
| 190 |
+
attrs : dict-like
|
| 191 |
+
A dictionary of valid table attributes to use to search for table
|
| 192 |
+
elements.
|
| 193 |
+
|
| 194 |
+
encoding : str
|
| 195 |
+
Encoding to be used by parser
|
| 196 |
+
|
| 197 |
+
displayed_only : bool
|
| 198 |
+
Whether or not items with "display:none" should be ignored
|
| 199 |
+
|
| 200 |
+
extract_links : {None, "all", "header", "body", "footer"}
|
| 201 |
+
Table elements in the specified section(s) with <a> tags will have their
|
| 202 |
+
href extracted.
|
| 203 |
+
|
| 204 |
+
.. versionadded:: 1.5.0
|
| 205 |
+
|
| 206 |
+
Notes
|
| 207 |
+
-----
|
| 208 |
+
To subclass this class effectively you must override the following methods:
|
| 209 |
+
* :func:`_build_doc`
|
| 210 |
+
* :func:`_attr_getter`
|
| 211 |
+
* :func:`_href_getter`
|
| 212 |
+
* :func:`_text_getter`
|
| 213 |
+
* :func:`_parse_td`
|
| 214 |
+
* :func:`_parse_thead_tr`
|
| 215 |
+
* :func:`_parse_tbody_tr`
|
| 216 |
+
* :func:`_parse_tfoot_tr`
|
| 217 |
+
* :func:`_parse_tables`
|
| 218 |
+
* :func:`_equals_tag`
|
| 219 |
+
See each method's respective documentation for details on their
|
| 220 |
+
functionality.
|
| 221 |
+
"""
|
| 222 |
+
|
| 223 |
+
def __init__(
|
| 224 |
+
self,
|
| 225 |
+
io: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
|
| 226 |
+
match: str | Pattern,
|
| 227 |
+
attrs: dict[str, str] | None,
|
| 228 |
+
encoding: str,
|
| 229 |
+
displayed_only: bool,
|
| 230 |
+
extract_links: Literal[None, "header", "footer", "body", "all"],
|
| 231 |
+
storage_options: StorageOptions = None,
|
| 232 |
+
) -> None:
|
| 233 |
+
self.io = io
|
| 234 |
+
self.match = match
|
| 235 |
+
self.attrs = attrs
|
| 236 |
+
self.encoding = encoding
|
| 237 |
+
self.displayed_only = displayed_only
|
| 238 |
+
self.extract_links = extract_links
|
| 239 |
+
self.storage_options = storage_options
|
| 240 |
+
|
| 241 |
+
def parse_tables(self):
|
| 242 |
+
"""
|
| 243 |
+
Parse and return all tables from the DOM.
|
| 244 |
+
|
| 245 |
+
Returns
|
| 246 |
+
-------
|
| 247 |
+
list of parsed (header, body, footer) tuples from tables.
|
| 248 |
+
"""
|
| 249 |
+
tables = self._parse_tables(self._build_doc(), self.match, self.attrs)
|
| 250 |
+
return (self._parse_thead_tbody_tfoot(table) for table in tables)
|
| 251 |
+
|
| 252 |
+
def _attr_getter(self, obj, attr):
|
| 253 |
+
"""
|
| 254 |
+
Return the attribute value of an individual DOM node.
|
| 255 |
+
|
| 256 |
+
Parameters
|
| 257 |
+
----------
|
| 258 |
+
obj : node-like
|
| 259 |
+
A DOM node.
|
| 260 |
+
|
| 261 |
+
attr : str or unicode
|
| 262 |
+
The attribute, such as "colspan"
|
| 263 |
+
|
| 264 |
+
Returns
|
| 265 |
+
-------
|
| 266 |
+
str or unicode
|
| 267 |
+
The attribute value.
|
| 268 |
+
"""
|
| 269 |
+
# Both lxml and BeautifulSoup have the same implementation:
|
| 270 |
+
return obj.get(attr)
|
| 271 |
+
|
| 272 |
+
def _href_getter(self, obj) -> str | None:
|
| 273 |
+
"""
|
| 274 |
+
Return a href if the DOM node contains a child <a> or None.
|
| 275 |
+
|
| 276 |
+
Parameters
|
| 277 |
+
----------
|
| 278 |
+
obj : node-like
|
| 279 |
+
A DOM node.
|
| 280 |
+
|
| 281 |
+
Returns
|
| 282 |
+
-------
|
| 283 |
+
href : str or unicode
|
| 284 |
+
The href from the <a> child of the DOM node.
|
| 285 |
+
"""
|
| 286 |
+
raise AbstractMethodError(self)
|
| 287 |
+
|
| 288 |
+
def _text_getter(self, obj):
|
| 289 |
+
"""
|
| 290 |
+
Return the text of an individual DOM node.
|
| 291 |
+
|
| 292 |
+
Parameters
|
| 293 |
+
----------
|
| 294 |
+
obj : node-like
|
| 295 |
+
A DOM node.
|
| 296 |
+
|
| 297 |
+
Returns
|
| 298 |
+
-------
|
| 299 |
+
text : str or unicode
|
| 300 |
+
The text from an individual DOM node.
|
| 301 |
+
"""
|
| 302 |
+
raise AbstractMethodError(self)
|
| 303 |
+
|
| 304 |
+
def _parse_td(self, obj):
|
| 305 |
+
"""
|
| 306 |
+
Return the td elements from a row element.
|
| 307 |
+
|
| 308 |
+
Parameters
|
| 309 |
+
----------
|
| 310 |
+
obj : node-like
|
| 311 |
+
A DOM <tr> node.
|
| 312 |
+
|
| 313 |
+
Returns
|
| 314 |
+
-------
|
| 315 |
+
list of node-like
|
| 316 |
+
These are the elements of each row, i.e., the columns.
|
| 317 |
+
"""
|
| 318 |
+
raise AbstractMethodError(self)
|
| 319 |
+
|
| 320 |
+
def _parse_thead_tr(self, table):
|
| 321 |
+
"""
|
| 322 |
+
Return the list of thead row elements from the parsed table element.
|
| 323 |
+
|
| 324 |
+
Parameters
|
| 325 |
+
----------
|
| 326 |
+
table : a table element that contains zero or more thead elements.
|
| 327 |
+
|
| 328 |
+
Returns
|
| 329 |
+
-------
|
| 330 |
+
list of node-like
|
| 331 |
+
These are the <tr> row elements of a table.
|
| 332 |
+
"""
|
| 333 |
+
raise AbstractMethodError(self)
|
| 334 |
+
|
| 335 |
+
def _parse_tbody_tr(self, table):
|
| 336 |
+
"""
|
| 337 |
+
Return the list of tbody row elements from the parsed table element.
|
| 338 |
+
|
| 339 |
+
HTML5 table bodies consist of either 0 or more <tbody> elements (which
|
| 340 |
+
only contain <tr> elements) or 0 or more <tr> elements. This method
|
| 341 |
+
checks for both structures.
|
| 342 |
+
|
| 343 |
+
Parameters
|
| 344 |
+
----------
|
| 345 |
+
table : a table element that contains row elements.
|
| 346 |
+
|
| 347 |
+
Returns
|
| 348 |
+
-------
|
| 349 |
+
list of node-like
|
| 350 |
+
These are the <tr> row elements of a table.
|
| 351 |
+
"""
|
| 352 |
+
raise AbstractMethodError(self)
|
| 353 |
+
|
| 354 |
+
def _parse_tfoot_tr(self, table):
|
| 355 |
+
"""
|
| 356 |
+
Return the list of tfoot row elements from the parsed table element.
|
| 357 |
+
|
| 358 |
+
Parameters
|
| 359 |
+
----------
|
| 360 |
+
table : a table element that contains row elements.
|
| 361 |
+
|
| 362 |
+
Returns
|
| 363 |
+
-------
|
| 364 |
+
list of node-like
|
| 365 |
+
These are the <tr> row elements of a table.
|
| 366 |
+
"""
|
| 367 |
+
raise AbstractMethodError(self)
|
| 368 |
+
|
| 369 |
+
def _parse_tables(self, document, match, attrs):
|
| 370 |
+
"""
|
| 371 |
+
Return all tables from the parsed DOM.
|
| 372 |
+
|
| 373 |
+
Parameters
|
| 374 |
+
----------
|
| 375 |
+
document : the DOM from which to parse the table element.
|
| 376 |
+
|
| 377 |
+
match : str or regular expression
|
| 378 |
+
The text to search for in the DOM tree.
|
| 379 |
+
|
| 380 |
+
attrs : dict
|
| 381 |
+
A dictionary of table attributes that can be used to disambiguate
|
| 382 |
+
multiple tables on a page.
|
| 383 |
+
|
| 384 |
+
Raises
|
| 385 |
+
------
|
| 386 |
+
ValueError : `match` does not match any text in the document.
|
| 387 |
+
|
| 388 |
+
Returns
|
| 389 |
+
-------
|
| 390 |
+
list of node-like
|
| 391 |
+
HTML <table> elements to be parsed into raw data.
|
| 392 |
+
"""
|
| 393 |
+
raise AbstractMethodError(self)
|
| 394 |
+
|
| 395 |
+
def _equals_tag(self, obj, tag) -> bool:
|
| 396 |
+
"""
|
| 397 |
+
Return whether an individual DOM node matches a tag
|
| 398 |
+
|
| 399 |
+
Parameters
|
| 400 |
+
----------
|
| 401 |
+
obj : node-like
|
| 402 |
+
A DOM node.
|
| 403 |
+
|
| 404 |
+
tag : str
|
| 405 |
+
Tag name to be checked for equality.
|
| 406 |
+
|
| 407 |
+
Returns
|
| 408 |
+
-------
|
| 409 |
+
boolean
|
| 410 |
+
Whether `obj`'s tag name is `tag`
|
| 411 |
+
"""
|
| 412 |
+
raise AbstractMethodError(self)
|
| 413 |
+
|
| 414 |
+
def _build_doc(self):
|
| 415 |
+
"""
|
| 416 |
+
Return a tree-like object that can be used to iterate over the DOM.
|
| 417 |
+
|
| 418 |
+
Returns
|
| 419 |
+
-------
|
| 420 |
+
node-like
|
| 421 |
+
The DOM from which to parse the table element.
|
| 422 |
+
"""
|
| 423 |
+
raise AbstractMethodError(self)
|
| 424 |
+
|
| 425 |
+
def _parse_thead_tbody_tfoot(self, table_html):
|
| 426 |
+
"""
|
| 427 |
+
Given a table, return parsed header, body, and foot.
|
| 428 |
+
|
| 429 |
+
Parameters
|
| 430 |
+
----------
|
| 431 |
+
table_html : node-like
|
| 432 |
+
|
| 433 |
+
Returns
|
| 434 |
+
-------
|
| 435 |
+
tuple of (header, body, footer), each a list of list-of-text rows.
|
| 436 |
+
|
| 437 |
+
Notes
|
| 438 |
+
-----
|
| 439 |
+
Header and body are lists-of-lists. Top level list is a list of
|
| 440 |
+
rows. Each row is a list of str text.
|
| 441 |
+
|
| 442 |
+
Logic: Use <thead>, <tbody>, <tfoot> elements to identify
|
| 443 |
+
header, body, and footer, otherwise:
|
| 444 |
+
- Put all rows into body
|
| 445 |
+
- Move rows from top of body to header only if
|
| 446 |
+
all elements inside row are <th>
|
| 447 |
+
- Move rows from bottom of body to footer only if
|
| 448 |
+
all elements inside row are <th>
|
| 449 |
+
"""
|
| 450 |
+
header_rows = self._parse_thead_tr(table_html)
|
| 451 |
+
body_rows = self._parse_tbody_tr(table_html)
|
| 452 |
+
footer_rows = self._parse_tfoot_tr(table_html)
|
| 453 |
+
|
| 454 |
+
def row_is_all_th(row):
|
| 455 |
+
return all(self._equals_tag(t, "th") for t in self._parse_td(row))
|
| 456 |
+
|
| 457 |
+
if not header_rows:
|
| 458 |
+
# The table has no <thead>. Move the top all-<th> rows from
|
| 459 |
+
# body_rows to header_rows. (This is a common case because many
|
| 460 |
+
# tables in the wild have no <thead> or <tfoot>
|
| 461 |
+
while body_rows and row_is_all_th(body_rows[0]):
|
| 462 |
+
header_rows.append(body_rows.pop(0))
|
| 463 |
+
|
| 464 |
+
header = self._expand_colspan_rowspan(header_rows, section="header")
|
| 465 |
+
body = self._expand_colspan_rowspan(body_rows, section="body")
|
| 466 |
+
footer = self._expand_colspan_rowspan(footer_rows, section="footer")
|
| 467 |
+
|
| 468 |
+
return header, body, footer
|
| 469 |
+
|
| 470 |
+
def _expand_colspan_rowspan(
|
| 471 |
+
self, rows, section: Literal["header", "footer", "body"]
|
| 472 |
+
):
|
| 473 |
+
"""
|
| 474 |
+
Given a list of <tr>s, return a list of text rows.
|
| 475 |
+
|
| 476 |
+
Parameters
|
| 477 |
+
----------
|
| 478 |
+
rows : list of node-like
|
| 479 |
+
List of <tr>s
|
| 480 |
+
section : the section that the rows belong to (header, body or footer).
|
| 481 |
+
|
| 482 |
+
Returns
|
| 483 |
+
-------
|
| 484 |
+
list of list
|
| 485 |
+
Each returned row is a list of str text, or tuple (text, link)
|
| 486 |
+
if extract_links is not None.
|
| 487 |
+
|
| 488 |
+
Notes
|
| 489 |
+
-----
|
| 490 |
+
Any cell with ``rowspan`` or ``colspan`` will have its contents copied
|
| 491 |
+
to subsequent cells.
|
| 492 |
+
"""
|
| 493 |
+
all_texts = [] # list of rows, each a list of str
|
| 494 |
+
text: str | tuple
|
| 495 |
+
remainder: list[
|
| 496 |
+
tuple[int, str | tuple, int]
|
| 497 |
+
] = [] # list of (index, text, nrows)
|
| 498 |
+
|
| 499 |
+
for tr in rows:
|
| 500 |
+
texts = [] # the output for this row
|
| 501 |
+
next_remainder = []
|
| 502 |
+
|
| 503 |
+
index = 0
|
| 504 |
+
tds = self._parse_td(tr)
|
| 505 |
+
for td in tds:
|
| 506 |
+
# Append texts from previous rows with rowspan>1 that come
|
| 507 |
+
# before this <td>
|
| 508 |
+
while remainder and remainder[0][0] <= index:
|
| 509 |
+
prev_i, prev_text, prev_rowspan = remainder.pop(0)
|
| 510 |
+
texts.append(prev_text)
|
| 511 |
+
if prev_rowspan > 1:
|
| 512 |
+
next_remainder.append((prev_i, prev_text, prev_rowspan - 1))
|
| 513 |
+
index += 1
|
| 514 |
+
|
| 515 |
+
# Append the text from this <td>, colspan times
|
| 516 |
+
text = _remove_whitespace(self._text_getter(td))
|
| 517 |
+
if self.extract_links in ("all", section):
|
| 518 |
+
href = self._href_getter(td)
|
| 519 |
+
text = (text, href)
|
| 520 |
+
rowspan = int(self._attr_getter(td, "rowspan") or 1)
|
| 521 |
+
colspan = int(self._attr_getter(td, "colspan") or 1)
|
| 522 |
+
|
| 523 |
+
for _ in range(colspan):
|
| 524 |
+
texts.append(text)
|
| 525 |
+
if rowspan > 1:
|
| 526 |
+
next_remainder.append((index, text, rowspan - 1))
|
| 527 |
+
index += 1
|
| 528 |
+
|
| 529 |
+
# Append texts from previous rows at the final position
|
| 530 |
+
for prev_i, prev_text, prev_rowspan in remainder:
|
| 531 |
+
texts.append(prev_text)
|
| 532 |
+
if prev_rowspan > 1:
|
| 533 |
+
next_remainder.append((prev_i, prev_text, prev_rowspan - 1))
|
| 534 |
+
|
| 535 |
+
all_texts.append(texts)
|
| 536 |
+
remainder = next_remainder
|
| 537 |
+
|
| 538 |
+
# Append rows that only appear because the previous row had non-1
|
| 539 |
+
# rowspan
|
| 540 |
+
while remainder:
|
| 541 |
+
next_remainder = []
|
| 542 |
+
texts = []
|
| 543 |
+
for prev_i, prev_text, prev_rowspan in remainder:
|
| 544 |
+
texts.append(prev_text)
|
| 545 |
+
if prev_rowspan > 1:
|
| 546 |
+
next_remainder.append((prev_i, prev_text, prev_rowspan - 1))
|
| 547 |
+
all_texts.append(texts)
|
| 548 |
+
remainder = next_remainder
|
| 549 |
+
|
| 550 |
+
return all_texts
|
| 551 |
+
|
| 552 |
+
def _handle_hidden_tables(self, tbl_list, attr_name: str):
|
| 553 |
+
"""
|
| 554 |
+
Return list of tables, potentially removing hidden elements
|
| 555 |
+
|
| 556 |
+
Parameters
|
| 557 |
+
----------
|
| 558 |
+
tbl_list : list of node-like
|
| 559 |
+
Type of list elements will vary depending upon parser used
|
| 560 |
+
attr_name : str
|
| 561 |
+
Name of the accessor for retrieving HTML attributes
|
| 562 |
+
|
| 563 |
+
Returns
|
| 564 |
+
-------
|
| 565 |
+
list of node-like
|
| 566 |
+
Return type matches `tbl_list`
|
| 567 |
+
"""
|
| 568 |
+
if not self.displayed_only:
|
| 569 |
+
return tbl_list
|
| 570 |
+
|
| 571 |
+
return [
|
| 572 |
+
x
|
| 573 |
+
for x in tbl_list
|
| 574 |
+
if "display:none"
|
| 575 |
+
not in getattr(x, attr_name).get("style", "").replace(" ", "")
|
| 576 |
+
]
|
| 577 |
+
|
| 578 |
+
|
| 579 |
+
class _BeautifulSoupHtml5LibFrameParser(_HtmlFrameParser):
    """
    HTML table parser built on BeautifulSoup with the ``html5lib`` feature set.

    See Also
    --------
    pandas.io.html._HtmlFrameParser
    pandas.io.html._LxmlFrameParser

    Notes
    -----
    The contract of each hook is documented on the base class
    :class:`pandas.io.html._HtmlFrameParser`; the comments here only cover
    what is specific to this implementation.
    """

    def _parse_tables(self, document, match, attrs):
        candidates = document.find_all("table", attrs=attrs)
        if not candidates:
            raise ValueError("No tables found")

        candidates = self._handle_hidden_tables(candidates, "attrs")
        hidden_style = re.compile(r"display:\s*none")
        matched = []
        seen = set()

        for table in candidates:
            if self.displayed_only:
                # Strip <style> blocks first, then anything hidden inline, so
                # neither contributes text to the match or to the cells.
                for node in table.find_all("style"):
                    node.decompose()

                for node in table.find_all(style=hidden_style):
                    node.decompose()

            # A table already seen is skipped without searching it again.
            first_time = table not in seen
            if first_time and table.find(string=match) is not None:
                matched.append(table)
            seen.add(table)

        if not matched:
            raise ValueError(f"No tables found matching pattern {match.pattern!r}")
        return matched

    def _href_getter(self, obj) -> str | None:
        anchor = obj.find("a", href=True)
        if not anchor:
            return None
        return anchor["href"]

    def _text_getter(self, obj):
        return obj.text

    def _equals_tag(self, obj, tag) -> bool:
        return obj.name == tag

    def _parse_td(self, row):
        # Direct children only, so cells of nested tables are not picked up.
        return row.find_all(("td", "th"), recursive=False)

    def _parse_thead_tr(self, table):
        return table.select("thead tr")

    def _parse_tbody_tr(self, table):
        rows_in_tbody = table.select("tbody tr")
        rows_at_root = table.find_all("tr", recursive=False)
        # HTML spec: at most one of these lists has content
        return rows_in_tbody + rows_at_root

    def _parse_tfoot_tr(self, table):
        return table.select("tfoot tr")

    def _setup_build_doc(self):
        """Read ``self.io`` and reject an empty document with ValueError."""
        raw_text = _read(self.io, self.encoding, self.storage_options)
        if raw_text:
            return raw_text
        raise ValueError(f"No text parsed from document: {self.io}")

    def _build_doc(self):
        from bs4 import BeautifulSoup

        raw = self._setup_build_doc()
        # Bytes with a known encoding are decoded here, so BeautifulSoup gets
        # text and no encoding hint; otherwise the hint is passed along.
        markup, from_encoding = raw, self.encoding
        if isinstance(raw, bytes) and self.encoding is not None:
            markup, from_encoding = raw.decode(self.encoding), None

        soup = BeautifulSoup(markup, features="html5lib", from_encoding=from_encoding)

        # Turn <br> into a newline so line breaks survive text extraction.
        for br in soup.find_all("br"):
            br.replace_with("\n" + br.text)

        return soup
|
| 667 |
+
|
| 668 |
+
|
| 669 |
+
def _build_xpath_expr(attrs) -> str:
|
| 670 |
+
"""
|
| 671 |
+
Build an xpath expression to simulate bs4's ability to pass in kwargs to
|
| 672 |
+
search for attributes when using the lxml parser.
|
| 673 |
+
|
| 674 |
+
Parameters
|
| 675 |
+
----------
|
| 676 |
+
attrs : dict
|
| 677 |
+
A dict of HTML attributes. These are NOT checked for validity.
|
| 678 |
+
|
| 679 |
+
Returns
|
| 680 |
+
-------
|
| 681 |
+
expr : unicode
|
| 682 |
+
An XPath expression that checks for the given HTML attributes.
|
| 683 |
+
"""
|
| 684 |
+
# give class attribute as class_ because class is a python keyword
|
| 685 |
+
if "class_" in attrs:
|
| 686 |
+
attrs["class"] = attrs.pop("class_")
|
| 687 |
+
|
| 688 |
+
s = " and ".join([f"@{k}={repr(v)}" for k, v in attrs.items()])
|
| 689 |
+
return f"[{s}]"
|
| 690 |
+
|
| 691 |
+
|
| 692 |
+
_re_namespace = {"re": "http://exslt.org/regular-expressions"}
|
| 693 |
+
|
| 694 |
+
|
| 695 |
+
class _LxmlFrameParser(_HtmlFrameParser):
    """
    HTML to DataFrame parser that uses lxml under the hood.

    Warning
    -------
    This parser can only handle HTTP, FTP, and FILE urls.

    See Also
    --------
    _HtmlFrameParser
    _BeautifulSoupLxmlFrameParser

    Notes
    -----
    Documentation strings for this class are in the base class
    :class:`_HtmlFrameParser`.
    """

    def _href_getter(self, obj) -> str | None:
        # First ``href`` of any <a> descendant of ``obj``; None when there is
        # no such attribute.
        href = obj.xpath(".//a/@href")
        return None if not href else href[0]

    def _text_getter(self, obj):
        # Delegates to the element's ``text_content()``.
        return obj.text_content()

    def _parse_td(self, row):
        # Look for direct children only: the "row" element here may be a
        # <thead> or <tfoot> (see _parse_thead_tr).
        return row.xpath("./td|./th")

    def _parse_tables(self, document, match, kwargs):
        # ``match`` is a compiled regex; only its pattern string is embedded
        # in the XPath expression below.
        pattern = match.pattern

        # 1. check all descendants for the given pattern and only search tables
        # GH 49929
        xpath_expr = f"//table[.//text()[re:test(., {repr(pattern)})]]"

        # if any table attributes were given build an xpath expression to
        # search for them
        if kwargs:
            xpath_expr += _build_xpath_expr(kwargs)

        tables = document.xpath(xpath_expr, namespaces=_re_namespace)

        tables = self._handle_hidden_tables(tables, "attrib")
        if self.displayed_only:
            for table in tables:
                # lxml utilizes XPATH 1.0 which does not have regex
                # support. As a result, we find all elements with a style
                # attribute and iterate them to check for display:none
                # <style> elements themselves are removed from the table.
                for elem in table.xpath(".//style"):
                    elem.drop_tree()
                for elem in table.xpath(".//*[@style]"):
                    # Spaces are stripped so "display: none" also matches.
                    if "display:none" in elem.attrib.get("style", "").replace(" ", ""):
                        elem.drop_tree()
        if not tables:
            raise ValueError(f"No tables found matching regex {repr(pattern)}")
        return tables

    def _equals_tag(self, obj, tag) -> bool:
        return obj.tag == tag

    def _build_doc(self):
        """
        Raises
        ------
        ValueError
            * If a URL that lxml cannot parse is passed.

        Exception
            * Any other ``Exception`` thrown. For example, trying to parse a
              URL that is syntactically correct on a machine with no internet
              connection will fail.

        See Also
        --------
        pandas.io.html._HtmlFrameParser._build_doc
        """
        # lxml is imported lazily, inside the method.
        from lxml.etree import XMLSyntaxError
        from lxml.html import (
            HTMLParser,
            fromstring,
            parse,
        )

        parser = HTMLParser(recover=True, encoding=self.encoding)

        try:
            if is_url(self.io):
                with get_handle(
                    self.io, "r", storage_options=self.storage_options
                ) as f:
                    r = parse(f.handle, parser=parser)
            else:
                # try to parse the input in the simplest way
                r = parse(self.io, parser=parser)
            # ``parse`` may return an object exposing ``getroot()``; if the
            # attribute is missing, ``r`` is used as-is.
            try:
                r = r.getroot()
            except AttributeError:
                pass
        except (UnicodeDecodeError, OSError) as e:
            # if the input is a blob of html goop
            if not is_url(self.io):
                # Fall back to treating ``self.io`` as literal markup.
                r = fromstring(self.io, parser=parser)

                try:
                    r = r.getroot()
                except AttributeError:
                    pass
            else:
                # A URL that failed to load is not retried as literal markup.
                raise e
        else:
            # Only reached when the first parse attempt raised nothing.
            if not hasattr(r, "text_content"):
                raise XMLSyntaxError("no text parsed from document", 0, 0, 0)

        # Prefix the text following each matched <br> with a newline so the
        # line break survives text extraction.
        for br in r.xpath("*//br"):
            br.tail = "\n" + (br.tail or "")

        return r

    def _parse_thead_tr(self, table):
        rows = []

        for thead in table.xpath(".//thead"):
            rows.extend(thead.xpath("./tr"))

            # HACK: lxml does not clean up the clearly-erroneous
            # <thead><th>foo</th><th>bar</th></thead>. (Missing <tr>). Add
            # the <thead> and _pretend_ it's a <tr>; _parse_td() will find its
            # children as though it's a <tr>.
            #
            # Better solution would be to use html5lib.
            elements_at_root = thead.xpath("./td|./th")
            if elements_at_root:
                rows.append(thead)

        return rows

    def _parse_tbody_tr(self, table):
        from_tbody = table.xpath(".//tbody//tr")
        from_root = table.xpath("./tr")
        # HTML spec: at most one of these lists has content
        return from_tbody + from_root

    def _parse_tfoot_tr(self, table):
        return table.xpath(".//tfoot//tr")
|
| 842 |
+
|
| 843 |
+
|
| 844 |
+
def _expand_elements(body) -> None:
|
| 845 |
+
data = [len(elem) for elem in body]
|
| 846 |
+
lens = Series(data)
|
| 847 |
+
lens_max = lens.max()
|
| 848 |
+
not_max = lens[lens != lens_max]
|
| 849 |
+
|
| 850 |
+
empty = [""]
|
| 851 |
+
for ind, length in not_max.items():
|
| 852 |
+
body[ind] += empty * (lens_max - length)
|
| 853 |
+
|
| 854 |
+
|
| 855 |
+
def _data_to_frame(**kwargs):
|
| 856 |
+
head, body, foot = kwargs.pop("data")
|
| 857 |
+
header = kwargs.pop("header")
|
| 858 |
+
kwargs["skiprows"] = _get_skiprows(kwargs["skiprows"])
|
| 859 |
+
if head:
|
| 860 |
+
body = head + body
|
| 861 |
+
|
| 862 |
+
# Infer header when there is a <thead> or top <th>-only rows
|
| 863 |
+
if header is None:
|
| 864 |
+
if len(head) == 1:
|
| 865 |
+
header = 0
|
| 866 |
+
else:
|
| 867 |
+
# ignore all-empty-text rows
|
| 868 |
+
header = [i for i, row in enumerate(head) if any(text for text in row)]
|
| 869 |
+
|
| 870 |
+
if foot:
|
| 871 |
+
body += foot
|
| 872 |
+
|
| 873 |
+
# fill out elements of body that are "ragged"
|
| 874 |
+
_expand_elements(body)
|
| 875 |
+
with TextParser(body, header=header, **kwargs) as tp:
|
| 876 |
+
return tp.read()
|
| 877 |
+
|
| 878 |
+
|
| 879 |
+
# Maps each accepted ``flavor`` value to the parser class implementing it.
# ``None`` selects the lxml parser; "html5lib" and "bs4" both map to the same
# BeautifulSoup/html5lib-backed parser class.
_valid_parsers = {
    "lxml": _LxmlFrameParser,
    None: _LxmlFrameParser,
    "html5lib": _BeautifulSoupHtml5LibFrameParser,
    "bs4": _BeautifulSoupHtml5LibFrameParser,
}
|
| 885 |
+
|
| 886 |
+
|
| 887 |
+
def _parser_dispatch(flavor: HTMLFlavors | None) -> type[_HtmlFrameParser]:
    """
    Choose the parser based on the input flavor.

    Parameters
    ----------
    flavor : {{"lxml", "html5lib", "bs4"}} or None
        The type of parser to use. This must be a valid backend.

    Returns
    -------
    cls : _HtmlFrameParser subclass
        The parser class based on the requested input flavor.

    Raises
    ------
    ValueError
        * If `flavor` is not a valid backend.
    ImportError
        * If you do not have the requested `flavor`
    """
    known_flavors = list(_valid_parsers)
    if flavor not in known_flavors:
        raise ValueError(
            f"{flavor!r} is not a valid flavor, valid flavors are {known_flavors}"
        )

    # Check that the optional dependencies behind the chosen backend import.
    if flavor in ("bs4", "html5lib"):
        required_modules = ("html5lib", "bs4")
    else:
        required_modules = ("lxml.etree",)
    for module_name in required_modules:
        import_optional_dependency(module_name)

    return _valid_parsers[flavor]
|
| 920 |
+
|
| 921 |
+
|
| 922 |
+
def _print_as_set(s) -> str:
|
| 923 |
+
arg = ", ".join([pprint_thing(el) for el in s])
|
| 924 |
+
return f"{{{arg}}}"
|
| 925 |
+
|
| 926 |
+
|
| 927 |
+
def _validate_flavor(flavor):
|
| 928 |
+
if flavor is None:
|
| 929 |
+
flavor = "lxml", "bs4"
|
| 930 |
+
elif isinstance(flavor, str):
|
| 931 |
+
flavor = (flavor,)
|
| 932 |
+
elif isinstance(flavor, abc.Iterable):
|
| 933 |
+
if not all(isinstance(flav, str) for flav in flavor):
|
| 934 |
+
raise TypeError(
|
| 935 |
+
f"Object of type {repr(type(flavor).__name__)} "
|
| 936 |
+
f"is not an iterable of strings"
|
| 937 |
+
)
|
| 938 |
+
else:
|
| 939 |
+
msg = repr(flavor) if isinstance(flavor, str) else str(flavor)
|
| 940 |
+
msg += " is not a valid flavor"
|
| 941 |
+
raise ValueError(msg)
|
| 942 |
+
|
| 943 |
+
flavor = tuple(flavor)
|
| 944 |
+
valid_flavors = set(_valid_parsers)
|
| 945 |
+
flavor_set = set(flavor)
|
| 946 |
+
|
| 947 |
+
if not flavor_set & valid_flavors:
|
| 948 |
+
raise ValueError(
|
| 949 |
+
f"{_print_as_set(flavor_set)} is not a valid set of flavors, valid "
|
| 950 |
+
f"flavors are {_print_as_set(valid_flavors)}"
|
| 951 |
+
)
|
| 952 |
+
return flavor
|
| 953 |
+
|
| 954 |
+
|
| 955 |
+
def _parse(
    flavor,
    io,
    match,
    attrs,
    encoding,
    displayed_only,
    extract_links,
    storage_options,
    **kwargs,
):
    """
    Parse ``io`` with the first flavor that succeeds and build DataFrames.

    Each flavor returned by ``_validate_flavor`` is tried in order. A
    ``ValueError`` from ``parse_tables()`` moves on to the next flavor; if
    every flavor fails, the last ``ValueError`` is re-raised.

    Parameters
    ----------
    flavor, io, match, attrs, encoding, displayed_only, extract_links,
    storage_options
        Forwarded to the parser class constructor (``match`` is compiled
        with ``re.compile`` first).
    **kwargs
        Forwarded to ``_data_to_frame`` for every table.

    Returns
    -------
    list
        One DataFrame per parsed table; tables that raise ``EmptyDataError``
        are skipped.

    Raises
    ------
    ValueError
        If all flavors fail, or if a flavor fails and ``io`` reports that it
        is not seekable (so it cannot be rewound for the next flavor).
    """
    flavor = _validate_flavor(flavor)
    compiled_match = re.compile(match)  # you can pass a compiled regex here

    # Last ValueError seen; re-raised if no flavor succeeds.
    retained = None
    for flav in flavor:
        parser = _parser_dispatch(flav)
        p = parser(
            io,
            compiled_match,
            attrs,
            encoding,
            displayed_only,
            extract_links,
            storage_options,
        )

        try:
            tables = p.parse_tables()
        except ValueError as caught:
            # if `io` is an io-like object, check if it's seekable
            # and try to rewind it before trying the next parser
            if hasattr(io, "seekable") and io.seekable():
                io.seek(0)
            elif hasattr(io, "seekable") and not io.seekable():
                # if we couldn't rewind it, let the user know
                raise ValueError(
                    f"The flavor {flav} failed to parse your input. "
                    "Since you passed a non-rewindable file "
                    "object, we can't rewind it to try "
                    "another parser. Try read_html() with a different flavor."
                ) from caught

            retained = caught
        else:
            # This flavor worked; stop trying others.
            break
    else:
        # Loop ended without ``break``: every flavor raised ValueError.
        assert retained is not None  # for mypy
        raise retained

    ret = []
    for table in tables:
        try:
            df = _data_to_frame(data=table, **kwargs)
            # Cast MultiIndex header to an Index of tuples when extracting header
            # links and replace nan with None (therefore can't use mi.to_flat_index()).
            # This maintains consistency of selection (e.g. df.columns.str[1])
            if extract_links in ("all", "header") and isinstance(
                df.columns, MultiIndex
            ):
                df.columns = Index(
                    ((col[0], None if isna(col[1]) else col[1]) for col in df.columns),
                    tupleize_cols=False,
                )

            ret.append(df)
        except EmptyDataError:  # empty table
            continue
    return ret
|
| 1024 |
+
|
| 1025 |
+
|
| 1026 |
+
@doc(storage_options=_shared_docs["storage_options"])
def read_html(
    io: FilePath | ReadBuffer[str],
    *,
    match: str | Pattern = ".+",
    flavor: HTMLFlavors | Sequence[HTMLFlavors] | None = None,
    header: int | Sequence[int] | None = None,
    index_col: int | Sequence[int] | None = None,
    skiprows: int | Sequence[int] | slice | None = None,
    attrs: dict[str, str] | None = None,
    parse_dates: bool = False,
    thousands: str | None = ",",
    encoding: str | None = None,
    decimal: str = ".",
    converters: dict | None = None,
    na_values: Iterable[object] | None = None,
    keep_default_na: bool = True,
    displayed_only: bool = True,
    extract_links: Literal[None, "header", "footer", "body", "all"] = None,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
    storage_options: StorageOptions = None,
) -> list[DataFrame]:
    r"""
    Read HTML tables into a ``list`` of ``DataFrame`` objects.

    Parameters
    ----------
    io : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a string ``read()`` function.
        The string can represent a URL or the HTML itself. Note that
        lxml only accepts the http, ftp and file url protocols. If you have a
        URL that starts with ``'https'`` you might try removing the ``'s'``.

        .. deprecated:: 2.1.0
            Passing html literal strings is deprecated.
            Wrap literal string/bytes input in ``io.StringIO``/``io.BytesIO`` instead.

    match : str or compiled regular expression, optional
        The set of tables containing text matching this regex or string will be
        returned. Unless the HTML is extremely simple you will probably need to
        pass a non-empty string here. Defaults to '.+' (match any non-empty
        string). The default value will return all tables contained on a page.
        This value is converted to a regular expression so that there is
        consistent behavior between Beautiful Soup and lxml.

    flavor : {{"lxml", "html5lib", "bs4"}} or list-like, optional
        The parsing engine (or list of parsing engines) to use. 'bs4' and
        'html5lib' are synonymous with each other, they are both there for
        backwards compatibility. The default of ``None`` tries to use ``lxml``
        to parse and if that fails it falls back on ``bs4`` + ``html5lib``.

    header : int or list-like, optional
        The row (or list of rows for a :class:`~pandas.MultiIndex`) to use to
        make the columns headers.

    index_col : int or list-like, optional
        The column (or list of columns) to use to create the index.

    skiprows : int, list-like or slice, optional
        Number of rows to skip after parsing the column integer. 0-based. If a
        sequence of integers or a slice is given, will skip the rows indexed by
        that sequence.  Note that a single element sequence means 'skip the nth
        row' whereas an integer means 'skip n rows'.

    attrs : dict, optional
        This is a dictionary of attributes that you can pass to use to identify
        the table in the HTML. These are not checked for validity before being
        passed to lxml or Beautiful Soup. However, these attributes must be
        valid HTML table attributes to work correctly. For example, ::

            attrs = {{'id': 'table'}}

        is a valid attribute dictionary because the 'id' HTML tag attribute is
        a valid HTML attribute for *any* HTML tag as per `this document
        <https://html.spec.whatwg.org/multipage/dom.html#global-attributes>`__. ::

            attrs = {{'asdf': 'table'}}

        is *not* a valid attribute dictionary because 'asdf' is not a valid
        HTML attribute even if it is a valid XML attribute.  Valid HTML 4.01
        table attributes can be found `here
        <http://www.w3.org/TR/REC-html40/struct/tables.html#h-11.2>`__. A
        working draft of the HTML 5 spec can be found `here
        <https://html.spec.whatwg.org/multipage/tables.html>`__. It contains the
        latest information on table attributes for the modern web.

    parse_dates : bool, optional
        See :func:`~read_csv` for more details.

    thousands : str, optional
        Separator to use to parse thousands. Defaults to ``','``.

    encoding : str, optional
        The encoding used to decode the web page. Defaults to ``None``. ``None``
        preserves the previous encoding behavior, which depends on the
        underlying parser library (e.g., the parser library will try to use
        the encoding provided by the document).

    decimal : str, default '.'
        Character to recognize as decimal point (e.g. use ',' for European
        data).

    converters : dict, default None
        Dict of functions for converting values in certain columns. Keys can
        either be integers or column labels, values are functions that take one
        input argument, the cell (not column) content, and return the
        transformed content.

    na_values : iterable, default None
        Custom NA values.

    keep_default_na : bool, default True
        If na_values are specified and keep_default_na is False the default NaN
        values are overridden, otherwise they're appended to.

    displayed_only : bool, default True
        Whether to parse only displayed elements. If True, elements with
        "display: none" are skipped.

    extract_links : {{None, "all", "header", "body", "footer"}}
        Table elements in the specified section(s) with <a> tags will have their
        href extracted.

        .. versionadded:: 1.5.0

    dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable'
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). Behaviour is as follows:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
          (default).
        * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
          DataFrame.

        .. versionadded:: 2.0

    {storage_options}

        .. versionadded:: 2.1.0

    Returns
    -------
    dfs
        A list of DataFrames.

    See Also
    --------
    read_csv : Read a comma-separated values (csv) file into DataFrame.

    Notes
    -----
    Before using this function you should read the :ref:`gotchas about the
    HTML parsing libraries <io.html.gotchas>`.

    Expect to do some cleanup after you call this function. For example, you
    might need to manually assign column names if the column names are
    converted to NaN when you pass the `header=0` argument. We try to assume as
    little as possible about the structure of the table and push the
    idiosyncrasies of the HTML contained in the table to the user.

    This function searches for ``<table>`` elements and only for ``<tr>``
    and ``<th>`` rows and ``<td>`` elements within each ``<tr>`` or ``<th>``
    element in the table. ``<td>`` stands for "table data". This function
    attempts to properly handle ``colspan`` and ``rowspan`` attributes.
    If the table has a ``<thead>`` element, it is used to construct
    the header, otherwise the function attempts to find the header within
    the body (by putting rows with only ``<th>`` elements into the header).

    Similar to :func:`~read_csv` the `header` argument is applied
    **after** `skiprows` is applied.

    This function will *always* return a list of :class:`DataFrame` *or*
    it will fail, e.g., it will *not* return an empty list.

    Examples
    --------
    See the :ref:`read_html documentation in the IO section of the docs
    <io.read_html>` for some examples of reading in HTML tables.
    """
    # Type check here. We don't want to parse only to fail because of an
    # invalid value of an integer skiprows.
    if isinstance(skiprows, numbers.Integral) and skiprows < 0:
        raise ValueError(
            "cannot skip rows starting from the end of the "
            "data (you passed a negative value)"
        )
    if extract_links not in [None, "header", "footer", "body", "all"]:
        raise ValueError(
            "`extract_links` must be one of "
            '{None, "header", "footer", "body", "all"}, got '
            f'"{extract_links}"'
        )

    validate_header_arg(header)
    check_dtype_backend(dtype_backend)

    io = stringify_path(io)

    # A plain string that is not file-like, an existing file, a URL or an
    # fsspec URL is treated as literal HTML, which is deprecated.
    if isinstance(io, str) and not any(
        [
            is_file_like(io),
            file_exists(io),
            is_url(io),
            is_fsspec_url(io),
        ]
    ):
        warnings.warn(
            "Passing literal html to 'read_html' is deprecated and "
            "will be removed in a future version. To read from a "
            "literal string, wrap it in a 'StringIO' object.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    # The named parameters of ``_parse`` select and configure the parser;
    # the remaining keywords travel through its ``**kwargs``.
    return _parse(
        flavor=flavor,
        io=io,
        match=match,
        header=header,
        index_col=index_col,
        skiprows=skiprows,
        parse_dates=parse_dates,
        thousands=thousands,
        attrs=attrs,
        encoding=encoding,
        decimal=decimal,
        converters=converters,
        na_values=na_values,
        keep_default_na=keep_default_na,
        displayed_only=displayed_only,
        extract_links=extract_links,
        dtype_backend=dtype_backend,
        storage_options=storage_options,
    )
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/pickle.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
""" pickle compat """
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import pickle
|
| 5 |
+
from typing import (
|
| 6 |
+
TYPE_CHECKING,
|
| 7 |
+
Any,
|
| 8 |
+
)
|
| 9 |
+
import warnings
|
| 10 |
+
|
| 11 |
+
from pandas.compat import pickle_compat as pc
|
| 12 |
+
from pandas.util._decorators import doc
|
| 13 |
+
|
| 14 |
+
from pandas.core.shared_docs import _shared_docs
|
| 15 |
+
|
| 16 |
+
from pandas.io.common import get_handle
|
| 17 |
+
|
| 18 |
+
if TYPE_CHECKING:
|
| 19 |
+
from pandas._typing import (
|
| 20 |
+
CompressionOptions,
|
| 21 |
+
FilePath,
|
| 22 |
+
ReadPickleBuffer,
|
| 23 |
+
StorageOptions,
|
| 24 |
+
WriteBuffer,
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
from pandas import (
|
| 28 |
+
DataFrame,
|
| 29 |
+
Series,
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
@doc(
|
| 34 |
+
storage_options=_shared_docs["storage_options"],
|
| 35 |
+
compression_options=_shared_docs["compression_options"] % "filepath_or_buffer",
|
| 36 |
+
)
|
| 37 |
+
def to_pickle(
|
| 38 |
+
obj: Any,
|
| 39 |
+
filepath_or_buffer: FilePath | WriteBuffer[bytes],
|
| 40 |
+
compression: CompressionOptions = "infer",
|
| 41 |
+
protocol: int = pickle.HIGHEST_PROTOCOL,
|
| 42 |
+
storage_options: StorageOptions | None = None,
|
| 43 |
+
) -> None:
|
| 44 |
+
"""
|
| 45 |
+
Pickle (serialize) object to file.
|
| 46 |
+
|
| 47 |
+
Parameters
|
| 48 |
+
----------
|
| 49 |
+
obj : any object
|
| 50 |
+
Any python object.
|
| 51 |
+
filepath_or_buffer : str, path object, or file-like object
|
| 52 |
+
String, path object (implementing ``os.PathLike[str]``), or file-like
|
| 53 |
+
object implementing a binary ``write()`` function.
|
| 54 |
+
Also accepts URL. URL has to be of S3 or GCS.
|
| 55 |
+
{compression_options}
|
| 56 |
+
|
| 57 |
+
.. versionchanged:: 1.4.0 Zstandard support.
|
| 58 |
+
|
| 59 |
+
protocol : int
|
| 60 |
+
Int which indicates which protocol should be used by the pickler,
|
| 61 |
+
default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
|
| 62 |
+
values for this parameter depend on the version of Python. For Python
|
| 63 |
+
2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value.
|
| 64 |
+
For Python >= 3.4, 4 is a valid value. A negative value for the
|
| 65 |
+
protocol parameter is equivalent to setting its value to
|
| 66 |
+
HIGHEST_PROTOCOL.
|
| 67 |
+
|
| 68 |
+
{storage_options}
|
| 69 |
+
|
| 70 |
+
.. [1] https://docs.python.org/3/library/pickle.html
|
| 71 |
+
|
| 72 |
+
See Also
|
| 73 |
+
--------
|
| 74 |
+
read_pickle : Load pickled pandas object (or any object) from file.
|
| 75 |
+
DataFrame.to_hdf : Write DataFrame to an HDF5 file.
|
| 76 |
+
DataFrame.to_sql : Write DataFrame to a SQL database.
|
| 77 |
+
DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
|
| 78 |
+
|
| 79 |
+
Examples
|
| 80 |
+
--------
|
| 81 |
+
>>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP
|
| 82 |
+
>>> original_df # doctest: +SKIP
|
| 83 |
+
foo bar
|
| 84 |
+
0 0 5
|
| 85 |
+
1 1 6
|
| 86 |
+
2 2 7
|
| 87 |
+
3 3 8
|
| 88 |
+
4 4 9
|
| 89 |
+
>>> pd.to_pickle(original_df, "./dummy.pkl") # doctest: +SKIP
|
| 90 |
+
|
| 91 |
+
>>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP
|
| 92 |
+
>>> unpickled_df # doctest: +SKIP
|
| 93 |
+
foo bar
|
| 94 |
+
0 0 5
|
| 95 |
+
1 1 6
|
| 96 |
+
2 2 7
|
| 97 |
+
3 3 8
|
| 98 |
+
4 4 9
|
| 99 |
+
""" # noqa: E501
|
| 100 |
+
if protocol < 0:
|
| 101 |
+
protocol = pickle.HIGHEST_PROTOCOL
|
| 102 |
+
|
| 103 |
+
with get_handle(
|
| 104 |
+
filepath_or_buffer,
|
| 105 |
+
"wb",
|
| 106 |
+
compression=compression,
|
| 107 |
+
is_text=False,
|
| 108 |
+
storage_options=storage_options,
|
| 109 |
+
) as handles:
|
| 110 |
+
# letting pickle write directly to the buffer is more memory-efficient
|
| 111 |
+
pickle.dump(obj, handles.handle, protocol=protocol)
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
@doc(
    storage_options=_shared_docs["storage_options"],
    decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer",
)
def read_pickle(
    filepath_or_buffer: FilePath | ReadPickleBuffer,
    compression: CompressionOptions = "infer",
    storage_options: StorageOptions | None = None,
) -> DataFrame | Series:
    """
    Load pickled pandas object (or any object) from file.

    .. warning::

       Loading pickled data received from untrusted sources can be
       unsafe. See `here <https://docs.python.org/3/library/pickle.html>`__.

    Parameters
    ----------
    filepath_or_buffer : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``readlines()`` function.
        Also accepts URL. URL is not limited to S3 and GCS.

    {decompression_options}

        .. versionchanged:: 1.4.0 Zstandard support.

    {storage_options}

    Returns
    -------
    same type as object stored in file

    See Also
    --------
    DataFrame.to_pickle : Pickle (serialize) DataFrame object to file.
    Series.to_pickle : Pickle (serialize) Series object to file.
    read_hdf : Read HDF5 file into a DataFrame.
    read_sql : Read SQL query or database table into a DataFrame.
    read_parquet : Load a parquet object, returning a DataFrame.

    Notes
    -----
    read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3
    provided the object was serialized with to_pickle.

    Examples
    --------
    >>> original_df = pd.DataFrame(
    ...     {{"foo": range(5), "bar": range(5, 10)}}
    ...    )  # doctest: +SKIP
    >>> original_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> pd.to_pickle(original_df, "./dummy.pkl")  # doctest: +SKIP

    >>> unpickled_df = pd.read_pickle("./dummy.pkl")  # doctest: +SKIP
    >>> unpickled_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    """
    # Failures of the plain ``pickle.load`` that trigger the compat loader.
    excs_to_catch = (AttributeError, ImportError, ModuleNotFoundError, TypeError)
    with get_handle(
        filepath_or_buffer,
        "rb",
        compression=compression,
        is_text=False,
        storage_options=storage_options,
    ) as handles:
        # 1) try standard library Pickle
        # 2) try pickle_compat (older pandas version) to handle subclass changes
        # 3) try pickle_compat with latin-1 encoding upon a UnicodeDecodeError

        try:
            # TypeError for Cython complaints about object.__new__ vs Tick.__new__
            try:
                with warnings.catch_warnings(record=True):
                    # We want to silence any warnings about, e.g. moved modules.
                    warnings.simplefilter("ignore", Warning)
                    return pickle.load(handles.handle)
            except excs_to_catch:
                # e.g.
                #  "No module named 'pandas.core.sparse.series'"
                #  "Can't get attribute '__nat_unpickle' on <module 'pandas._libs.tslib"
                # NOTE(review): the handle may already be partly consumed by
                # the failed load above; presumably ``pc.load`` rewinds it
                # -- confirm in pandas.compat.pickle_compat.
                return pc.load(handles.handle, encoding=None)
        except UnicodeDecodeError:
            # e.g. can occur for files written in py27; see GH#28645 and GH#31988
            return pc.load(handles.handle, encoding="latin-1")
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/pytables.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/sql.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (218 Bytes). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_aggregation.cpython-312.pyc
ADDED
|
Binary file (4.43 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_common.cpython-312.pyc
ADDED
|
Binary file (14.8 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_downstream.cpython-312.pyc
ADDED
|
Binary file (16 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_errors.cpython-312.pyc
ADDED
|
Binary file (4.26 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_expressions.cpython-312.pyc
ADDED
|
Binary file (23.5 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_flags.cpython-312.pyc
ADDED
|
Binary file (3.19 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_multilevel.cpython-312.pyc
ADDED
|
Binary file (19.3 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_nanops.cpython-312.pyc
ADDED
|
Binary file (66.6 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_optional_dependency.cpython-312.pyc
ADDED
|
Binary file (4.64 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_register_accessor.cpython-312.pyc
ADDED
|
Binary file (6.88 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_sorting.cpython-312.pyc
ADDED
|
Binary file (27.7 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_take.cpython-312.pyc
ADDED
|
Binary file (19.9 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/__init__.py
ADDED
|
File without changes
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/masked_shared.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests shared by MaskedArray subclasses.
|
| 3 |
+
"""
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pytest
|
| 6 |
+
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import pandas._testing as tm
|
| 9 |
+
from pandas.tests.extension.base import BaseOpsUtil
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class ComparisonOps(BaseOpsUtil):
    """Comparison-operator checks shared by the masked-array test suites."""

    def _compare_other(self, data, op, other):
        """
        Check ``op(data, other)`` on the raw array and on a Series wrapping it.

        In both cases the expected values are computed from ``data._data``,
        held in the nullable "boolean" dtype, and set to ``pd.NA`` wherever
        ``data._mask`` is set.
        """
        mask = data._mask

        # array
        array_result = pd.Series(op(data, other))
        array_expected = pd.Series(op(data._data, other), dtype="boolean")
        # fill the nan locations
        array_expected[mask] = pd.NA
        tm.assert_series_equal(array_result, array_expected)

        # series
        series_result = op(pd.Series(data), other)
        # Set nullable dtype here to avoid upcasting when setting to pd.NA below
        series_expected = op(pd.Series(data._data), other).astype("boolean")
        # fill the nan locations
        series_expected[mask] = pd.NA
        tm.assert_series_equal(series_result, series_expected)

    # subclass will override to parametrize 'other'
    def test_scalar(self, other, comparison_op, dtype):
        """
        Compare a three-element masked array (one missing value) to a scalar.

        When ``other`` is ``pd.NA`` the expected result is all-NA; otherwise
        it is built from the raw comparison plus the left operand's mask.
        """
        left = pd.array([1, 0, None], dtype=dtype)
        result = comparison_op(left, other)

        if other is not pd.NA:
            raw = comparison_op(left._data, other)
            expected = pd.arrays.BooleanArray(raw, left._mask, copy=True)
        else:
            expected = pd.array([None, None, None], dtype="boolean")
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        result[0] = pd.NA
        tm.assert_extension_array_equal(left, pd.array([1, 0, None], dtype=dtype))
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class NumericOps:
    """Tests for behaviour common to the numeric masked arrays."""

    # Shared by IntegerArray and FloatingArray, not BooleanArray

    def test_searchsorted_nan(self, dtype):
        """Searching for NaN in 0..9 yields position 10 from either side."""
        # The base class casts to object dtype, for which searchsorted returns
        # 0 from the left and 10 from the right.
        arr = pd.array(range(10), dtype=dtype)

        for side in ("left", "right"):
            assert arr.searchsorted(np.nan, side=side) == 10

    def test_no_shared_mask(self, data):
        """The result of ``data + 1`` must not share memory with ``data``."""
        assert not tm.shares_memory(data + 1, data)

    def test_array(self, comparison_op, dtype):
        """Element-wise comparison of two masked arrays combines both masks."""
        left_values = [0, 1, 2, None, None, None]
        right_values = [0, 1, None, 0, 1, None]
        left = pd.array(left_values, dtype=dtype)
        right = pd.array(right_values, dtype=dtype)

        result = comparison_op(left, right)
        raw = comparison_op(left._data, right._data)
        combined_mask = left._mask | right._mask
        tm.assert_extension_array_equal(
            result, pd.arrays.BooleanArray(raw, combined_mask)
        )

        # ensure we haven't mutated anything inplace
        result[0] = pd.NA
        tm.assert_extension_array_equal(left, pd.array(left_values, dtype=dtype))
        tm.assert_extension_array_equal(right, pd.array(right_values, dtype=dtype))

    def test_compare_with_booleanarray(self, comparison_op, dtype):
        """
        Comparing a BooleanArray with a 0/1/NA numeric array matches comparing
        it with the corresponding False/True/NA BooleanArray, in either order.
        """
        bools = pd.array([True, False, None] * 3, dtype="boolean")
        numeric = pd.array([0] * 3 + [1] * 3 + [None] * 3, dtype=dtype)
        numeric_as_bool = pd.array(
            [False] * 3 + [True] * 3 + [None] * 3, dtype="boolean"
        )

        expected = comparison_op(bools, numeric_as_bool)
        result = comparison_op(bools, numeric)
        tm.assert_extension_array_equal(result, expected)

        # reversed op
        expected = comparison_op(numeric_as_bool, bools)
        result = comparison_op(numeric, bools)
        tm.assert_extension_array_equal(result, expected)

    def test_compare_to_string(self, dtype):
        """``ser == "a"`` gives False for the value and NA for the missing entry."""
        # GH#28930
        ser = pd.Series([1, None], dtype=dtype)
        expected = pd.Series([False, pd.NA], dtype="boolean")

        tm.assert_series_equal(ser == "a", expected)

    def test_ufunc_with_out(self, dtype):
        """In-place ``|=`` into an ndarray and ``np.add`` with ``out=``."""
        arr = pd.array([1, 2, 3], dtype=dtype)
        arr_with_na = pd.array([1, 2, pd.NA], dtype=dtype)

        all_true = arr == arr
        has_na = arr_with_na == arr_with_na

        out = np.zeros(3, dtype=bool)
        out |= all_true
        # If MaskedArray.__array_ufunc__ handled "out" appropriately,
        # `out` should still be an ndarray.
        assert isinstance(out, np.ndarray)
        assert out.all()

        # out |= all_true worked because all_true could be cast losslessly to
        # boolean ndarray. has_na can't, so this raises
        out = np.zeros(3, dtype=bool)
        msg = "Specify an appropriate 'na_value' for this dtype"
        with pytest.raises(ValueError, match=msg):
            out |= has_na

        # addition
        total = np.add(arr, arr_with_na)
        expected = pd.array([2, 4, pd.NA], dtype=dtype)
        tm.assert_extension_array_equal(total, expected)

        # when passing out=arr, we will modify 'arr' inplace.
        total = np.add(arr, arr_with_na, out=arr)
        assert total is arr
        tm.assert_extension_array_equal(total, expected)
        tm.assert_extension_array_equal(arr, expected)

    def test_mul_td64_array(self, dtype):
        """Multiplying by a timedelta64 ndarray gives timedeltas, NaT where masked."""
        # GH#45622
        arr = pd.array([1, 2, pd.NA], dtype=dtype)
        td64 = np.arange(3, dtype=np.int64).view("m8[ns]")

        product = arr * td64
        expected = pd.array([pd.Timedelta(0), pd.Timedelta(2), pd.NaT])
        tm.assert_extension_array_equal(product, expected)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_array.py
ADDED
|
@@ -0,0 +1,478 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import datetime
|
| 2 |
+
import decimal
|
| 3 |
+
import re
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
import pytest
|
| 7 |
+
import pytz
|
| 8 |
+
|
| 9 |
+
import pandas as pd
|
| 10 |
+
import pandas._testing as tm
|
| 11 |
+
from pandas.api.extensions import register_extension_dtype
|
| 12 |
+
from pandas.arrays import (
|
| 13 |
+
BooleanArray,
|
| 14 |
+
DatetimeArray,
|
| 15 |
+
FloatingArray,
|
| 16 |
+
IntegerArray,
|
| 17 |
+
IntervalArray,
|
| 18 |
+
SparseArray,
|
| 19 |
+
TimedeltaArray,
|
| 20 |
+
)
|
| 21 |
+
from pandas.core.arrays import (
|
| 22 |
+
NumpyExtensionArray,
|
| 23 |
+
period_array,
|
| 24 |
+
)
|
| 25 |
+
from pandas.tests.extension.decimal import (
|
| 26 |
+
DecimalArray,
|
| 27 |
+
DecimalDtype,
|
| 28 |
+
to_decimal,
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# The fourth unit used to repeat "M8[m]", so timedelta64[m] was never
# exercised and datetime64[m] ran twice; it is now "m8[m]".
@pytest.mark.parametrize("dtype_unit", ["M8[h]", "M8[m]", "m8[h]", "m8[m]"])
def test_dt64_array(dtype_unit):
    """
    ``pd.array`` warns for datetime64/timedelta64 units it does not support.

    Parameters
    ----------
    dtype_unit : str
        NumPy dtype string for a datetime64 ("M8") or timedelta64 ("m8")
        dtype with hour or minute resolution.
    """
    # PR 53817
    dtype_var = np.dtype(dtype_unit)
    msg = (
        r"datetime64 and timedelta64 dtype resolutions other than "
        r"'s', 'ms', 'us', and 'ns' are deprecated. "
        r"In future releases passing unsupported resolutions will "
        r"raise an exception."
    )
    # msg is matched literally, hence re.escape
    with tm.assert_produces_warning(FutureWarning, match=re.escape(msg)):
        pd.array([], dtype=dtype_var)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
@pytest.mark.parametrize(
    "data, dtype, expected",
    [
        # Basic NumPy defaults.
        ([], None, FloatingArray._from_sequence([], dtype="Float64")),
        ([1, 2], None, IntegerArray._from_sequence([1, 2], dtype="Int64")),
        ([1, 2], object, NumpyExtensionArray(np.array([1, 2], dtype=object))),
        (
            [1, 2],
            np.dtype("float32"),
            NumpyExtensionArray(np.array([1.0, 2.0], dtype=np.dtype("float32"))),
        ),
        (
            np.array([], dtype=object),
            None,
            NumpyExtensionArray(np.array([], dtype=object)),
        ),
        (
            np.array([1, 2], dtype="int64"),
            None,
            IntegerArray._from_sequence([1, 2], dtype="Int64"),
        ),
        (
            np.array([1.0, 2.0], dtype="float64"),
            None,
            FloatingArray._from_sequence([1.0, 2.0], dtype="Float64"),
        ),
        # String alias passes through to NumPy
        ([1, 2], "float32", NumpyExtensionArray(np.array([1, 2], dtype="float32"))),
        ([1, 2], "int64", NumpyExtensionArray(np.array([1, 2], dtype=np.int64))),
        # GH#44715 FloatingArray does not support float16, so fall
        # back to NumpyExtensionArray
        (
            np.array([1, 2], dtype=np.float16),
            None,
            NumpyExtensionArray(np.array([1, 2], dtype=np.float16)),
        ),
        # idempotency with e.g. pd.array(pd.array([1, 2], dtype="int64"))
        (
            NumpyExtensionArray(np.array([1, 2], dtype=np.int32)),
            None,
            NumpyExtensionArray(np.array([1, 2], dtype=np.int32)),
        ),
        # Period alias
        (
            [pd.Period("2000", "D"), pd.Period("2001", "D")],
            "Period[D]",
            period_array(["2000", "2001"], freq="D"),
        ),
        # Period dtype
        (
            [pd.Period("2000", "D")],
            pd.PeriodDtype("D"),
            period_array(["2000"], freq="D"),
        ),
        # Datetime (naive)
        (
            [1, 2],
            np.dtype("datetime64[ns]"),
            DatetimeArray._from_sequence(
                np.array([1, 2], dtype="M8[ns]"), dtype="M8[ns]"
            ),
        ),
        (
            [1, 2],
            np.dtype("datetime64[s]"),
            DatetimeArray._from_sequence(
                np.array([1, 2], dtype="M8[s]"), dtype="M8[s]"
            ),
        ),
        (
            np.array([1, 2], dtype="datetime64[ns]"),
            None,
            DatetimeArray._from_sequence(
                np.array([1, 2], dtype="M8[ns]"), dtype="M8[ns]"
            ),
        ),
        (
            pd.DatetimeIndex(["2000", "2001"]),
            np.dtype("datetime64[ns]"),
            DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
        ),
        (
            pd.DatetimeIndex(["2000", "2001"]),
            None,
            DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
        ),
        (
            ["2000", "2001"],
            np.dtype("datetime64[ns]"),
            DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
        ),
        # Datetime (tz-aware)
        (
            ["2000", "2001"],
            pd.DatetimeTZDtype(tz="CET"),
            DatetimeArray._from_sequence(
                ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET")
            ),
        ),
        # Timedelta
        (
            ["1h", "2h"],
            np.dtype("timedelta64[ns]"),
            TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"),
        ),
        (
            pd.TimedeltaIndex(["1h", "2h"]),
            np.dtype("timedelta64[ns]"),
            TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"),
        ),
        (
            np.array([1, 2], dtype="m8[s]"),
            np.dtype("timedelta64[s]"),
            TimedeltaArray._from_sequence(
                np.array([1, 2], dtype="m8[s]"), dtype="m8[s]"
            ),
        ),
        (
            pd.TimedeltaIndex(["1h", "2h"]),
            None,
            TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"),
        ),
        (
            # preserve non-nano, i.e. don't cast to NumpyExtensionArray
            TimedeltaArray._simple_new(
                np.arange(5, dtype=np.int64).view("m8[s]"), dtype=np.dtype("m8[s]")
            ),
            None,
            TimedeltaArray._simple_new(
                np.arange(5, dtype=np.int64).view("m8[s]"), dtype=np.dtype("m8[s]")
            ),
        ),
        (
            # preserve non-nano, i.e. don't cast to NumpyExtensionArray
            TimedeltaArray._simple_new(
                np.arange(5, dtype=np.int64).view("m8[s]"), dtype=np.dtype("m8[s]")
            ),
            np.dtype("m8[s]"),
            TimedeltaArray._simple_new(
                np.arange(5, dtype=np.int64).view("m8[s]"), dtype=np.dtype("m8[s]")
            ),
        ),
        # Category
        (["a", "b"], "category", pd.Categorical(["a", "b"])),
        (
            ["a", "b"],
            pd.CategoricalDtype(None, ordered=True),
            pd.Categorical(["a", "b"], ordered=True),
        ),
        # Interval
        (
            [pd.Interval(1, 2), pd.Interval(3, 4)],
            "interval",
            IntervalArray.from_tuples([(1, 2), (3, 4)]),
        ),
        # Sparse
        ([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")),
        # IntegerNA
        ([1, None], "Int16", pd.array([1, None], dtype="Int16")),
        (
            pd.Series([1, 2]),
            None,
            NumpyExtensionArray(np.array([1, 2], dtype=np.int64)),
        ),
        # String
        (
            ["a", None],
            "string",
            pd.StringDtype()
            .construct_array_type()
            ._from_sequence(["a", None], dtype=pd.StringDtype()),
        ),
        (
            ["a", None],
            pd.StringDtype(),
            pd.StringDtype()
            .construct_array_type()
            ._from_sequence(["a", None], dtype=pd.StringDtype()),
        ),
        # Boolean
        (
            [True, None],
            "boolean",
            BooleanArray._from_sequence([True, None], dtype="boolean"),
        ),
        (
            [True, None],
            pd.BooleanDtype(),
            BooleanArray._from_sequence([True, None], dtype="boolean"),
        ),
        # Index
        (pd.Index([1, 2]), None, NumpyExtensionArray(np.array([1, 2], dtype=np.int64))),
        # Series[EA] returns the EA
        (
            pd.Series(pd.Categorical(["a", "b"], categories=["a", "b", "c"])),
            None,
            pd.Categorical(["a", "b"], categories=["a", "b", "c"]),
        ),
        # "3rd party" EAs work
        ([decimal.Decimal(0), decimal.Decimal(1)], "decimal", to_decimal([0, 1])),
        # pass an ExtensionArray, but a different dtype
        (
            period_array(["2000", "2001"], freq="D"),
            "category",
            pd.Categorical([pd.Period("2000", "D"), pd.Period("2001", "D")]),
        ),
    ],
)
def test_array(data, dtype, expected):
    """
    Check ``pd.array(data, dtype=dtype)`` against ``expected`` for each case.

    Each parametrized tuple is ``(data, dtype, expected)``; ``dtype`` may be
    None, a string alias, a NumPy dtype or a pandas dtype object.
    """
    result = pd.array(data, dtype=dtype)
    tm.assert_equal(result, expected)
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def test_array_copy():
    """``pd.array`` copies an ndarray unless ``copy=False`` is passed."""
    source = np.array([1, 2])

    # default is to copy
    by_default = pd.array(source, dtype=source.dtype)
    assert not tm.shares_memory(source, by_default)

    # copy=True
    copied = pd.array(source, dtype=source.dtype, copy=True)
    assert not tm.shares_memory(source, copied)

    # copy=False
    view = pd.array(source, dtype=source.dtype, copy=False)
    assert tm.shares_memory(source, view)
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
# pytz tzinfo for "CET"; used below to build tz-aware datetime.datetime
# inputs and the matching DatetimeTZDtype in the inference cases.
cet = pytz.timezone("CET")
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
@pytest.mark.parametrize(
    "data, expected",
    [
        # period
        (
            [pd.Period("2000", "D"), pd.Period("2001", "D")],
            period_array(["2000", "2001"], freq="D"),
        ),
        # interval
        ([pd.Interval(0, 1), pd.Interval(1, 2)], IntervalArray.from_breaks([0, 1, 2])),
        # datetime
        (
            [pd.Timestamp("2000"), pd.Timestamp("2001")],
            DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
        ),
        (
            [datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)],
            DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
        ),
        (
            np.array([1, 2], dtype="M8[ns]"),
            DatetimeArray._from_sequence(np.array([1, 2], dtype="M8[ns]")),
        ),
        (
            np.array([1, 2], dtype="M8[us]"),
            DatetimeArray._simple_new(
                np.array([1, 2], dtype="M8[us]"), dtype=np.dtype("M8[us]")
            ),
        ),
        # datetimetz
        (
            [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")],
            DatetimeArray._from_sequence(
                ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET", unit="ns")
            ),
        ),
        (
            [
                datetime.datetime(2000, 1, 1, tzinfo=cet),
                datetime.datetime(2001, 1, 1, tzinfo=cet),
            ],
            DatetimeArray._from_sequence(
                ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz=cet, unit="ns")
            ),
        ),
        # timedelta
        (
            [pd.Timedelta("1h"), pd.Timedelta("2h")],
            TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"),
        ),
        (
            np.array([1, 2], dtype="m8[ns]"),
            TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[ns]")),
        ),
        (
            np.array([1, 2], dtype="m8[us]"),
            TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[us]")),
        ),
        # integer
        ([1, 2], IntegerArray._from_sequence([1, 2], dtype="Int64")),
        ([1, None], IntegerArray._from_sequence([1, None], dtype="Int64")),
        ([1, pd.NA], IntegerArray._from_sequence([1, pd.NA], dtype="Int64")),
        ([1, np.nan], IntegerArray._from_sequence([1, np.nan], dtype="Int64")),
        # float
        ([0.1, 0.2], FloatingArray._from_sequence([0.1, 0.2], dtype="Float64")),
        ([0.1, None], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
        ([0.1, np.nan], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
        ([0.1, pd.NA], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
        # integer-like float
        ([1.0, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")),
        ([1.0, None], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
        ([1.0, np.nan], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
        ([1.0, pd.NA], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
        # mixed-integer-float
        ([1, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")),
        (
            [1, np.nan, 2.0],
            FloatingArray._from_sequence([1.0, None, 2.0], dtype="Float64"),
        ),
        # string
        (
            ["a", "b"],
            pd.StringDtype()
            .construct_array_type()
            ._from_sequence(["a", "b"], dtype=pd.StringDtype()),
        ),
        (
            ["a", None],
            pd.StringDtype()
            .construct_array_type()
            ._from_sequence(["a", None], dtype=pd.StringDtype()),
        ),
        # Boolean
        ([True, False], BooleanArray._from_sequence([True, False], dtype="boolean")),
        ([True, None], BooleanArray._from_sequence([True, None], dtype="boolean")),
    ],
)
def test_array_inference(data, expected):
    """
    Check the array type ``pd.array`` infers when no dtype is given.

    Each parametrized tuple is ``(data, expected)``; the result of
    ``pd.array(data)`` must equal ``expected``.
    """
    result = pd.array(data)
    tm.assert_equal(result, expected)
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
@pytest.mark.parametrize(
    "data",
    [
        # mix of frequencies
        [pd.Period("2000", "D"), pd.Period("2001", "Y")],
        # mix of closed
        [pd.Interval(0, 1, closed="left"), pd.Interval(1, 2, closed="right")],
        # Mix of timezones
        [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000", tz="UTC")],
        # Mix of tz-aware and tz-naive
        [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000")],
        np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")]),
    ],
)
def test_array_inference_fails(data):
    """Mixed inputs come back as an object-dtype NumpyExtensionArray."""
    inferred = pd.array(data)
    object_backed = NumpyExtensionArray(np.array(data, dtype=object))
    tm.assert_extension_array_equal(inferred, object_backed)
|
| 398 |
+
|
| 399 |
+
|
| 400 |
+
@pytest.mark.parametrize("data", [np.array(0)])
def test_nd_raises(data):
    """A zero-dimensional ndarray is rejected with a ValueError."""
    msg = "NumpyExtensionArray must be 1-dimensional"
    with pytest.raises(ValueError, match=msg):
        pd.array(data, dtype="int64")
|
| 404 |
+
|
| 405 |
+
|
| 406 |
+
def test_scalar_raises():
    """Passing a bare scalar to ``pd.array`` raises a ValueError."""
    msg = "Cannot pass scalar '1'"
    with pytest.raises(ValueError, match=msg):
        pd.array(1)
|
| 409 |
+
|
| 410 |
+
|
| 411 |
+
def test_dataframe_raises():
    """Passing a DataFrame to ``pd.array`` raises a TypeError."""
    # GH#51167 don't accidentally cast to StringArray by doing inference on columns
    frame = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
    with pytest.raises(TypeError, match="Cannot pass DataFrame to 'pandas.array'"):
        pd.array(frame)
|
| 417 |
+
|
| 418 |
+
|
| 419 |
+
def test_bounds_check():
    """A negative value cannot be stored in a "UInt16" array."""
    # GH21796
    msg = r"cannot safely cast non-equivalent int(32|64) to uint16"
    with pytest.raises(TypeError, match=msg):
        pd.array([-1, 2, 3], dtype="UInt16")
|
| 425 |
+
|
| 426 |
+
|
| 427 |
+
# ---------------------------------------------------------------------------
|
| 428 |
+
# A couple dummy classes to ensure that Series and Indexes are unboxed before
|
| 429 |
+
# getting to the EA classes.
|
| 430 |
+
|
| 431 |
+
|
| 432 |
+
@register_extension_dtype
class DecimalDtype2(DecimalDtype):
    """DecimalDtype variant registered under the name "decimal2"."""

    name = "decimal2"

    @classmethod
    def construct_array_type(cls):
        """
        Give the array class that goes with this dtype.

        Returns
        -------
        type
            The ``DecimalArray2`` class.
        """
        return DecimalArray2
|
| 446 |
+
|
| 447 |
+
|
| 448 |
+
class DecimalArray2(DecimalArray):
    """DecimalArray whose ``_from_sequence`` refuses Series and Index input."""

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
        """
        Build the array from ``scalars`` via the parent implementation.

        Raises
        ------
        TypeError
            If ``scalars`` is a ``pd.Series`` or ``pd.Index``.
        """
        still_boxed = isinstance(scalars, (pd.Series, pd.Index))
        if not still_boxed:
            return super()._from_sequence(scalars, dtype=dtype, copy=copy)

        raise TypeError("scalars should not be of type pd.Series or pd.Index")
|
| 455 |
+
|
| 456 |
+
|
| 457 |
+
def test_array_unboxes(index_or_series):
    """``pd.array`` hands the unboxed values, not the Series/Index, to the EA."""
    boxed = index_or_series([decimal.Decimal("1"), decimal.Decimal("2")])
    decimal2 = DecimalDtype2()

    # sanity check: the dummy array really does reject boxed input
    msg = "scalars should not be of type pd.Series or pd.Index"
    with pytest.raises(TypeError, match=msg):
        DecimalArray2._from_sequence(boxed, dtype=decimal2)

    result = pd.array(boxed, dtype="decimal2")
    expected = DecimalArray2._from_sequence(boxed.values, dtype=decimal2)
    tm.assert_equal(result, expected)
|
| 471 |
+
|
| 472 |
+
|
| 473 |
+
def test_array_to_numpy_na():
    """``to_numpy(na_value=True, dtype=bool)`` on a string array with one NA."""
    # GH#40638
    strings = pd.array([pd.NA, 1], dtype="string[python]")
    as_bool = strings.to_numpy(na_value=True, dtype=bool)
    tm.assert_numpy_array_equal(as_bool, np.array([True, True]))
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_datetimelike.py
ADDED
|
@@ -0,0 +1,1344 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
import warnings
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pytest
|
| 8 |
+
|
| 9 |
+
from pandas._libs import (
|
| 10 |
+
NaT,
|
| 11 |
+
OutOfBoundsDatetime,
|
| 12 |
+
Timestamp,
|
| 13 |
+
)
|
| 14 |
+
from pandas._libs.tslibs.dtypes import freq_to_period_freqstr
|
| 15 |
+
from pandas.compat.numpy import np_version_gt2
|
| 16 |
+
|
| 17 |
+
import pandas as pd
|
| 18 |
+
from pandas import (
|
| 19 |
+
DatetimeIndex,
|
| 20 |
+
Period,
|
| 21 |
+
PeriodIndex,
|
| 22 |
+
TimedeltaIndex,
|
| 23 |
+
)
|
| 24 |
+
import pandas._testing as tm
|
| 25 |
+
from pandas.core.arrays import (
|
| 26 |
+
DatetimeArray,
|
| 27 |
+
NumpyExtensionArray,
|
| 28 |
+
PeriodArray,
|
| 29 |
+
TimedeltaArray,
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# TODO: more freq variants
|
| 34 |
+
@pytest.fixture(params=["D", "B", "W", "ME", "QE", "YE"])
|
| 35 |
+
def freqstr(request):
|
| 36 |
+
"""Fixture returning parametrized frequency in string format."""
|
| 37 |
+
return request.param
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
@pytest.fixture
|
| 41 |
+
def period_index(freqstr):
|
| 42 |
+
"""
|
| 43 |
+
A fixture to provide PeriodIndex objects with different frequencies.
|
| 44 |
+
|
| 45 |
+
Most PeriodArray behavior is already tested in PeriodIndex tests,
|
| 46 |
+
so here we just test that the PeriodArray behavior matches
|
| 47 |
+
the PeriodIndex behavior.
|
| 48 |
+
"""
|
| 49 |
+
# TODO: non-monotone indexes; NaTs, different start dates
|
| 50 |
+
with warnings.catch_warnings():
|
| 51 |
+
# suppress deprecation of Period[B]
|
| 52 |
+
warnings.filterwarnings(
|
| 53 |
+
"ignore", message="Period with BDay freq", category=FutureWarning
|
| 54 |
+
)
|
| 55 |
+
freqstr = freq_to_period_freqstr(1, freqstr)
|
| 56 |
+
pi = pd.period_range(start=Timestamp("2000-01-01"), periods=100, freq=freqstr)
|
| 57 |
+
return pi
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
@pytest.fixture
|
| 61 |
+
def datetime_index(freqstr):
|
| 62 |
+
"""
|
| 63 |
+
A fixture to provide DatetimeIndex objects with different frequencies.
|
| 64 |
+
|
| 65 |
+
Most DatetimeArray behavior is already tested in DatetimeIndex tests,
|
| 66 |
+
so here we just test that the DatetimeArray behavior matches
|
| 67 |
+
the DatetimeIndex behavior.
|
| 68 |
+
"""
|
| 69 |
+
# TODO: non-monotone indexes; NaTs, different start dates, timezones
|
| 70 |
+
dti = pd.date_range(start=Timestamp("2000-01-01"), periods=100, freq=freqstr)
|
| 71 |
+
return dti
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
@pytest.fixture
|
| 75 |
+
def timedelta_index():
|
| 76 |
+
"""
|
| 77 |
+
A fixture to provide TimedeltaIndex objects with different frequencies.
|
| 78 |
+
Most TimedeltaArray behavior is already tested in TimedeltaIndex tests,
|
| 79 |
+
so here we just test that the TimedeltaArray behavior matches
|
| 80 |
+
the TimedeltaIndex behavior.
|
| 81 |
+
"""
|
| 82 |
+
# TODO: flesh this out
|
| 83 |
+
return TimedeltaIndex(["1 Day", "3 Hours", "NaT"])
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
class SharedTests:
|
| 87 |
+
index_cls: type[DatetimeIndex | PeriodIndex | TimedeltaIndex]
|
| 88 |
+
|
| 89 |
+
@pytest.fixture
|
| 90 |
+
def arr1d(self):
|
| 91 |
+
"""Fixture returning DatetimeArray with daily frequency."""
|
| 92 |
+
data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
|
| 93 |
+
if self.array_cls is PeriodArray:
|
| 94 |
+
arr = self.array_cls(data, freq="D")
|
| 95 |
+
else:
|
| 96 |
+
arr = self.index_cls(data, freq="D")._data
|
| 97 |
+
return arr
|
| 98 |
+
|
| 99 |
+
def test_compare_len1_raises(self, arr1d):
|
| 100 |
+
# make sure we raise when comparing with different lengths, specific
|
| 101 |
+
# to the case where one has length-1, which numpy would broadcast
|
| 102 |
+
arr = arr1d
|
| 103 |
+
idx = self.index_cls(arr)
|
| 104 |
+
|
| 105 |
+
with pytest.raises(ValueError, match="Lengths must match"):
|
| 106 |
+
arr == arr[:1]
|
| 107 |
+
|
| 108 |
+
# test the index classes while we're at it, GH#23078
|
| 109 |
+
with pytest.raises(ValueError, match="Lengths must match"):
|
| 110 |
+
idx <= idx[[0]]
|
| 111 |
+
|
| 112 |
+
@pytest.mark.parametrize(
|
| 113 |
+
"result",
|
| 114 |
+
[
|
| 115 |
+
pd.date_range("2020", periods=3),
|
| 116 |
+
pd.date_range("2020", periods=3, tz="UTC"),
|
| 117 |
+
pd.timedelta_range("0 days", periods=3),
|
| 118 |
+
pd.period_range("2020Q1", periods=3, freq="Q"),
|
| 119 |
+
],
|
| 120 |
+
)
|
| 121 |
+
def test_compare_with_Categorical(self, result):
|
| 122 |
+
expected = pd.Categorical(result)
|
| 123 |
+
assert all(result == expected)
|
| 124 |
+
assert not any(result != expected)
|
| 125 |
+
|
| 126 |
+
@pytest.mark.parametrize("reverse", [True, False])
|
| 127 |
+
@pytest.mark.parametrize("as_index", [True, False])
|
| 128 |
+
def test_compare_categorical_dtype(self, arr1d, as_index, reverse, ordered):
|
| 129 |
+
other = pd.Categorical(arr1d, ordered=ordered)
|
| 130 |
+
if as_index:
|
| 131 |
+
other = pd.CategoricalIndex(other)
|
| 132 |
+
|
| 133 |
+
left, right = arr1d, other
|
| 134 |
+
if reverse:
|
| 135 |
+
left, right = right, left
|
| 136 |
+
|
| 137 |
+
ones = np.ones(arr1d.shape, dtype=bool)
|
| 138 |
+
zeros = ~ones
|
| 139 |
+
|
| 140 |
+
result = left == right
|
| 141 |
+
tm.assert_numpy_array_equal(result, ones)
|
| 142 |
+
|
| 143 |
+
result = left != right
|
| 144 |
+
tm.assert_numpy_array_equal(result, zeros)
|
| 145 |
+
|
| 146 |
+
if not reverse and not as_index:
|
| 147 |
+
# Otherwise Categorical raises TypeError bc it is not ordered
|
| 148 |
+
# TODO: we should probably get the same behavior regardless?
|
| 149 |
+
result = left < right
|
| 150 |
+
tm.assert_numpy_array_equal(result, zeros)
|
| 151 |
+
|
| 152 |
+
result = left <= right
|
| 153 |
+
tm.assert_numpy_array_equal(result, ones)
|
| 154 |
+
|
| 155 |
+
result = left > right
|
| 156 |
+
tm.assert_numpy_array_equal(result, zeros)
|
| 157 |
+
|
| 158 |
+
result = left >= right
|
| 159 |
+
tm.assert_numpy_array_equal(result, ones)
|
| 160 |
+
|
| 161 |
+
def test_take(self):
|
| 162 |
+
data = np.arange(100, dtype="i8") * 24 * 3600 * 10**9
|
| 163 |
+
np.random.default_rng(2).shuffle(data)
|
| 164 |
+
|
| 165 |
+
if self.array_cls is PeriodArray:
|
| 166 |
+
arr = PeriodArray(data, dtype="period[D]")
|
| 167 |
+
else:
|
| 168 |
+
arr = self.index_cls(data)._data
|
| 169 |
+
idx = self.index_cls._simple_new(arr)
|
| 170 |
+
|
| 171 |
+
takers = [1, 4, 94]
|
| 172 |
+
result = arr.take(takers)
|
| 173 |
+
expected = idx.take(takers)
|
| 174 |
+
|
| 175 |
+
tm.assert_index_equal(self.index_cls(result), expected)
|
| 176 |
+
|
| 177 |
+
takers = np.array([1, 4, 94])
|
| 178 |
+
result = arr.take(takers)
|
| 179 |
+
expected = idx.take(takers)
|
| 180 |
+
|
| 181 |
+
tm.assert_index_equal(self.index_cls(result), expected)
|
| 182 |
+
|
| 183 |
+
@pytest.mark.parametrize("fill_value", [2, 2.0, Timestamp(2021, 1, 1, 12).time])
|
| 184 |
+
def test_take_fill_raises(self, fill_value, arr1d):
|
| 185 |
+
msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
|
| 186 |
+
with pytest.raises(TypeError, match=msg):
|
| 187 |
+
arr1d.take([0, 1], allow_fill=True, fill_value=fill_value)
|
| 188 |
+
|
| 189 |
+
def test_take_fill(self, arr1d):
|
| 190 |
+
arr = arr1d
|
| 191 |
+
|
| 192 |
+
result = arr.take([-1, 1], allow_fill=True, fill_value=None)
|
| 193 |
+
assert result[0] is NaT
|
| 194 |
+
|
| 195 |
+
result = arr.take([-1, 1], allow_fill=True, fill_value=np.nan)
|
| 196 |
+
assert result[0] is NaT
|
| 197 |
+
|
| 198 |
+
result = arr.take([-1, 1], allow_fill=True, fill_value=NaT)
|
| 199 |
+
assert result[0] is NaT
|
| 200 |
+
|
| 201 |
+
@pytest.mark.filterwarnings(
|
| 202 |
+
"ignore:Period with BDay freq is deprecated:FutureWarning"
|
| 203 |
+
)
|
| 204 |
+
def test_take_fill_str(self, arr1d):
|
| 205 |
+
# Cast str fill_value matching other fill_value-taking methods
|
| 206 |
+
result = arr1d.take([-1, 1], allow_fill=True, fill_value=str(arr1d[-1]))
|
| 207 |
+
expected = arr1d[[-1, 1]]
|
| 208 |
+
tm.assert_equal(result, expected)
|
| 209 |
+
|
| 210 |
+
msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
|
| 211 |
+
with pytest.raises(TypeError, match=msg):
|
| 212 |
+
arr1d.take([-1, 1], allow_fill=True, fill_value="foo")
|
| 213 |
+
|
| 214 |
+
def test_concat_same_type(self, arr1d):
|
| 215 |
+
arr = arr1d
|
| 216 |
+
idx = self.index_cls(arr)
|
| 217 |
+
idx = idx.insert(0, NaT)
|
| 218 |
+
arr = arr1d
|
| 219 |
+
|
| 220 |
+
result = arr._concat_same_type([arr[:-1], arr[1:], arr])
|
| 221 |
+
arr2 = arr.astype(object)
|
| 222 |
+
expected = self.index_cls(np.concatenate([arr2[:-1], arr2[1:], arr2]))
|
| 223 |
+
|
| 224 |
+
tm.assert_index_equal(self.index_cls(result), expected)
|
| 225 |
+
|
| 226 |
+
def test_unbox_scalar(self, arr1d):
|
| 227 |
+
result = arr1d._unbox_scalar(arr1d[0])
|
| 228 |
+
expected = arr1d._ndarray.dtype.type
|
| 229 |
+
assert isinstance(result, expected)
|
| 230 |
+
|
| 231 |
+
result = arr1d._unbox_scalar(NaT)
|
| 232 |
+
assert isinstance(result, expected)
|
| 233 |
+
|
| 234 |
+
msg = f"'value' should be a {self.scalar_type.__name__}."
|
| 235 |
+
with pytest.raises(ValueError, match=msg):
|
| 236 |
+
arr1d._unbox_scalar("foo")
|
| 237 |
+
|
| 238 |
+
def test_check_compatible_with(self, arr1d):
|
| 239 |
+
arr1d._check_compatible_with(arr1d[0])
|
| 240 |
+
arr1d._check_compatible_with(arr1d[:1])
|
| 241 |
+
arr1d._check_compatible_with(NaT)
|
| 242 |
+
|
| 243 |
+
def test_scalar_from_string(self, arr1d):
|
| 244 |
+
result = arr1d._scalar_from_string(str(arr1d[0]))
|
| 245 |
+
assert result == arr1d[0]
|
| 246 |
+
|
| 247 |
+
def test_reduce_invalid(self, arr1d):
|
| 248 |
+
msg = "does not support reduction 'not a method'"
|
| 249 |
+
with pytest.raises(TypeError, match=msg):
|
| 250 |
+
arr1d._reduce("not a method")
|
| 251 |
+
|
| 252 |
+
@pytest.mark.parametrize("method", ["pad", "backfill"])
|
| 253 |
+
def test_fillna_method_doesnt_change_orig(self, method):
|
| 254 |
+
data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
|
| 255 |
+
if self.array_cls is PeriodArray:
|
| 256 |
+
arr = self.array_cls(data, dtype="period[D]")
|
| 257 |
+
else:
|
| 258 |
+
arr = self.array_cls._from_sequence(data)
|
| 259 |
+
arr[4] = NaT
|
| 260 |
+
|
| 261 |
+
fill_value = arr[3] if method == "pad" else arr[5]
|
| 262 |
+
|
| 263 |
+
result = arr._pad_or_backfill(method=method)
|
| 264 |
+
assert result[4] == fill_value
|
| 265 |
+
|
| 266 |
+
# check that the original was not changed
|
| 267 |
+
assert arr[4] is NaT
|
| 268 |
+
|
| 269 |
+
def test_searchsorted(self):
|
| 270 |
+
data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
|
| 271 |
+
if self.array_cls is PeriodArray:
|
| 272 |
+
arr = self.array_cls(data, dtype="period[D]")
|
| 273 |
+
else:
|
| 274 |
+
arr = self.array_cls._from_sequence(data)
|
| 275 |
+
|
| 276 |
+
# scalar
|
| 277 |
+
result = arr.searchsorted(arr[1])
|
| 278 |
+
assert result == 1
|
| 279 |
+
|
| 280 |
+
result = arr.searchsorted(arr[2], side="right")
|
| 281 |
+
assert result == 3
|
| 282 |
+
|
| 283 |
+
# own-type
|
| 284 |
+
result = arr.searchsorted(arr[1:3])
|
| 285 |
+
expected = np.array([1, 2], dtype=np.intp)
|
| 286 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 287 |
+
|
| 288 |
+
result = arr.searchsorted(arr[1:3], side="right")
|
| 289 |
+
expected = np.array([2, 3], dtype=np.intp)
|
| 290 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 291 |
+
|
| 292 |
+
# GH#29884 match numpy convention on whether NaT goes
|
| 293 |
+
# at the end or the beginning
|
| 294 |
+
result = arr.searchsorted(NaT)
|
| 295 |
+
assert result == 10
|
| 296 |
+
|
| 297 |
+
@pytest.mark.parametrize("box", [None, "index", "series"])
|
| 298 |
+
def test_searchsorted_castable_strings(self, arr1d, box, string_storage):
|
| 299 |
+
arr = arr1d
|
| 300 |
+
if box is None:
|
| 301 |
+
pass
|
| 302 |
+
elif box == "index":
|
| 303 |
+
# Test the equivalent Index.searchsorted method while we're here
|
| 304 |
+
arr = self.index_cls(arr)
|
| 305 |
+
else:
|
| 306 |
+
# Test the equivalent Series.searchsorted method while we're here
|
| 307 |
+
arr = pd.Series(arr)
|
| 308 |
+
|
| 309 |
+
# scalar
|
| 310 |
+
result = arr.searchsorted(str(arr[1]))
|
| 311 |
+
assert result == 1
|
| 312 |
+
|
| 313 |
+
result = arr.searchsorted(str(arr[2]), side="right")
|
| 314 |
+
assert result == 3
|
| 315 |
+
|
| 316 |
+
result = arr.searchsorted([str(x) for x in arr[1:3]])
|
| 317 |
+
expected = np.array([1, 2], dtype=np.intp)
|
| 318 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 319 |
+
|
| 320 |
+
with pytest.raises(
|
| 321 |
+
TypeError,
|
| 322 |
+
match=re.escape(
|
| 323 |
+
f"value should be a '{arr1d._scalar_type.__name__}', 'NaT', "
|
| 324 |
+
"or array of those. Got 'str' instead."
|
| 325 |
+
),
|
| 326 |
+
):
|
| 327 |
+
arr.searchsorted("foo")
|
| 328 |
+
|
| 329 |
+
with pd.option_context("string_storage", string_storage):
|
| 330 |
+
with pytest.raises(
|
| 331 |
+
TypeError,
|
| 332 |
+
match=re.escape(
|
| 333 |
+
f"value should be a '{arr1d._scalar_type.__name__}', 'NaT', "
|
| 334 |
+
"or array of those. Got string array instead."
|
| 335 |
+
),
|
| 336 |
+
):
|
| 337 |
+
arr.searchsorted([str(arr[1]), "baz"])
|
| 338 |
+
|
| 339 |
+
def test_getitem_near_implementation_bounds(self):
|
| 340 |
+
# We only check tz-naive for DTA bc the bounds are slightly different
|
| 341 |
+
# for other tzs
|
| 342 |
+
i8vals = np.asarray([NaT._value + n for n in range(1, 5)], dtype="i8")
|
| 343 |
+
if self.array_cls is PeriodArray:
|
| 344 |
+
arr = self.array_cls(i8vals, dtype="period[ns]")
|
| 345 |
+
else:
|
| 346 |
+
arr = self.index_cls(i8vals, freq="ns")._data
|
| 347 |
+
arr[0] # should not raise OutOfBoundsDatetime
|
| 348 |
+
|
| 349 |
+
index = pd.Index(arr)
|
| 350 |
+
index[0] # should not raise OutOfBoundsDatetime
|
| 351 |
+
|
| 352 |
+
ser = pd.Series(arr)
|
| 353 |
+
ser[0] # should not raise OutOfBoundsDatetime
|
| 354 |
+
|
| 355 |
+
def test_getitem_2d(self, arr1d):
|
| 356 |
+
# 2d slicing on a 1D array
|
| 357 |
+
expected = type(arr1d)._simple_new(
|
| 358 |
+
arr1d._ndarray[:, np.newaxis], dtype=arr1d.dtype
|
| 359 |
+
)
|
| 360 |
+
result = arr1d[:, np.newaxis]
|
| 361 |
+
tm.assert_equal(result, expected)
|
| 362 |
+
|
| 363 |
+
# Lookup on a 2D array
|
| 364 |
+
arr2d = expected
|
| 365 |
+
expected = type(arr2d)._simple_new(arr2d._ndarray[:3, 0], dtype=arr2d.dtype)
|
| 366 |
+
result = arr2d[:3, 0]
|
| 367 |
+
tm.assert_equal(result, expected)
|
| 368 |
+
|
| 369 |
+
# Scalar lookup
|
| 370 |
+
result = arr2d[-1, 0]
|
| 371 |
+
expected = arr1d[-1]
|
| 372 |
+
assert result == expected
|
| 373 |
+
|
| 374 |
+
def test_iter_2d(self, arr1d):
|
| 375 |
+
data2d = arr1d._ndarray[:3, np.newaxis]
|
| 376 |
+
arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype)
|
| 377 |
+
result = list(arr2d)
|
| 378 |
+
assert len(result) == 3
|
| 379 |
+
for x in result:
|
| 380 |
+
assert isinstance(x, type(arr1d))
|
| 381 |
+
assert x.ndim == 1
|
| 382 |
+
assert x.dtype == arr1d.dtype
|
| 383 |
+
|
| 384 |
+
def test_repr_2d(self, arr1d):
|
| 385 |
+
data2d = arr1d._ndarray[:3, np.newaxis]
|
| 386 |
+
arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype)
|
| 387 |
+
|
| 388 |
+
result = repr(arr2d)
|
| 389 |
+
|
| 390 |
+
if isinstance(arr2d, TimedeltaArray):
|
| 391 |
+
expected = (
|
| 392 |
+
f"<{type(arr2d).__name__}>\n"
|
| 393 |
+
"[\n"
|
| 394 |
+
f"['{arr1d[0]._repr_base()}'],\n"
|
| 395 |
+
f"['{arr1d[1]._repr_base()}'],\n"
|
| 396 |
+
f"['{arr1d[2]._repr_base()}']\n"
|
| 397 |
+
"]\n"
|
| 398 |
+
f"Shape: (3, 1), dtype: {arr1d.dtype}"
|
| 399 |
+
)
|
| 400 |
+
else:
|
| 401 |
+
expected = (
|
| 402 |
+
f"<{type(arr2d).__name__}>\n"
|
| 403 |
+
"[\n"
|
| 404 |
+
f"['{arr1d[0]}'],\n"
|
| 405 |
+
f"['{arr1d[1]}'],\n"
|
| 406 |
+
f"['{arr1d[2]}']\n"
|
| 407 |
+
"]\n"
|
| 408 |
+
f"Shape: (3, 1), dtype: {arr1d.dtype}"
|
| 409 |
+
)
|
| 410 |
+
|
| 411 |
+
assert result == expected
|
| 412 |
+
|
| 413 |
+
def test_setitem(self):
|
| 414 |
+
data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
|
| 415 |
+
if self.array_cls is PeriodArray:
|
| 416 |
+
arr = self.array_cls(data, dtype="period[D]")
|
| 417 |
+
else:
|
| 418 |
+
arr = self.index_cls(data, freq="D")._data
|
| 419 |
+
|
| 420 |
+
arr[0] = arr[1]
|
| 421 |
+
expected = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
|
| 422 |
+
expected[0] = expected[1]
|
| 423 |
+
|
| 424 |
+
tm.assert_numpy_array_equal(arr.asi8, expected)
|
| 425 |
+
|
| 426 |
+
arr[:2] = arr[-2:]
|
| 427 |
+
expected[:2] = expected[-2:]
|
| 428 |
+
tm.assert_numpy_array_equal(arr.asi8, expected)
|
| 429 |
+
|
| 430 |
+
@pytest.mark.parametrize(
|
| 431 |
+
"box",
|
| 432 |
+
[
|
| 433 |
+
pd.Index,
|
| 434 |
+
pd.Series,
|
| 435 |
+
np.array,
|
| 436 |
+
list,
|
| 437 |
+
NumpyExtensionArray,
|
| 438 |
+
],
|
| 439 |
+
)
|
| 440 |
+
def test_setitem_object_dtype(self, box, arr1d):
|
| 441 |
+
expected = arr1d.copy()[::-1]
|
| 442 |
+
if expected.dtype.kind in ["m", "M"]:
|
| 443 |
+
expected = expected._with_freq(None)
|
| 444 |
+
|
| 445 |
+
vals = expected
|
| 446 |
+
if box is list:
|
| 447 |
+
vals = list(vals)
|
| 448 |
+
elif box is np.array:
|
| 449 |
+
# if we do np.array(x).astype(object) then dt64 and td64 cast to ints
|
| 450 |
+
vals = np.array(vals.astype(object))
|
| 451 |
+
elif box is NumpyExtensionArray:
|
| 452 |
+
vals = box(np.asarray(vals, dtype=object))
|
| 453 |
+
else:
|
| 454 |
+
vals = box(vals).astype(object)
|
| 455 |
+
|
| 456 |
+
arr1d[:] = vals
|
| 457 |
+
|
| 458 |
+
tm.assert_equal(arr1d, expected)
|
| 459 |
+
|
| 460 |
+
def test_setitem_strs(self, arr1d):
|
| 461 |
+
# Check that we parse strs in both scalar and listlike
|
| 462 |
+
|
| 463 |
+
# Setting list-like of strs
|
| 464 |
+
expected = arr1d.copy()
|
| 465 |
+
expected[[0, 1]] = arr1d[-2:]
|
| 466 |
+
|
| 467 |
+
result = arr1d.copy()
|
| 468 |
+
result[:2] = [str(x) for x in arr1d[-2:]]
|
| 469 |
+
tm.assert_equal(result, expected)
|
| 470 |
+
|
| 471 |
+
# Same thing but now for just a scalar str
|
| 472 |
+
expected = arr1d.copy()
|
| 473 |
+
expected[0] = arr1d[-1]
|
| 474 |
+
|
| 475 |
+
result = arr1d.copy()
|
| 476 |
+
result[0] = str(arr1d[-1])
|
| 477 |
+
tm.assert_equal(result, expected)
|
| 478 |
+
|
| 479 |
+
@pytest.mark.parametrize("as_index", [True, False])
|
| 480 |
+
def test_setitem_categorical(self, arr1d, as_index):
|
| 481 |
+
expected = arr1d.copy()[::-1]
|
| 482 |
+
if not isinstance(expected, PeriodArray):
|
| 483 |
+
expected = expected._with_freq(None)
|
| 484 |
+
|
| 485 |
+
cat = pd.Categorical(arr1d)
|
| 486 |
+
if as_index:
|
| 487 |
+
cat = pd.CategoricalIndex(cat)
|
| 488 |
+
|
| 489 |
+
arr1d[:] = cat[::-1]
|
| 490 |
+
|
| 491 |
+
tm.assert_equal(arr1d, expected)
|
| 492 |
+
|
| 493 |
+
def test_setitem_raises(self, arr1d):
|
| 494 |
+
arr = arr1d[:10]
|
| 495 |
+
val = arr[0]
|
| 496 |
+
|
| 497 |
+
with pytest.raises(IndexError, match="index 12 is out of bounds"):
|
| 498 |
+
arr[12] = val
|
| 499 |
+
|
| 500 |
+
with pytest.raises(TypeError, match="value should be a.* 'object'"):
|
| 501 |
+
arr[0] = object()
|
| 502 |
+
|
| 503 |
+
msg = "cannot set using a list-like indexer with a different length"
|
| 504 |
+
with pytest.raises(ValueError, match=msg):
|
| 505 |
+
# GH#36339
|
| 506 |
+
arr[[]] = [arr[1]]
|
| 507 |
+
|
| 508 |
+
msg = "cannot set using a slice indexer with a different length than"
|
| 509 |
+
with pytest.raises(ValueError, match=msg):
|
| 510 |
+
# GH#36339
|
| 511 |
+
arr[1:1] = arr[:3]
|
| 512 |
+
|
| 513 |
+
@pytest.mark.parametrize("box", [list, np.array, pd.Index, pd.Series])
|
| 514 |
+
def test_setitem_numeric_raises(self, arr1d, box):
|
| 515 |
+
# We dont case e.g. int64 to our own dtype for setitem
|
| 516 |
+
|
| 517 |
+
msg = (
|
| 518 |
+
f"value should be a '{arr1d._scalar_type.__name__}', "
|
| 519 |
+
"'NaT', or array of those. Got"
|
| 520 |
+
)
|
| 521 |
+
with pytest.raises(TypeError, match=msg):
|
| 522 |
+
arr1d[:2] = box([0, 1])
|
| 523 |
+
|
| 524 |
+
with pytest.raises(TypeError, match=msg):
|
| 525 |
+
arr1d[:2] = box([0.0, 1.0])
|
| 526 |
+
|
| 527 |
+
def test_inplace_arithmetic(self):
|
| 528 |
+
# GH#24115 check that iadd and isub are actually in-place
|
| 529 |
+
data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
|
| 530 |
+
if self.array_cls is PeriodArray:
|
| 531 |
+
arr = self.array_cls(data, dtype="period[D]")
|
| 532 |
+
else:
|
| 533 |
+
arr = self.index_cls(data, freq="D")._data
|
| 534 |
+
|
| 535 |
+
expected = arr + pd.Timedelta(days=1)
|
| 536 |
+
arr += pd.Timedelta(days=1)
|
| 537 |
+
tm.assert_equal(arr, expected)
|
| 538 |
+
|
| 539 |
+
expected = arr - pd.Timedelta(days=1)
|
| 540 |
+
arr -= pd.Timedelta(days=1)
|
| 541 |
+
tm.assert_equal(arr, expected)
|
| 542 |
+
|
| 543 |
+
def test_shift_fill_int_deprecated(self, arr1d):
|
| 544 |
+
# GH#31971, enforced in 2.0
|
| 545 |
+
with pytest.raises(TypeError, match="value should be a"):
|
| 546 |
+
arr1d.shift(1, fill_value=1)
|
| 547 |
+
|
| 548 |
+
def test_median(self, arr1d):
|
| 549 |
+
arr = arr1d
|
| 550 |
+
if len(arr) % 2 == 0:
|
| 551 |
+
# make it easier to define `expected`
|
| 552 |
+
arr = arr[:-1]
|
| 553 |
+
|
| 554 |
+
expected = arr[len(arr) // 2]
|
| 555 |
+
|
| 556 |
+
result = arr.median()
|
| 557 |
+
assert type(result) is type(expected)
|
| 558 |
+
assert result == expected
|
| 559 |
+
|
| 560 |
+
arr[len(arr) // 2] = NaT
|
| 561 |
+
if not isinstance(expected, Period):
|
| 562 |
+
expected = arr[len(arr) // 2 - 1 : len(arr) // 2 + 2].mean()
|
| 563 |
+
|
| 564 |
+
assert arr.median(skipna=False) is NaT
|
| 565 |
+
|
| 566 |
+
result = arr.median()
|
| 567 |
+
assert type(result) is type(expected)
|
| 568 |
+
assert result == expected
|
| 569 |
+
|
| 570 |
+
assert arr[:0].median() is NaT
|
| 571 |
+
assert arr[:0].median(skipna=False) is NaT
|
| 572 |
+
|
| 573 |
+
# 2d Case
|
| 574 |
+
arr2 = arr.reshape(-1, 1)
|
| 575 |
+
|
| 576 |
+
result = arr2.median(axis=None)
|
| 577 |
+
assert type(result) is type(expected)
|
| 578 |
+
assert result == expected
|
| 579 |
+
|
| 580 |
+
assert arr2.median(axis=None, skipna=False) is NaT
|
| 581 |
+
|
| 582 |
+
result = arr2.median(axis=0)
|
| 583 |
+
expected2 = type(arr)._from_sequence([expected], dtype=arr.dtype)
|
| 584 |
+
tm.assert_equal(result, expected2)
|
| 585 |
+
|
| 586 |
+
result = arr2.median(axis=0, skipna=False)
|
| 587 |
+
expected2 = type(arr)._from_sequence([NaT], dtype=arr.dtype)
|
| 588 |
+
tm.assert_equal(result, expected2)
|
| 589 |
+
|
| 590 |
+
result = arr2.median(axis=1)
|
| 591 |
+
tm.assert_equal(result, arr)
|
| 592 |
+
|
| 593 |
+
result = arr2.median(axis=1, skipna=False)
|
| 594 |
+
tm.assert_equal(result, arr)
|
| 595 |
+
|
| 596 |
+
def test_from_integer_array(self):
|
| 597 |
+
arr = np.array([1, 2, 3], dtype=np.int64)
|
| 598 |
+
data = pd.array(arr, dtype="Int64")
|
| 599 |
+
if self.array_cls is PeriodArray:
|
| 600 |
+
expected = self.array_cls(arr, dtype=self.example_dtype)
|
| 601 |
+
result = self.array_cls(data, dtype=self.example_dtype)
|
| 602 |
+
else:
|
| 603 |
+
expected = self.array_cls._from_sequence(arr, dtype=self.example_dtype)
|
| 604 |
+
result = self.array_cls._from_sequence(data, dtype=self.example_dtype)
|
| 605 |
+
|
| 606 |
+
tm.assert_extension_array_equal(result, expected)
|
| 607 |
+
|
| 608 |
+
|
| 609 |
+
class TestDatetimeArray(SharedTests):
|
| 610 |
+
index_cls = DatetimeIndex
|
| 611 |
+
array_cls = DatetimeArray
|
| 612 |
+
scalar_type = Timestamp
|
| 613 |
+
example_dtype = "M8[ns]"
|
| 614 |
+
|
| 615 |
+
@pytest.fixture
|
| 616 |
+
def arr1d(self, tz_naive_fixture, freqstr):
|
| 617 |
+
"""
|
| 618 |
+
Fixture returning DatetimeArray with parametrized frequency and
|
| 619 |
+
timezones
|
| 620 |
+
"""
|
| 621 |
+
tz = tz_naive_fixture
|
| 622 |
+
dti = pd.date_range("2016-01-01 01:01:00", periods=5, freq=freqstr, tz=tz)
|
| 623 |
+
dta = dti._data
|
| 624 |
+
return dta
|
| 625 |
+
|
| 626 |
+
def test_round(self, arr1d):
|
| 627 |
+
# GH#24064
|
| 628 |
+
dti = self.index_cls(arr1d)
|
| 629 |
+
|
| 630 |
+
result = dti.round(freq="2min")
|
| 631 |
+
expected = dti - pd.Timedelta(minutes=1)
|
| 632 |
+
expected = expected._with_freq(None)
|
| 633 |
+
tm.assert_index_equal(result, expected)
|
| 634 |
+
|
| 635 |
+
dta = dti._data
|
| 636 |
+
result = dta.round(freq="2min")
|
| 637 |
+
expected = expected._data._with_freq(None)
|
| 638 |
+
tm.assert_datetime_array_equal(result, expected)
|
| 639 |
+
|
| 640 |
+
def test_array_interface(self, datetime_index):
|
| 641 |
+
arr = datetime_index._data
|
| 642 |
+
copy_false = None if np_version_gt2 else False
|
| 643 |
+
|
| 644 |
+
# default asarray gives the same underlying data (for tz naive)
|
| 645 |
+
result = np.asarray(arr)
|
| 646 |
+
expected = arr._ndarray
|
| 647 |
+
assert result is expected
|
| 648 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 649 |
+
result = np.array(arr, copy=copy_false)
|
| 650 |
+
assert result is expected
|
| 651 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 652 |
+
|
| 653 |
+
# specifying M8[ns] gives the same result as default
|
| 654 |
+
result = np.asarray(arr, dtype="datetime64[ns]")
|
| 655 |
+
expected = arr._ndarray
|
| 656 |
+
assert result is expected
|
| 657 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 658 |
+
result = np.array(arr, dtype="datetime64[ns]", copy=copy_false)
|
| 659 |
+
assert result is expected
|
| 660 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 661 |
+
result = np.array(arr, dtype="datetime64[ns]")
|
| 662 |
+
if not np_version_gt2:
|
| 663 |
+
# TODO: GH 57739
|
| 664 |
+
assert result is not expected
|
| 665 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 666 |
+
|
| 667 |
+
# to object dtype
|
| 668 |
+
result = np.asarray(arr, dtype=object)
|
| 669 |
+
expected = np.array(list(arr), dtype=object)
|
| 670 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 671 |
+
|
| 672 |
+
# to other dtype always copies
|
| 673 |
+
result = np.asarray(arr, dtype="int64")
|
| 674 |
+
assert result is not arr.asi8
|
| 675 |
+
assert not np.may_share_memory(arr, result)
|
| 676 |
+
expected = arr.asi8.copy()
|
| 677 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 678 |
+
|
| 679 |
+
# other dtypes handled by numpy
|
| 680 |
+
for dtype in ["float64", str]:
|
| 681 |
+
result = np.asarray(arr, dtype=dtype)
|
| 682 |
+
expected = np.asarray(arr).astype(dtype)
|
| 683 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 684 |
+
|
| 685 |
+
def test_array_object_dtype(self, arr1d):
|
| 686 |
+
# GH#23524
|
| 687 |
+
arr = arr1d
|
| 688 |
+
dti = self.index_cls(arr1d)
|
| 689 |
+
|
| 690 |
+
expected = np.array(list(dti))
|
| 691 |
+
|
| 692 |
+
result = np.array(arr, dtype=object)
|
| 693 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 694 |
+
|
| 695 |
+
# also test the DatetimeIndex method while we're at it
|
| 696 |
+
result = np.array(dti, dtype=object)
|
| 697 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 698 |
+
|
| 699 |
+
def test_array_tz(self, arr1d):
|
| 700 |
+
# GH#23524
|
| 701 |
+
arr = arr1d
|
| 702 |
+
dti = self.index_cls(arr1d)
|
| 703 |
+
copy_false = None if np_version_gt2 else False
|
| 704 |
+
|
| 705 |
+
expected = dti.asi8.view("M8[ns]")
|
| 706 |
+
result = np.array(arr, dtype="M8[ns]")
|
| 707 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 708 |
+
|
| 709 |
+
result = np.array(arr, dtype="datetime64[ns]")
|
| 710 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 711 |
+
|
| 712 |
+
# check that we are not making copies when setting copy=copy_false
|
| 713 |
+
result = np.array(arr, dtype="M8[ns]", copy=copy_false)
|
| 714 |
+
assert result.base is expected.base
|
| 715 |
+
assert result.base is not None
|
| 716 |
+
result = np.array(arr, dtype="datetime64[ns]", copy=copy_false)
|
| 717 |
+
assert result.base is expected.base
|
| 718 |
+
assert result.base is not None
|
| 719 |
+
|
| 720 |
+
def test_array_i8_dtype(self, arr1d):
|
| 721 |
+
arr = arr1d
|
| 722 |
+
dti = self.index_cls(arr1d)
|
| 723 |
+
copy_false = None if np_version_gt2 else False
|
| 724 |
+
|
| 725 |
+
expected = dti.asi8
|
| 726 |
+
result = np.array(arr, dtype="i8")
|
| 727 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 728 |
+
|
| 729 |
+
result = np.array(arr, dtype=np.int64)
|
| 730 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 731 |
+
|
| 732 |
+
# check that we are still making copies when setting copy=copy_false
|
| 733 |
+
result = np.array(arr, dtype="i8", copy=copy_false)
|
| 734 |
+
assert result.base is not expected.base
|
| 735 |
+
assert result.base is None
|
| 736 |
+
|
| 737 |
+
def test_from_array_keeps_base(self):
|
| 738 |
+
# Ensure that DatetimeArray._ndarray.base isn't lost.
|
| 739 |
+
arr = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")
|
| 740 |
+
dta = DatetimeArray._from_sequence(arr)
|
| 741 |
+
|
| 742 |
+
assert dta._ndarray is arr
|
| 743 |
+
dta = DatetimeArray._from_sequence(arr[:0])
|
| 744 |
+
assert dta._ndarray.base is arr
|
| 745 |
+
|
| 746 |
+
def test_from_dti(self, arr1d):
|
| 747 |
+
arr = arr1d
|
| 748 |
+
dti = self.index_cls(arr1d)
|
| 749 |
+
assert list(dti) == list(arr)
|
| 750 |
+
|
| 751 |
+
# Check that Index.__new__ knows what to do with DatetimeArray
|
| 752 |
+
dti2 = pd.Index(arr)
|
| 753 |
+
assert isinstance(dti2, DatetimeIndex)
|
| 754 |
+
assert list(dti2) == list(arr)
|
| 755 |
+
|
| 756 |
+
def test_astype_object(self, arr1d):
|
| 757 |
+
arr = arr1d
|
| 758 |
+
dti = self.index_cls(arr1d)
|
| 759 |
+
|
| 760 |
+
asobj = arr.astype("O")
|
| 761 |
+
assert isinstance(asobj, np.ndarray)
|
| 762 |
+
assert asobj.dtype == "O"
|
| 763 |
+
assert list(asobj) == list(dti)
|
| 764 |
+
|
| 765 |
+
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
|
| 766 |
+
def test_to_period(self, datetime_index, freqstr):
|
| 767 |
+
dti = datetime_index
|
| 768 |
+
arr = dti._data
|
| 769 |
+
|
| 770 |
+
freqstr = freq_to_period_freqstr(1, freqstr)
|
| 771 |
+
expected = dti.to_period(freq=freqstr)
|
| 772 |
+
result = arr.to_period(freq=freqstr)
|
| 773 |
+
assert isinstance(result, PeriodArray)
|
| 774 |
+
|
| 775 |
+
tm.assert_equal(result, expected._data)
|
| 776 |
+
|
| 777 |
+
def test_to_period_2d(self, arr1d):
|
| 778 |
+
arr2d = arr1d.reshape(1, -1)
|
| 779 |
+
|
| 780 |
+
warn = None if arr1d.tz is None else UserWarning
|
| 781 |
+
with tm.assert_produces_warning(warn):
|
| 782 |
+
result = arr2d.to_period("D")
|
| 783 |
+
expected = arr1d.to_period("D").reshape(1, -1)
|
| 784 |
+
tm.assert_period_array_equal(result, expected)
|
| 785 |
+
|
| 786 |
+
@pytest.mark.parametrize("propname", DatetimeArray._bool_ops)
|
| 787 |
+
def test_bool_properties(self, arr1d, propname):
|
| 788 |
+
# in this case _bool_ops is just `is_leap_year`
|
| 789 |
+
dti = self.index_cls(arr1d)
|
| 790 |
+
arr = arr1d
|
| 791 |
+
assert dti.freq == arr.freq
|
| 792 |
+
|
| 793 |
+
result = getattr(arr, propname)
|
| 794 |
+
expected = np.array(getattr(dti, propname), dtype=result.dtype)
|
| 795 |
+
|
| 796 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 797 |
+
|
| 798 |
+
@pytest.mark.parametrize("propname", DatetimeArray._field_ops)
|
| 799 |
+
def test_int_properties(self, arr1d, propname):
|
| 800 |
+
dti = self.index_cls(arr1d)
|
| 801 |
+
arr = arr1d
|
| 802 |
+
|
| 803 |
+
result = getattr(arr, propname)
|
| 804 |
+
expected = np.array(getattr(dti, propname), dtype=result.dtype)
|
| 805 |
+
|
| 806 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 807 |
+
|
| 808 |
+
def test_take_fill_valid(self, arr1d, fixed_now_ts):
|
| 809 |
+
arr = arr1d
|
| 810 |
+
dti = self.index_cls(arr1d)
|
| 811 |
+
|
| 812 |
+
now = fixed_now_ts.tz_localize(dti.tz)
|
| 813 |
+
result = arr.take([-1, 1], allow_fill=True, fill_value=now)
|
| 814 |
+
assert result[0] == now
|
| 815 |
+
|
| 816 |
+
msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
|
| 817 |
+
with pytest.raises(TypeError, match=msg):
|
| 818 |
+
# fill_value Timedelta invalid
|
| 819 |
+
arr.take([-1, 1], allow_fill=True, fill_value=now - now)
|
| 820 |
+
|
| 821 |
+
with pytest.raises(TypeError, match=msg):
|
| 822 |
+
# fill_value Period invalid
|
| 823 |
+
arr.take([-1, 1], allow_fill=True, fill_value=Period("2014Q1"))
|
| 824 |
+
|
| 825 |
+
tz = None if dti.tz is not None else "US/Eastern"
|
| 826 |
+
now = fixed_now_ts.tz_localize(tz)
|
| 827 |
+
msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
|
| 828 |
+
with pytest.raises(TypeError, match=msg):
|
| 829 |
+
# Timestamp with mismatched tz-awareness
|
| 830 |
+
arr.take([-1, 1], allow_fill=True, fill_value=now)
|
| 831 |
+
|
| 832 |
+
value = NaT._value
|
| 833 |
+
msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
|
| 834 |
+
with pytest.raises(TypeError, match=msg):
|
| 835 |
+
# require NaT, not iNaT, as it could be confused with an integer
|
| 836 |
+
arr.take([-1, 1], allow_fill=True, fill_value=value)
|
| 837 |
+
|
| 838 |
+
value = np.timedelta64("NaT", "ns")
|
| 839 |
+
with pytest.raises(TypeError, match=msg):
|
| 840 |
+
# require appropriate-dtype if we have a NA value
|
| 841 |
+
arr.take([-1, 1], allow_fill=True, fill_value=value)
|
| 842 |
+
|
| 843 |
+
if arr.tz is not None:
|
| 844 |
+
# GH#37356
|
| 845 |
+
# Assuming here that arr1d fixture does not include Australia/Melbourne
|
| 846 |
+
value = fixed_now_ts.tz_localize("Australia/Melbourne")
|
| 847 |
+
result = arr.take([-1, 1], allow_fill=True, fill_value=value)
|
| 848 |
+
|
| 849 |
+
expected = arr.take(
|
| 850 |
+
[-1, 1],
|
| 851 |
+
allow_fill=True,
|
| 852 |
+
fill_value=value.tz_convert(arr.dtype.tz),
|
| 853 |
+
)
|
| 854 |
+
tm.assert_equal(result, expected)
|
| 855 |
+
|
| 856 |
+
def test_concat_same_type_invalid(self, arr1d):
|
| 857 |
+
# different timezones
|
| 858 |
+
arr = arr1d
|
| 859 |
+
|
| 860 |
+
if arr.tz is None:
|
| 861 |
+
other = arr.tz_localize("UTC")
|
| 862 |
+
else:
|
| 863 |
+
other = arr.tz_localize(None)
|
| 864 |
+
|
| 865 |
+
with pytest.raises(ValueError, match="to_concat must have the same"):
|
| 866 |
+
arr._concat_same_type([arr, other])
|
| 867 |
+
|
| 868 |
+
def test_concat_same_type_different_freq(self, unit):
|
| 869 |
+
# we *can* concatenate DTI with different freqs.
|
| 870 |
+
a = pd.date_range("2000", periods=2, freq="D", tz="US/Central", unit=unit)._data
|
| 871 |
+
b = pd.date_range("2000", periods=2, freq="h", tz="US/Central", unit=unit)._data
|
| 872 |
+
result = DatetimeArray._concat_same_type([a, b])
|
| 873 |
+
expected = (
|
| 874 |
+
pd.to_datetime(
|
| 875 |
+
[
|
| 876 |
+
"2000-01-01 00:00:00",
|
| 877 |
+
"2000-01-02 00:00:00",
|
| 878 |
+
"2000-01-01 00:00:00",
|
| 879 |
+
"2000-01-01 01:00:00",
|
| 880 |
+
]
|
| 881 |
+
)
|
| 882 |
+
.tz_localize("US/Central")
|
| 883 |
+
.as_unit(unit)
|
| 884 |
+
._data
|
| 885 |
+
)
|
| 886 |
+
|
| 887 |
+
tm.assert_datetime_array_equal(result, expected)
|
| 888 |
+
|
| 889 |
+
def test_strftime(self, arr1d):
|
| 890 |
+
arr = arr1d
|
| 891 |
+
|
| 892 |
+
result = arr.strftime("%Y %b")
|
| 893 |
+
expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object)
|
| 894 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 895 |
+
|
| 896 |
+
def test_strftime_nat(self):
|
| 897 |
+
# GH 29578
|
| 898 |
+
arr = DatetimeIndex(["2019-01-01", NaT])._data
|
| 899 |
+
|
| 900 |
+
result = arr.strftime("%Y-%m-%d")
|
| 901 |
+
expected = np.array(["2019-01-01", np.nan], dtype=object)
|
| 902 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 903 |
+
|
| 904 |
+
|
| 905 |
+
class TestTimedeltaArray(SharedTests):
|
| 906 |
+
index_cls = TimedeltaIndex
|
| 907 |
+
array_cls = TimedeltaArray
|
| 908 |
+
scalar_type = pd.Timedelta
|
| 909 |
+
example_dtype = "m8[ns]"
|
| 910 |
+
|
| 911 |
+
def test_from_tdi(self):
|
| 912 |
+
tdi = TimedeltaIndex(["1 Day", "3 Hours"])
|
| 913 |
+
arr = tdi._data
|
| 914 |
+
assert list(arr) == list(tdi)
|
| 915 |
+
|
| 916 |
+
# Check that Index.__new__ knows what to do with TimedeltaArray
|
| 917 |
+
tdi2 = pd.Index(arr)
|
| 918 |
+
assert isinstance(tdi2, TimedeltaIndex)
|
| 919 |
+
assert list(tdi2) == list(arr)
|
| 920 |
+
|
| 921 |
+
def test_astype_object(self):
|
| 922 |
+
tdi = TimedeltaIndex(["1 Day", "3 Hours"])
|
| 923 |
+
arr = tdi._data
|
| 924 |
+
asobj = arr.astype("O")
|
| 925 |
+
assert isinstance(asobj, np.ndarray)
|
| 926 |
+
assert asobj.dtype == "O"
|
| 927 |
+
assert list(asobj) == list(tdi)
|
| 928 |
+
|
| 929 |
+
def test_to_pytimedelta(self, timedelta_index):
|
| 930 |
+
tdi = timedelta_index
|
| 931 |
+
arr = tdi._data
|
| 932 |
+
|
| 933 |
+
expected = tdi.to_pytimedelta()
|
| 934 |
+
result = arr.to_pytimedelta()
|
| 935 |
+
|
| 936 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 937 |
+
|
| 938 |
+
def test_total_seconds(self, timedelta_index):
|
| 939 |
+
tdi = timedelta_index
|
| 940 |
+
arr = tdi._data
|
| 941 |
+
|
| 942 |
+
expected = tdi.total_seconds()
|
| 943 |
+
result = arr.total_seconds()
|
| 944 |
+
|
| 945 |
+
tm.assert_numpy_array_equal(result, expected.values)
|
| 946 |
+
|
| 947 |
+
@pytest.mark.parametrize("propname", TimedeltaArray._field_ops)
|
| 948 |
+
def test_int_properties(self, timedelta_index, propname):
|
| 949 |
+
tdi = timedelta_index
|
| 950 |
+
arr = tdi._data
|
| 951 |
+
|
| 952 |
+
result = getattr(arr, propname)
|
| 953 |
+
expected = np.array(getattr(tdi, propname), dtype=result.dtype)
|
| 954 |
+
|
| 955 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 956 |
+
|
| 957 |
+
def test_array_interface(self, timedelta_index):
|
| 958 |
+
arr = timedelta_index._data
|
| 959 |
+
copy_false = None if np_version_gt2 else False
|
| 960 |
+
|
| 961 |
+
# default asarray gives the same underlying data
|
| 962 |
+
result = np.asarray(arr)
|
| 963 |
+
expected = arr._ndarray
|
| 964 |
+
assert result is expected
|
| 965 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 966 |
+
result = np.array(arr, copy=copy_false)
|
| 967 |
+
assert result is expected
|
| 968 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 969 |
+
|
| 970 |
+
# specifying m8[ns] gives the same result as default
|
| 971 |
+
result = np.asarray(arr, dtype="timedelta64[ns]")
|
| 972 |
+
expected = arr._ndarray
|
| 973 |
+
assert result is expected
|
| 974 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 975 |
+
result = np.array(arr, dtype="timedelta64[ns]", copy=copy_false)
|
| 976 |
+
assert result is expected
|
| 977 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 978 |
+
result = np.array(arr, dtype="timedelta64[ns]")
|
| 979 |
+
if not np_version_gt2:
|
| 980 |
+
# TODO: GH 57739
|
| 981 |
+
assert result is not expected
|
| 982 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 983 |
+
|
| 984 |
+
# to object dtype
|
| 985 |
+
result = np.asarray(arr, dtype=object)
|
| 986 |
+
expected = np.array(list(arr), dtype=object)
|
| 987 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 988 |
+
|
| 989 |
+
# to other dtype always copies
|
| 990 |
+
result = np.asarray(arr, dtype="int64")
|
| 991 |
+
assert result is not arr.asi8
|
| 992 |
+
assert not np.may_share_memory(arr, result)
|
| 993 |
+
expected = arr.asi8.copy()
|
| 994 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 995 |
+
|
| 996 |
+
# other dtypes handled by numpy
|
| 997 |
+
for dtype in ["float64", str]:
|
| 998 |
+
result = np.asarray(arr, dtype=dtype)
|
| 999 |
+
expected = np.asarray(arr).astype(dtype)
|
| 1000 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1001 |
+
|
| 1002 |
+
def test_take_fill_valid(self, timedelta_index, fixed_now_ts):
|
| 1003 |
+
tdi = timedelta_index
|
| 1004 |
+
arr = tdi._data
|
| 1005 |
+
|
| 1006 |
+
td1 = pd.Timedelta(days=1)
|
| 1007 |
+
result = arr.take([-1, 1], allow_fill=True, fill_value=td1)
|
| 1008 |
+
assert result[0] == td1
|
| 1009 |
+
|
| 1010 |
+
value = fixed_now_ts
|
| 1011 |
+
msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. Got"
|
| 1012 |
+
with pytest.raises(TypeError, match=msg):
|
| 1013 |
+
# fill_value Timestamp invalid
|
| 1014 |
+
arr.take([0, 1], allow_fill=True, fill_value=value)
|
| 1015 |
+
|
| 1016 |
+
value = fixed_now_ts.to_period("D")
|
| 1017 |
+
with pytest.raises(TypeError, match=msg):
|
| 1018 |
+
# fill_value Period invalid
|
| 1019 |
+
arr.take([0, 1], allow_fill=True, fill_value=value)
|
| 1020 |
+
|
| 1021 |
+
value = np.datetime64("NaT", "ns")
|
| 1022 |
+
with pytest.raises(TypeError, match=msg):
|
| 1023 |
+
# require appropriate-dtype if we have a NA value
|
| 1024 |
+
arr.take([-1, 1], allow_fill=True, fill_value=value)
|
| 1025 |
+
|
| 1026 |
+
|
| 1027 |
+
@pytest.mark.filterwarnings(r"ignore:Period with BDay freq is deprecated:FutureWarning")
|
| 1028 |
+
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
|
| 1029 |
+
class TestPeriodArray(SharedTests):
|
| 1030 |
+
index_cls = PeriodIndex
|
| 1031 |
+
array_cls = PeriodArray
|
| 1032 |
+
scalar_type = Period
|
| 1033 |
+
example_dtype = PeriodIndex([], freq="W").dtype
|
| 1034 |
+
|
| 1035 |
+
@pytest.fixture
|
| 1036 |
+
def arr1d(self, period_index):
|
| 1037 |
+
"""
|
| 1038 |
+
Fixture returning DatetimeArray from parametrized PeriodIndex objects
|
| 1039 |
+
"""
|
| 1040 |
+
return period_index._data
|
| 1041 |
+
|
| 1042 |
+
def test_from_pi(self, arr1d):
|
| 1043 |
+
pi = self.index_cls(arr1d)
|
| 1044 |
+
arr = arr1d
|
| 1045 |
+
assert list(arr) == list(pi)
|
| 1046 |
+
|
| 1047 |
+
# Check that Index.__new__ knows what to do with PeriodArray
|
| 1048 |
+
pi2 = pd.Index(arr)
|
| 1049 |
+
assert isinstance(pi2, PeriodIndex)
|
| 1050 |
+
assert list(pi2) == list(arr)
|
| 1051 |
+
|
| 1052 |
+
def test_astype_object(self, arr1d):
|
| 1053 |
+
pi = self.index_cls(arr1d)
|
| 1054 |
+
arr = arr1d
|
| 1055 |
+
asobj = arr.astype("O")
|
| 1056 |
+
assert isinstance(asobj, np.ndarray)
|
| 1057 |
+
assert asobj.dtype == "O"
|
| 1058 |
+
assert list(asobj) == list(pi)
|
| 1059 |
+
|
| 1060 |
+
def test_take_fill_valid(self, arr1d):
|
| 1061 |
+
arr = arr1d
|
| 1062 |
+
|
| 1063 |
+
value = NaT._value
|
| 1064 |
+
msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
|
| 1065 |
+
with pytest.raises(TypeError, match=msg):
|
| 1066 |
+
# require NaT, not iNaT, as it could be confused with an integer
|
| 1067 |
+
arr.take([-1, 1], allow_fill=True, fill_value=value)
|
| 1068 |
+
|
| 1069 |
+
value = np.timedelta64("NaT", "ns")
|
| 1070 |
+
with pytest.raises(TypeError, match=msg):
|
| 1071 |
+
# require appropriate-dtype if we have a NA value
|
| 1072 |
+
arr.take([-1, 1], allow_fill=True, fill_value=value)
|
| 1073 |
+
|
| 1074 |
+
@pytest.mark.parametrize("how", ["S", "E"])
|
| 1075 |
+
def test_to_timestamp(self, how, arr1d):
|
| 1076 |
+
pi = self.index_cls(arr1d)
|
| 1077 |
+
arr = arr1d
|
| 1078 |
+
|
| 1079 |
+
expected = DatetimeIndex(pi.to_timestamp(how=how))._data
|
| 1080 |
+
result = arr.to_timestamp(how=how)
|
| 1081 |
+
assert isinstance(result, DatetimeArray)
|
| 1082 |
+
|
| 1083 |
+
tm.assert_equal(result, expected)
|
| 1084 |
+
|
| 1085 |
+
def test_to_timestamp_roundtrip_bday(self):
|
| 1086 |
+
# Case where infer_freq inside would choose "D" instead of "B"
|
| 1087 |
+
dta = pd.date_range("2021-10-18", periods=3, freq="B")._data
|
| 1088 |
+
parr = dta.to_period()
|
| 1089 |
+
result = parr.to_timestamp()
|
| 1090 |
+
assert result.freq == "B"
|
| 1091 |
+
tm.assert_extension_array_equal(result, dta)
|
| 1092 |
+
|
| 1093 |
+
dta2 = dta[::2]
|
| 1094 |
+
parr2 = dta2.to_period()
|
| 1095 |
+
result2 = parr2.to_timestamp()
|
| 1096 |
+
assert result2.freq == "2B"
|
| 1097 |
+
tm.assert_extension_array_equal(result2, dta2)
|
| 1098 |
+
|
| 1099 |
+
parr3 = dta.to_period("2B")
|
| 1100 |
+
result3 = parr3.to_timestamp()
|
| 1101 |
+
assert result3.freq == "B"
|
| 1102 |
+
tm.assert_extension_array_equal(result3, dta)
|
| 1103 |
+
|
| 1104 |
+
def test_to_timestamp_out_of_bounds(self):
|
| 1105 |
+
# GH#19643 previously overflowed silently
|
| 1106 |
+
pi = pd.period_range("1500", freq="Y", periods=3)
|
| 1107 |
+
msg = "Out of bounds nanosecond timestamp: 1500-01-01 00:00:00"
|
| 1108 |
+
with pytest.raises(OutOfBoundsDatetime, match=msg):
|
| 1109 |
+
pi.to_timestamp()
|
| 1110 |
+
|
| 1111 |
+
with pytest.raises(OutOfBoundsDatetime, match=msg):
|
| 1112 |
+
pi._data.to_timestamp()
|
| 1113 |
+
|
| 1114 |
+
@pytest.mark.parametrize("propname", PeriodArray._bool_ops)
|
| 1115 |
+
def test_bool_properties(self, arr1d, propname):
|
| 1116 |
+
# in this case _bool_ops is just `is_leap_year`
|
| 1117 |
+
pi = self.index_cls(arr1d)
|
| 1118 |
+
arr = arr1d
|
| 1119 |
+
|
| 1120 |
+
result = getattr(arr, propname)
|
| 1121 |
+
expected = np.array(getattr(pi, propname))
|
| 1122 |
+
|
| 1123 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1124 |
+
|
| 1125 |
+
@pytest.mark.parametrize("propname", PeriodArray._field_ops)
|
| 1126 |
+
def test_int_properties(self, arr1d, propname):
|
| 1127 |
+
pi = self.index_cls(arr1d)
|
| 1128 |
+
arr = arr1d
|
| 1129 |
+
|
| 1130 |
+
result = getattr(arr, propname)
|
| 1131 |
+
expected = np.array(getattr(pi, propname))
|
| 1132 |
+
|
| 1133 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1134 |
+
|
| 1135 |
+
def test_array_interface(self, arr1d):
|
| 1136 |
+
arr = arr1d
|
| 1137 |
+
|
| 1138 |
+
# default asarray gives objects
|
| 1139 |
+
result = np.asarray(arr)
|
| 1140 |
+
expected = np.array(list(arr), dtype=object)
|
| 1141 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1142 |
+
|
| 1143 |
+
# to object dtype (same as default)
|
| 1144 |
+
result = np.asarray(arr, dtype=object)
|
| 1145 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1146 |
+
|
| 1147 |
+
result = np.asarray(arr, dtype="int64")
|
| 1148 |
+
tm.assert_numpy_array_equal(result, arr.asi8)
|
| 1149 |
+
|
| 1150 |
+
# to other dtypes
|
| 1151 |
+
msg = r"float\(\) argument must be a string or a( real)? number, not 'Period'"
|
| 1152 |
+
with pytest.raises(TypeError, match=msg):
|
| 1153 |
+
np.asarray(arr, dtype="float64")
|
| 1154 |
+
|
| 1155 |
+
result = np.asarray(arr, dtype="S20")
|
| 1156 |
+
expected = np.asarray(arr).astype("S20")
|
| 1157 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1158 |
+
|
| 1159 |
+
def test_strftime(self, arr1d):
|
| 1160 |
+
arr = arr1d
|
| 1161 |
+
|
| 1162 |
+
result = arr.strftime("%Y")
|
| 1163 |
+
expected = np.array([per.strftime("%Y") for per in arr], dtype=object)
|
| 1164 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1165 |
+
|
| 1166 |
+
def test_strftime_nat(self):
|
| 1167 |
+
# GH 29578
|
| 1168 |
+
arr = PeriodArray(PeriodIndex(["2019-01-01", NaT], dtype="period[D]"))
|
| 1169 |
+
|
| 1170 |
+
result = arr.strftime("%Y-%m-%d")
|
| 1171 |
+
expected = np.array(["2019-01-01", np.nan], dtype=object)
|
| 1172 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1173 |
+
|
| 1174 |
+
|
| 1175 |
+
@pytest.mark.parametrize(
|
| 1176 |
+
"arr,casting_nats",
|
| 1177 |
+
[
|
| 1178 |
+
(
|
| 1179 |
+
TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data,
|
| 1180 |
+
(NaT, np.timedelta64("NaT", "ns")),
|
| 1181 |
+
),
|
| 1182 |
+
(
|
| 1183 |
+
pd.date_range("2000-01-01", periods=3, freq="D")._data,
|
| 1184 |
+
(NaT, np.datetime64("NaT", "ns")),
|
| 1185 |
+
),
|
| 1186 |
+
(pd.period_range("2000-01-01", periods=3, freq="D")._data, (NaT,)),
|
| 1187 |
+
],
|
| 1188 |
+
ids=lambda x: type(x).__name__,
|
| 1189 |
+
)
|
| 1190 |
+
def test_casting_nat_setitem_array(arr, casting_nats):
|
| 1191 |
+
expected = type(arr)._from_sequence([NaT, arr[1], arr[2]], dtype=arr.dtype)
|
| 1192 |
+
|
| 1193 |
+
for nat in casting_nats:
|
| 1194 |
+
arr = arr.copy()
|
| 1195 |
+
arr[0] = nat
|
| 1196 |
+
tm.assert_equal(arr, expected)
|
| 1197 |
+
|
| 1198 |
+
|
| 1199 |
+
@pytest.mark.parametrize(
|
| 1200 |
+
"arr,non_casting_nats",
|
| 1201 |
+
[
|
| 1202 |
+
(
|
| 1203 |
+
TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data,
|
| 1204 |
+
(np.datetime64("NaT", "ns"), NaT._value),
|
| 1205 |
+
),
|
| 1206 |
+
(
|
| 1207 |
+
pd.date_range("2000-01-01", periods=3, freq="D")._data,
|
| 1208 |
+
(np.timedelta64("NaT", "ns"), NaT._value),
|
| 1209 |
+
),
|
| 1210 |
+
(
|
| 1211 |
+
pd.period_range("2000-01-01", periods=3, freq="D")._data,
|
| 1212 |
+
(np.datetime64("NaT", "ns"), np.timedelta64("NaT", "ns"), NaT._value),
|
| 1213 |
+
),
|
| 1214 |
+
],
|
| 1215 |
+
ids=lambda x: type(x).__name__,
|
| 1216 |
+
)
|
| 1217 |
+
def test_invalid_nat_setitem_array(arr, non_casting_nats):
|
| 1218 |
+
msg = (
|
| 1219 |
+
"value should be a '(Timestamp|Timedelta|Period)', 'NaT', or array of those. "
|
| 1220 |
+
"Got '(timedelta64|datetime64|int)' instead."
|
| 1221 |
+
)
|
| 1222 |
+
|
| 1223 |
+
for nat in non_casting_nats:
|
| 1224 |
+
with pytest.raises(TypeError, match=msg):
|
| 1225 |
+
arr[0] = nat
|
| 1226 |
+
|
| 1227 |
+
|
| 1228 |
+
@pytest.mark.parametrize(
|
| 1229 |
+
"arr",
|
| 1230 |
+
[
|
| 1231 |
+
pd.date_range("2000", periods=4).array,
|
| 1232 |
+
pd.timedelta_range("2000", periods=4).array,
|
| 1233 |
+
],
|
| 1234 |
+
)
|
| 1235 |
+
def test_to_numpy_extra(arr):
|
| 1236 |
+
arr[0] = NaT
|
| 1237 |
+
original = arr.copy()
|
| 1238 |
+
|
| 1239 |
+
result = arr.to_numpy()
|
| 1240 |
+
assert np.isnan(result[0])
|
| 1241 |
+
|
| 1242 |
+
result = arr.to_numpy(dtype="int64")
|
| 1243 |
+
assert result[0] == -9223372036854775808
|
| 1244 |
+
|
| 1245 |
+
result = arr.to_numpy(dtype="int64", na_value=0)
|
| 1246 |
+
assert result[0] == 0
|
| 1247 |
+
|
| 1248 |
+
result = arr.to_numpy(na_value=arr[1].to_numpy())
|
| 1249 |
+
assert result[0] == result[1]
|
| 1250 |
+
|
| 1251 |
+
result = arr.to_numpy(na_value=arr[1].to_numpy(copy=False))
|
| 1252 |
+
assert result[0] == result[1]
|
| 1253 |
+
|
| 1254 |
+
tm.assert_equal(arr, original)
|
| 1255 |
+
|
| 1256 |
+
|
| 1257 |
+
@pytest.mark.parametrize("as_index", [True, False])
|
| 1258 |
+
@pytest.mark.parametrize(
|
| 1259 |
+
"values",
|
| 1260 |
+
[
|
| 1261 |
+
pd.to_datetime(["2020-01-01", "2020-02-01"]),
|
| 1262 |
+
pd.to_timedelta([1, 2], unit="D"),
|
| 1263 |
+
PeriodIndex(["2020-01-01", "2020-02-01"], freq="D"),
|
| 1264 |
+
],
|
| 1265 |
+
)
|
| 1266 |
+
@pytest.mark.parametrize(
|
| 1267 |
+
"klass",
|
| 1268 |
+
[
|
| 1269 |
+
list,
|
| 1270 |
+
np.array,
|
| 1271 |
+
pd.array,
|
| 1272 |
+
pd.Series,
|
| 1273 |
+
pd.Index,
|
| 1274 |
+
pd.Categorical,
|
| 1275 |
+
pd.CategoricalIndex,
|
| 1276 |
+
],
|
| 1277 |
+
)
|
| 1278 |
+
def test_searchsorted_datetimelike_with_listlike(values, klass, as_index):
|
| 1279 |
+
# https://github.com/pandas-dev/pandas/issues/32762
|
| 1280 |
+
if not as_index:
|
| 1281 |
+
values = values._data
|
| 1282 |
+
|
| 1283 |
+
result = values.searchsorted(klass(values))
|
| 1284 |
+
expected = np.array([0, 1], dtype=result.dtype)
|
| 1285 |
+
|
| 1286 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 1287 |
+
|
| 1288 |
+
|
| 1289 |
+
@pytest.mark.parametrize(
|
| 1290 |
+
"values",
|
| 1291 |
+
[
|
| 1292 |
+
pd.to_datetime(["2020-01-01", "2020-02-01"]),
|
| 1293 |
+
pd.to_timedelta([1, 2], unit="D"),
|
| 1294 |
+
PeriodIndex(["2020-01-01", "2020-02-01"], freq="D"),
|
| 1295 |
+
],
|
| 1296 |
+
)
|
| 1297 |
+
@pytest.mark.parametrize(
|
| 1298 |
+
"arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2]
|
| 1299 |
+
)
|
| 1300 |
+
def test_searchsorted_datetimelike_with_listlike_invalid_dtype(values, arg):
|
| 1301 |
+
# https://github.com/pandas-dev/pandas/issues/32762
|
| 1302 |
+
msg = "[Unexpected type|Cannot compare]"
|
| 1303 |
+
with pytest.raises(TypeError, match=msg):
|
| 1304 |
+
values.searchsorted(arg)
|
| 1305 |
+
|
| 1306 |
+
|
| 1307 |
+
@pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series])
|
| 1308 |
+
def test_period_index_construction_from_strings(klass):
|
| 1309 |
+
# https://github.com/pandas-dev/pandas/issues/26109
|
| 1310 |
+
strings = ["2020Q1", "2020Q2"] * 2
|
| 1311 |
+
data = klass(strings)
|
| 1312 |
+
result = PeriodIndex(data, freq="Q")
|
| 1313 |
+
expected = PeriodIndex([Period(s) for s in strings])
|
| 1314 |
+
tm.assert_index_equal(result, expected)
|
| 1315 |
+
|
| 1316 |
+
|
| 1317 |
+
@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
|
| 1318 |
+
def test_from_pandas_array(dtype):
|
| 1319 |
+
# GH#24615
|
| 1320 |
+
data = np.array([1, 2, 3], dtype=dtype)
|
| 1321 |
+
arr = NumpyExtensionArray(data)
|
| 1322 |
+
|
| 1323 |
+
cls = {"M8[ns]": DatetimeArray, "m8[ns]": TimedeltaArray}[dtype]
|
| 1324 |
+
|
| 1325 |
+
depr_msg = f"{cls.__name__}.__init__ is deprecated"
|
| 1326 |
+
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
| 1327 |
+
result = cls(arr)
|
| 1328 |
+
expected = cls(data)
|
| 1329 |
+
tm.assert_extension_array_equal(result, expected)
|
| 1330 |
+
|
| 1331 |
+
result = cls._from_sequence(arr, dtype=dtype)
|
| 1332 |
+
expected = cls._from_sequence(data, dtype=dtype)
|
| 1333 |
+
tm.assert_extension_array_equal(result, expected)
|
| 1334 |
+
|
| 1335 |
+
func = {"M8[ns]": pd.to_datetime, "m8[ns]": pd.to_timedelta}[dtype]
|
| 1336 |
+
result = func(arr).array
|
| 1337 |
+
expected = func(data).array
|
| 1338 |
+
tm.assert_equal(result, expected)
|
| 1339 |
+
|
| 1340 |
+
# Let's check the Indexes while we're here
|
| 1341 |
+
idx_cls = {"M8[ns]": DatetimeIndex, "m8[ns]": TimedeltaIndex}[dtype]
|
| 1342 |
+
result = idx_cls(arr)
|
| 1343 |
+
expected = idx_cls(data)
|
| 1344 |
+
tm.assert_index_equal(result, expected)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_datetimes.py
ADDED
|
@@ -0,0 +1,840 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests for DatetimeArray
|
| 3 |
+
"""
|
| 4 |
+
from __future__ import annotations
|
| 5 |
+
|
| 6 |
+
from datetime import timedelta
|
| 7 |
+
import operator
|
| 8 |
+
|
| 9 |
+
try:
|
| 10 |
+
from zoneinfo import ZoneInfo
|
| 11 |
+
except ImportError:
|
| 12 |
+
# Cannot assign to a type
|
| 13 |
+
ZoneInfo = None # type: ignore[misc, assignment]
|
| 14 |
+
|
| 15 |
+
import numpy as np
|
| 16 |
+
import pytest
|
| 17 |
+
|
| 18 |
+
from pandas._libs.tslibs import tz_compare
|
| 19 |
+
|
| 20 |
+
from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
| 21 |
+
|
| 22 |
+
import pandas as pd
|
| 23 |
+
import pandas._testing as tm
|
| 24 |
+
from pandas.core.arrays import (
|
| 25 |
+
DatetimeArray,
|
| 26 |
+
TimedeltaArray,
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class TestNonNano:
    """Tests for DatetimeArray backed by "s", "ms" or "us" resolution data."""

    @pytest.fixture(params=["s", "ms", "us"])
    def unit(self, request):
        """Parametrized fixture: one of the "s", "ms" or "us" unit strings."""
        return request.param

    @pytest.fixture
    def dtype(self, unit, tz_naive_fixture):
        """Dtype for ``unit``: numpy datetime64 when naive, else DatetimeTZDtype."""
        if tz_naive_fixture is not None:
            return DatetimeTZDtype(unit=unit, tz=tz_naive_fixture)
        return np.dtype(f"datetime64[{unit}]")

    @pytest.fixture
    def dta_dti(self, unit, dtype):
        """Return ``(dta, dti)``: a DatetimeArray in ``unit`` and a matching index."""
        zone = getattr(dtype, "tz", None)
        dti = pd.date_range("2016-01-01", periods=55, freq="D", tz=zone)

        # For tz-aware data, convert to UTC and drop the tz before casting
        # the values to the requested unit.
        naive = dti if zone is None else dti.tz_convert("UTC").tz_localize(None)
        values = np.asarray(naive).astype(f"M8[{unit}]")

        return DatetimeArray._simple_new(values, dtype=dtype), dti

    @pytest.fixture
    def dta(self, dta_dti):
        """Only the DatetimeArray half of ``dta_dti``."""
        return dta_dti[0]

    def test_non_nano(self, unit, dtype):
        """Scalars taken from the array carry its unit and timezone."""
        raw = np.arange(5, dtype=np.int64).view(f"M8[{unit}]")
        dta = DatetimeArray._simple_new(raw, dtype=dtype)

        assert dta.dtype == dtype
        first = dta[0]
        assert first.unit == unit
        assert tz_compare(dta.tz, first.tz)
        assert (first == dta[:1]).all()

    @pytest.mark.parametrize(
        "field", DatetimeArray._field_ops + DatetimeArray._bool_ops
    )
    def test_fields(self, unit, field, dtype, dta_dti):
        """Every field/bool accessor matches the one on the index's array."""
        dta, dti = dta_dti
        assert (dti == dta).all()

        tm.assert_numpy_array_equal(getattr(dta, field), getattr(dti._data, field))

    def test_normalize(self, unit):
        """normalize() on a non-midnight array matches the normalized index."""
        cast = f"M8[{unit}]"
        dti = pd.date_range("2016-01-01 06:00:00", periods=55, freq="D")

        values = np.asarray(dti).astype(cast)
        dta = DatetimeArray._simple_new(values, dtype=values.dtype)
        assert not dta.is_normalized

        # TODO: simplify once we can just .astype to other unit
        exp_values = np.asarray(dti.normalize()).astype(cast)
        expected = DatetimeArray._simple_new(exp_values, dtype=exp_values.dtype)

        tm.assert_extension_array_equal(dta.normalize(), expected)

    def test_simple_new_requires_match(self, unit):
        """_simple_new accepts a matching-unit dtype and asserts on a "ns" one."""
        raw = np.arange(5, dtype=np.int64).view(f"M8[{unit}]")

        matching = DatetimeTZDtype(unit, "UTC")
        assert DatetimeArray._simple_new(raw, dtype=matching).dtype == matching

        mismatched = DatetimeTZDtype("ns", "UTC")
        with pytest.raises(AssertionError, match=""):
            DatetimeArray._simple_new(raw, dtype=mismatched)

    def test_std_non_nano(self, unit):
        """std() keeps the array's resolution."""
        dti = pd.date_range("2016-01-01", periods=55, freq="D")
        values = np.asarray(dti).astype(f"M8[{unit}]")
        dta = DatetimeArray._simple_new(values, dtype=values.dtype)

        # we should match the nano-reso std, but floored to our reso.
        spread = dta.std()
        assert spread._creso == dta._creso
        assert spread == dti.std().floor(unit)

    @pytest.mark.filterwarnings("ignore:Converting to PeriodArray.*:UserWarning")
    def test_to_period(self, dta_dti):
        """to_period("D") agrees with the index's own array."""
        dta, dti = dta_dti

        expected = dti._data.to_period("D")
        tm.assert_extension_array_equal(dta.to_period("D"), expected)

    def test_iter(self, dta):
        """Iterating yields Timestamps identical to positional lookup."""
        first = next(iter(dta))
        by_position = dta[0]

        assert type(first) is pd.Timestamp
        assert first._value == by_position._value
        assert first._creso == by_position._creso
        assert first == by_position

    def test_astype_object(self, dta):
        """astype(object) boxes each element at the array's resolution."""
        boxed = dta.astype(object)

        assert all(elem._creso == dta._creso for elem in boxed)
        assert all(got == want for got, want in zip(boxed, dta))

    def test_to_pydatetime(self, dta_dti):
        """to_pydatetime() matches the index's result."""
        dta, dti = dta_dti

        tm.assert_numpy_array_equal(dta.to_pydatetime(), dti.to_pydatetime())

    @pytest.mark.parametrize("meth", ["time", "timetz", "date"])
    def test_time_date(self, dta_dti, meth):
        """The time/timetz/date accessors match the index's."""
        dta, dti = dta_dti

        tm.assert_numpy_array_equal(getattr(dta, meth), getattr(dti, meth))

    def test_format_native_types(self, unit, dtype, dta_dti):
        """Formatted values are the same as for the index's array."""
        # In this case we should get the same formatted values with our nano
        #  version dti._data as we do with the non-nano dta
        dta, dti = dta_dti

        formatted = dta._format_native_types()
        tm.assert_numpy_array_equal(formatted, dti._data._format_native_types())

    def test_repr(self, dta_dti, unit):
        """The repr differs from the index array's only in the unit."""
        dta, dti = dta_dti

        nano_repr = repr(dti._data)
        assert repr(dta) == nano_repr.replace("[ns", f"[{unit}")

    # TODO: tests with td64
    def test_compare_mismatched_resolutions(self, comparison_op):
        """Comparisons across resolutions are correct at the int64 bounds."""
        # comparison that numpy gets wrong bc of silent overflows
        op = comparison_op

        bounds = np.iinfo(np.int64)
        vals = np.array([bounds.min, bounds.min + 1, bounds.max], dtype=np.int64)

        # Construct so that arr2[1] < arr[1] < arr[2] < arr2[2]
        arr = np.array(vals).view("M8[ns]")
        arr2 = arr.view("M8[s]")

        left = DatetimeArray._simple_new(arr, dtype=arr.dtype)
        right = DatetimeArray._simple_new(arr2, dtype=arr2.dtype)

        # Any operator not listed (gt, ge) falls through to the default.
        outcomes = {
            operator.eq: [False, False, False],
            operator.ne: [True, True, True],
            operator.lt: [False, False, True],
            operator.le: [False, False, True],
        }
        expected = np.array(outcomes.get(op, [False, True, False]))

        tm.assert_numpy_array_equal(op(left, right), expected)
        tm.assert_numpy_array_equal(op(left[1], right), expected)

        if op not in [operator.eq, operator.ne]:
            # check that numpy still gets this wrong; if it is fixed we may be
            #  able to remove compare_mismatched_resolutions
            np_res = op(left._ndarray, right._ndarray)
            tm.assert_numpy_array_equal(np_res[1:], ~expected[1:])

    def test_add_mismatched_reso_doesnt_downcast(self):
        """Adding a "us" Timedelta keeps the result at "us"."""
        # https://github.com/pandas-dev/pandas/pull/48748#issuecomment-1260181008
        td = pd.Timedelta(microseconds=1)
        dti = pd.date_range("2016-01-01", periods=3) - td
        dta = dti._data.as_unit("us")

        total = dta + td.as_unit("us")
        # even though the result is an even number of days
        #  (so we _could_ downcast to unit="s"), we do not.
        assert total.unit == "us"

    @pytest.mark.parametrize(
        "scalar",
        [
            timedelta(hours=2),
            pd.Timedelta(hours=2),
            np.timedelta64(2, "h"),
            np.timedelta64(2 * 3600 * 1000, "ms"),
            pd.offsets.Minute(120),
            pd.offsets.Hour(2),
        ],
    )
    def test_add_timedeltalike_scalar_mismatched_reso(self, dta_dti, scalar):
        """Adding/subtracting a timedelta-like uses the finer of the two units."""
        dta, dti = dta_dti

        td = pd.Timedelta(scalar)
        exp_unit = tm.get_finest_unit(dta.unit, td.unit)

        added = (dti + td)._data.as_unit(exp_unit)
        tm.assert_extension_array_equal(dta + scalar, added)
        tm.assert_extension_array_equal(scalar + dta, added)

        subtracted = (dti - td)._data.as_unit(exp_unit)
        tm.assert_extension_array_equal(dta - scalar, subtracted)

    def test_sub_datetimelike_scalar_mismatch(self):
        """A "us" array minus an "s" Timestamp gives an "m8[us]" result."""
        dti = pd.date_range("2016-01-01", periods=3)
        dta = dti._data.as_unit("us")
        ts = dta[0].as_unit("s")

        diff = dta - ts
        expected = (dti - dti[0])._data.as_unit("us")

        assert diff.dtype == "m8[us]"
        tm.assert_extension_array_equal(diff, expected)

    def test_sub_datetime64_reso_mismatch(self):
        """Subtracting "s" and "ms" arrays gives "ms" zeros in either order."""
        dti = pd.date_range("2016-01-01", periods=3)
        left = dti._data.as_unit("s")
        right = left.as_unit("ms")

        exp_values = np.array([0, 0, 0], dtype="m8[ms]")
        expected = TimedeltaArray._simple_new(exp_values, dtype=exp_values.dtype)

        forward = left - right
        tm.assert_extension_array_equal(forward, expected)

        backward = right - left
        tm.assert_extension_array_equal(backward, expected)
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
class TestDatetimeArrayComparisons:
    """Comparison-operator tests for DatetimeArray."""

    # TODO: merge this into tests/arithmetic/test_datetime64 once it is
    #  sufficiently robust

    def test_cmp_dt64_arraylike_tznaive(self, comparison_op):
        """Comparing a tz-naive array with equal array-likes, in both orders."""
        # arbitrary tz-naive DatetimeIndex
        op = comparison_op

        dti = pd.date_range("2016-01-1", freq="MS", periods=9, tz=None)
        arr = dti._data
        assert arr.freq == dti.freq
        assert arr.tz == dti.tz

        # Both operands hold the same values, so the strict operators give
        # all-False and every other operator gives all-True.
        if op.__name__ in ["ne", "gt", "lt"]:
            expected = np.zeros(len(arr), dtype=bool)
        else:
            expected = np.ones(len(arr), dtype=bool)

        tm.assert_numpy_array_equal(op(arr, arr), expected)

        right = dti
        candidates = [
            right,
            np.array(right),
            list(right),
            tuple(right),
            right.astype(object),
        ]
        for other in candidates:
            tm.assert_numpy_array_equal(op(arr, other), expected)
            tm.assert_numpy_array_equal(op(other, arr), expected)
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
class TestDatetimeArray:
    """General DatetimeArray tests: astype, setitem, fill, search, shift, freq."""

    def test_astype_ns_to_ms_near_bounds(self):
        """astype("M8[ms]") on a timestamp in 1677 matches direct construction."""
        # GH#55979
        ts = pd.Timestamp("1677-09-21 00:12:43.145225")
        target = ts.as_unit("ms")

        dta = DatetimeArray._from_sequence([ts], dtype="M8[ns]")
        assert (dta.view("i8") == ts.as_unit("ns").value).all()

        result = dta.astype("M8[ms]")
        assert result[0] == target

        expected = DatetimeArray._from_sequence([ts], dtype="M8[ms]")
        assert (expected.view("i8") == target._value).all()

        tm.assert_datetime_array_equal(result, expected)

    def test_astype_non_nano_tznaive(self):
        """astype("M8[s]") works on both the index and its array."""
        dti = pd.date_range("2016-01-01", periods=3)

        res = dti.astype("M8[s]")
        assert res.dtype == "M8[s]"

        dta = dti._data
        res = dta.astype("M8[s]")
        assert res.dtype == "M8[s]"
        assert isinstance(res, pd.core.arrays.DatetimeArray)  # used to be ndarray

    def test_astype_non_nano_tzaware(self):
        """tz-aware astype to "s" resolution, including the copy=False path."""
        dti = pd.date_range("2016-01-01", periods=3, tz="UTC")

        res = dti.astype("M8[s, US/Pacific]")
        assert res.dtype == "M8[s, US/Pacific]"

        dta = dti._data
        res = dta.astype("M8[s, US/Pacific]")
        assert res.dtype == "M8[s, US/Pacific]"

        # from non-nano to non-nano, preserving reso
        res2 = res.astype("M8[s, UTC]")
        assert res2.dtype == "M8[s, UTC]"
        assert not tm.shares_memory(res2, res)

        res3 = res.astype("M8[s, UTC]", copy=False)
        # Check the copy=False result itself; this line previously
        # re-checked res2, leaving res3's dtype unverified.
        assert res3.dtype == "M8[s, UTC]"
        assert tm.shares_memory(res3, res)

    def test_astype_to_same(self):
        """astype to an equal dtype with copy=False returns the same object."""
        arr = DatetimeArray._from_sequence(
            ["2000"], dtype=DatetimeTZDtype(tz="US/Central")
        )
        result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False)
        assert result is arr

    @pytest.mark.parametrize("dtype", ["datetime64[ns]", "datetime64[ns, UTC]"])
    @pytest.mark.parametrize(
        "other", ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, CET]"]
    )
    def test_astype_copies(self, dtype, other):
        """Series.astype copies; mixing naive and aware dtypes raises TypeError."""
        # https://github.com/pandas-dev/pandas/pull/32490
        ser = pd.Series([1, 2], dtype=dtype)
        orig = ser.copy()

        # Exactly one side being tz-naive is the case that must raise.
        err = False
        if (dtype == "datetime64[ns]") ^ (other == "datetime64[ns]"):
            # deprecated in favor of tz_localize
            err = True

        if err:
            if dtype == "datetime64[ns]":
                msg = "Use obj.tz_localize instead or series.dt.tz_localize instead"
            else:
                msg = "from timezone-aware dtype to timezone-naive dtype"
            with pytest.raises(TypeError, match=msg):
                ser.astype(other)
        else:
            t = ser.astype(other)
            # Mutating the result must leave the source Series untouched.
            t[:] = pd.NaT
            tm.assert_series_equal(ser, orig)

    @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
    def test_astype_int(self, dtype):
        """Only int64 is accepted by astype; other integer dtypes raise."""
        arr = DatetimeArray._from_sequence(
            [pd.Timestamp("2000"), pd.Timestamp("2001")], dtype="M8[ns]"
        )

        if np.dtype(dtype) != np.int64:
            with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"):
                arr.astype(dtype)
            return

        result = arr.astype(dtype)
        expected = arr._ndarray.view("i8")
        tm.assert_numpy_array_equal(result, expected)

    def test_astype_to_sparse_dt64(self):
        """astype to a Sparse datetime64 dtype keeps the values."""
        # GH#50082
        dti = pd.date_range("2016-01-01", periods=4)
        dta = dti._data
        result = dta.astype("Sparse[datetime64[ns]]")

        assert result.dtype == "Sparse[datetime64[ns]]"
        assert (result == dta).all()

    def test_tz_setter_raises(self):
        """Assigning to .tz raises AttributeError mentioning tz_localize."""
        arr = DatetimeArray._from_sequence(
            ["2000"], dtype=DatetimeTZDtype(tz="US/Central")
        )
        with pytest.raises(AttributeError, match="tz_localize"):
            arr.tz = "UTC"

    def test_setitem_str_impute_tz(self, tz_naive_fixture):
        """Setting naive-looking strings uses the array's own timezone."""
        # Like for getitem, if we are passed a naive-like string, we impute
        #  our own timezone.
        tz = tz_naive_fixture

        data = np.array([1, 2, 3], dtype="M8[ns]")
        dtype = data.dtype if tz is None else DatetimeTZDtype(tz=tz)
        arr = DatetimeArray._from_sequence(data, dtype=dtype)
        expected = arr.copy()

        ts = pd.Timestamp("2020-09-08 16:50").tz_localize(tz)
        setter = str(ts.tz_localize(None))

        # Setting a scalar tznaive string
        expected[0] = ts
        arr[0] = setter
        tm.assert_equal(arr, expected)

        # Setting a listlike of tznaive strings
        expected[1] = ts
        arr[:2] = [setter, setter]
        tm.assert_equal(arr, expected)

    def test_setitem_different_tz_raises(self):
        """A naive value raises; an aware value in another tz is accepted."""
        # pre-2.0 we required exact tz match, in 2.0 we require only
        #  tzawareness-match
        data = np.array([1, 2, 3], dtype="M8[ns]")
        arr = DatetimeArray._from_sequence(
            data, copy=False, dtype=DatetimeTZDtype(tz="US/Central")
        )
        with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"):
            arr[0] = pd.Timestamp("2000")

        ts = pd.Timestamp("2000", tz="US/Eastern")
        arr[0] = ts
        assert arr[0] == ts.tz_convert("US/Central")

    def test_setitem_clears_freq(self):
        """Setting an element resets the array's freq to None."""
        a = pd.date_range("2000", periods=2, freq="D", tz="US/Central")._data
        a[0] = pd.Timestamp("2000", tz="US/Central")
        assert a.freq is None

    @pytest.mark.parametrize(
        "obj",
        [
            pd.Timestamp("2021-01-01"),
            pd.Timestamp("2021-01-01").to_datetime64(),
            pd.Timestamp("2021-01-01").to_pydatetime(),
        ],
    )
    def test_setitem_objects(self, obj):
        """Timestamp, datetime64 and datetime are all valid values to set."""
        # make sure we accept datetime64 and datetime in addition to Timestamp
        dti = pd.date_range("2000", periods=2, freq="D")
        arr = dti._data

        arr[0] = obj
        assert arr[0] == obj

    def test_repeat_preserves_tz(self):
        """repeat keeps the dtype (and so the tz) and the values."""
        dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central")
        arr = dti._data

        repeated = arr.repeat([1, 1])

        # preserves tz and values, but not freq
        expected = DatetimeArray._from_sequence(arr.asi8, dtype=arr.dtype)
        tm.assert_equal(repeated, expected)

    def test_value_counts_preserves_tz(self):
        """value_counts returns a tz-aware index, with and without NaT."""
        dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central")
        arr = dti._data.repeat([4, 3])

        result = arr.value_counts()

        # Note: not tm.assert_index_equal, since `freq`s do not match
        assert result.index.equals(dti)

        arr[-2] = pd.NaT
        result = arr.value_counts(dropna=False)
        expected = pd.Series([4, 2, 1], index=[dti[0], dti[1], pd.NaT], name="count")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("method", ["pad", "backfill"])
    def test_fillna_preserves_tz(self, method):
        """_pad_or_backfill keeps the tz and leaves its input unmodified."""
        dti = pd.date_range("2000-01-01", periods=5, freq="D", tz="US/Central")
        arr = DatetimeArray._from_sequence(dti, copy=True)
        arr[2] = pd.NaT

        # "pad" fills from the previous element, "backfill" from the next.
        fill_val = dti[1] if method == "pad" else dti[3]
        expected = DatetimeArray._from_sequence(
            [dti[0], dti[1], fill_val, dti[3], dti[4]],
            dtype=DatetimeTZDtype(tz="US/Central"),
        )

        result = arr._pad_or_backfill(method=method)
        tm.assert_extension_array_equal(result, expected)

        # assert that arr and dti were not modified in-place
        assert arr[2] is pd.NaT
        assert dti[2] == pd.Timestamp("2000-01-03", tz="US/Central")

    def test_fillna_2d(self):
        """pad/backfill on 2D data, for C- and F-ordered backing arrays."""
        dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific")
        dta = dti._data.reshape(3, 2).copy()
        dta[0, 1] = pd.NaT
        dta[1, 0] = pd.NaT

        res1 = dta._pad_or_backfill(method="pad")
        expected1 = dta.copy()
        expected1[1, 0] = dta[0, 0]
        tm.assert_extension_array_equal(res1, expected1)

        res2 = dta._pad_or_backfill(method="backfill")
        expected2 = dta.copy()
        expected2[1, 0] = dta[2, 0]
        expected2[0, 1] = dta[1, 1]
        tm.assert_extension_array_equal(res2, expected2)

        # with different ordering for underlying ndarray; behavior should
        #  be unchanged
        dta2 = dta._from_backing_data(dta._ndarray.copy(order="F"))
        assert dta2._ndarray.flags["F_CONTIGUOUS"]
        assert not dta2._ndarray.flags["C_CONTIGUOUS"]
        tm.assert_extension_array_equal(dta, dta2)

        res3 = dta2._pad_or_backfill(method="pad")
        tm.assert_extension_array_equal(res3, expected1)

        res4 = dta2._pad_or_backfill(method="backfill")
        tm.assert_extension_array_equal(res4, expected2)

        # test the DataFrame method while we're here
        df = pd.DataFrame(dta)
        res = df.ffill()
        expected = pd.DataFrame(expected1)
        tm.assert_frame_equal(res, expected)

        res = df.bfill()
        expected = pd.DataFrame(expected2)
        tm.assert_frame_equal(res, expected)

    def test_array_interface_tz(self):
        """np.asarray on tz-aware data: object by default, UTC for "M8[ns]"."""
        tz = "US/Central"
        data = pd.date_range("2017", periods=2, tz=tz)._data
        result = np.asarray(data)

        expected = np.array(
            [
                pd.Timestamp("2017-01-01T00:00:00", tz=tz),
                pd.Timestamp("2017-01-02T00:00:00", tz=tz),
            ],
            dtype=object,
        )
        tm.assert_numpy_array_equal(result, expected)

        result = np.asarray(data, dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        result = np.asarray(data, dtype="M8[ns]")

        expected = np.array(
            ["2017-01-01T06:00:00", "2017-01-02T06:00:00"], dtype="M8[ns]"
        )
        tm.assert_numpy_array_equal(result, expected)

    def test_array_interface(self):
        """np.asarray on tz-naive data: datetime64[ns] by default, or object."""
        data = pd.date_range("2017", periods=2)._data
        expected = np.array(
            ["2017-01-01T00:00:00", "2017-01-02T00:00:00"], dtype="datetime64[ns]"
        )

        result = np.asarray(data)
        tm.assert_numpy_array_equal(result, expected)

        result = np.asarray(data, dtype=object)
        expected = np.array(
            [pd.Timestamp("2017-01-01T00:00:00"), pd.Timestamp("2017-01-02T00:00:00")],
            dtype=object,
        )
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("index", [True, False])
    def test_searchsorted_different_tz(self, index):
        """searchsorted gives the same answer for values converted to UTC."""
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = pd.DatetimeIndex(data, freq="D")._data.tz_localize("Asia/Tokyo")
        if index:
            arr = pd.Index(arr)

        expected = arr.searchsorted(arr[2])
        result = arr.searchsorted(arr[2].tz_convert("UTC"))
        assert result == expected

        expected = arr.searchsorted(arr[2:6])
        result = arr.searchsorted(arr[2:6].tz_convert("UTC"))
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize("index", [True, False])
    def test_searchsorted_tzawareness_compat(self, index):
        """searchsorted raises TypeError when mixing tz-naive and tz-aware."""
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = pd.DatetimeIndex(data, freq="D")._data
        if index:
            arr = pd.Index(arr)

        mismatch = arr.tz_localize("Asia/Tokyo")

        msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
        with pytest.raises(TypeError, match=msg):
            arr.searchsorted(mismatch[0])
        with pytest.raises(TypeError, match=msg):
            arr.searchsorted(mismatch)

        with pytest.raises(TypeError, match=msg):
            mismatch.searchsorted(arr[0])
        with pytest.raises(TypeError, match=msg):
            mismatch.searchsorted(arr)

    @pytest.mark.parametrize(
        "other",
        [
            1,
            np.int64(1),
            1.0,
            np.timedelta64("NaT"),
            pd.Timedelta(days=2),
            "invalid",
            np.arange(10, dtype="i8") * 24 * 3600 * 10**9,
            np.arange(10).view("timedelta64[ns]") * 24 * 3600 * 10**9,
            pd.Timestamp("2021-01-01").to_period("D"),
        ],
    )
    @pytest.mark.parametrize("index", [True, False])
    def test_searchsorted_invalid_types(self, other, index):
        """searchsorted raises TypeError for non-datetime-like arguments."""
        data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        arr = pd.DatetimeIndex(data, freq="D")._data
        if index:
            arr = pd.Index(arr)

        # Either message is accepted.
        msg = "|".join(
            [
                "searchsorted requires compatible dtype or scalar",
                "value should be a 'Timestamp', 'NaT', or array of those. Got",
            ]
        )
        with pytest.raises(TypeError, match=msg):
            arr.searchsorted(other)

    def test_shift_fill_value(self):
        """shift accepts Timestamp, datetime and datetime64 fill values."""
        dti = pd.date_range("2016-01-01", periods=3)

        dta = dti._data
        expected = DatetimeArray._from_sequence(np.roll(dta._ndarray, 1))

        fv = dta[-1]
        for fill_value in [fv, fv.to_pydatetime(), fv.to_datetime64()]:
            result = dta.shift(1, fill_value=fill_value)
            tm.assert_datetime_array_equal(result, expected)

        dta = dta.tz_localize("UTC")
        expected = expected.tz_localize("UTC")
        fv = dta[-1]
        for fill_value in [fv, fv.to_pydatetime()]:
            result = dta.shift(1, fill_value=fill_value)
            tm.assert_datetime_array_equal(result, expected)

    def test_shift_value_tzawareness_mismatch(self):
        """shift raises TypeError when fill_value's tz-awareness differs."""
        dti = pd.date_range("2016-01-01", periods=3)

        dta = dti._data

        fv = dta[-1].tz_localize("UTC")
        for invalid in [fv, fv.to_pydatetime()]:
            with pytest.raises(TypeError, match="Cannot compare"):
                dta.shift(1, fill_value=invalid)

        dta = dta.tz_localize("UTC")
        fv = dta[-1].tz_localize(None)
        for invalid in [fv, fv.to_pydatetime(), fv.to_datetime64()]:
            with pytest.raises(TypeError, match="Cannot compare"):
                dta.shift(1, fill_value=invalid)

    def test_shift_requires_tzmatch(self):
        """A fill_value in another tz behaves like its UTC-converted form."""
        # pre-2.0 we required exact tz match, in 2.0 we require just
        #  matching tzawareness
        dti = pd.date_range("2016-01-01", periods=3, tz="UTC")
        dta = dti._data

        fill_value = pd.Timestamp("2020-10-18 18:44", tz="US/Pacific")

        result = dta.shift(1, fill_value=fill_value)
        expected = dta.shift(1, fill_value=fill_value.tz_convert("UTC"))
        tm.assert_equal(result, expected)

    def test_tz_localize_t2d(self):
        """tz_localize on 2D data matches ravel/localize/reshape, and round-trips."""
        dti = pd.date_range("1994-05-12", periods=12, tz="US/Pacific")
        dta = dti._data.reshape(3, 4)
        result = dta.tz_localize(None)

        expected = dta.ravel().tz_localize(None).reshape(dta.shape)
        tm.assert_datetime_array_equal(result, expected)

        roundtrip = expected.tz_localize("US/Pacific")
        tm.assert_datetime_array_equal(roundtrip, dta)

    # Timezone spellings for test_iter_zoneinfo_fold; a ZoneInfo object is
    # added only when zoneinfo imported and the key lookup succeeds.
    easts = ["US/Eastern", "dateutil/US/Eastern"]
    if ZoneInfo is not None:
        try:
            tz = ZoneInfo("US/Eastern")
        except KeyError:
            # no tzdata
            pass
        else:
            # Argument 1 to "append" of "list" has incompatible type "ZoneInfo";
            # expected "str"
            easts.append(tz)  # type: ignore[arg-type]

    @pytest.mark.parametrize("tz", easts)
    def test_iter_zoneinfo_fold(self, tz):
        """Indexing, iteration and astype(object) agree on the UTC offset."""
        # GH#49684
        utc_vals = np.array(
            [1320552000, 1320555600, 1320559200, 1320562800], dtype=np.int64
        )
        utc_vals *= 1_000_000_000

        dta = DatetimeArray._from_sequence(utc_vals).tz_localize("UTC").tz_convert(tz)

        left = dta[2]
        right = list(dta)[2]
        assert str(left) == str(right)
        # previously there was a bug where with non-pytz right would be
        #   Timestamp('2011-11-06 01:00:00-0400', tz='US/Eastern')
        # while left would be
        #   Timestamp('2011-11-06 01:00:00-0500', tz='US/Eastern')
        # The .value's would match (so they would compare as equal),
        #  but the folds would not
        assert left.utcoffset() == right.utcoffset()

        # The same bug in ints_to_pydatetime affected .astype, so we test
        #  that here.
        right2 = dta.astype(object)[2]
        assert str(left) == str(right2)
        assert left.utcoffset() == right2.utcoffset()

    @pytest.mark.parametrize(
        "freq, freq_depr",
        [
            ("2ME", "2M"),
            ("2SME", "2SM"),
            ("2SME", "2sm"),
            ("2QE", "2Q"),
            ("2QE-SEP", "2Q-SEP"),
            ("1YE", "1Y"),
            ("2YE-MAR", "2Y-MAR"),
            ("1YE", "1A"),
            ("2YE-MAR", "2A-MAR"),
            ("2ME", "2m"),
            ("2QE-SEP", "2q-sep"),
            ("2YE-MAR", "2a-mar"),
            ("2YE", "2y"),
        ],
    )
    def test_date_range_frequency_M_Q_Y_A_deprecated(self, freq, freq_depr):
        """Deprecated M/Q/Y/A aliases warn and give the same range as the new ones."""
        # GH#9586, GH#54275
        # NOTE(review): a second f-string used to sit on the line after this
        # assignment but was never concatenated (no enclosing parentheses), so
        # only this prefix has ever been matched. The dead statement is
        # removed; confirm the full warning text before tightening the match.
        depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed "

        expected = pd.date_range("1/1/2000", periods=4, freq=freq)
        with tm.assert_produces_warning(FutureWarning, match=depr_msg):
            result = pd.date_range("1/1/2000", periods=4, freq=freq_depr)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("freq_depr", ["2H", "2CBH", "2MIN", "2S", "2mS", "2Us"])
    def test_date_range_uppercase_frequency_deprecated(self, freq_depr):
        """Deprecated uppercase aliases warn and match their lowercase form."""
        # GH#9586, GH#54939
        # NOTE(review): a second f-string used to sit on the line after this
        # assignment but was never concatenated (no enclosing parentheses), so
        # only this prefix has ever been matched. The dead statement is
        # removed; confirm the full warning text before tightening the match.
        depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed in a "

        expected = pd.date_range("1/1/2000", periods=4, freq=freq_depr.lower())
        with tm.assert_produces_warning(FutureWarning, match=depr_msg):
            result = pd.date_range("1/1/2000", periods=4, freq=freq_depr)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "freq_depr",
        [
            "2ye-mar",
            "2ys",
            "2qe",
            "2qs-feb",
            "2bqs",
            "2sms",
            "2bms",
            "2cbme",
            "2me",
            "2w",
        ],
    )
    def test_date_range_lowercase_frequency_deprecated(self, freq_depr):
        """Deprecated lowercase aliases warn and match their uppercase form."""
        # GH#9586, GH#54939
        # NOTE(review): a second f-string used to sit on the line after this
        # assignment but was never concatenated (no enclosing parentheses), so
        # only this prefix has ever been matched. The dead statement is
        # removed; confirm the full warning text before tightening the match.
        depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed in a "

        expected = pd.date_range("1/1/2000", periods=4, freq=freq_depr.upper())
        with tm.assert_produces_warning(FutureWarning, match=depr_msg):
            result = pd.date_range("1/1/2000", periods=4, freq=freq_depr)
        tm.assert_index_equal(result, expected)
|
| 828 |
+
|
| 829 |
+
|
| 830 |
+
def test_factorize_sort_without_freq():
    """factorize(sort=True) raises NotImplementedError on both array types."""
    msg = r"call pd.factorize\(obj, sort=True\) instead"

    dta = DatetimeArray._from_sequence([0, 2, 1], dtype="M8[ns]")
    # Do TimedeltaArray while we're here
    tda = dta - dta[0]

    for values in (dta, tda):
        with pytest.raises(NotImplementedError, match=msg):
            values.factorize(sort=True)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_ndarray_backed.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests for subclasses of NDArrayBackedExtensionArray
|
| 3 |
+
"""
|
| 4 |
+
import numpy as np
|
| 5 |
+
|
| 6 |
+
from pandas import (
|
| 7 |
+
CategoricalIndex,
|
| 8 |
+
date_range,
|
| 9 |
+
)
|
| 10 |
+
from pandas.core.arrays import (
|
| 11 |
+
Categorical,
|
| 12 |
+
DatetimeArray,
|
| 13 |
+
NumpyExtensionArray,
|
| 14 |
+
TimedeltaArray,
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class TestEmpty:
|
| 19 |
+
def test_empty_categorical(self):
|
| 20 |
+
ci = CategoricalIndex(["a", "b", "c"], ordered=True)
|
| 21 |
+
dtype = ci.dtype
|
| 22 |
+
|
| 23 |
+
# case with int8 codes
|
| 24 |
+
shape = (4,)
|
| 25 |
+
result = Categorical._empty(shape, dtype=dtype)
|
| 26 |
+
assert isinstance(result, Categorical)
|
| 27 |
+
assert result.shape == shape
|
| 28 |
+
assert result._ndarray.dtype == np.int8
|
| 29 |
+
|
| 30 |
+
# case where repr would segfault if we didn't override base implementation
|
| 31 |
+
result = Categorical._empty((4096,), dtype=dtype)
|
| 32 |
+
assert isinstance(result, Categorical)
|
| 33 |
+
assert result.shape == (4096,)
|
| 34 |
+
assert result._ndarray.dtype == np.int8
|
| 35 |
+
repr(result)
|
| 36 |
+
|
| 37 |
+
# case with int16 codes
|
| 38 |
+
ci = CategoricalIndex(list(range(512)) * 4, ordered=False)
|
| 39 |
+
dtype = ci.dtype
|
| 40 |
+
result = Categorical._empty(shape, dtype=dtype)
|
| 41 |
+
assert isinstance(result, Categorical)
|
| 42 |
+
assert result.shape == shape
|
| 43 |
+
assert result._ndarray.dtype == np.int16
|
| 44 |
+
|
| 45 |
+
def test_empty_dt64tz(self):
|
| 46 |
+
dti = date_range("2016-01-01", periods=2, tz="Asia/Tokyo")
|
| 47 |
+
dtype = dti.dtype
|
| 48 |
+
|
| 49 |
+
shape = (0,)
|
| 50 |
+
result = DatetimeArray._empty(shape, dtype=dtype)
|
| 51 |
+
assert result.dtype == dtype
|
| 52 |
+
assert isinstance(result, DatetimeArray)
|
| 53 |
+
assert result.shape == shape
|
| 54 |
+
|
| 55 |
+
def test_empty_dt64(self):
|
| 56 |
+
shape = (3, 9)
|
| 57 |
+
result = DatetimeArray._empty(shape, dtype="datetime64[ns]")
|
| 58 |
+
assert isinstance(result, DatetimeArray)
|
| 59 |
+
assert result.shape == shape
|
| 60 |
+
|
| 61 |
+
def test_empty_td64(self):
|
| 62 |
+
shape = (3, 9)
|
| 63 |
+
result = TimedeltaArray._empty(shape, dtype="m8[ns]")
|
| 64 |
+
assert isinstance(result, TimedeltaArray)
|
| 65 |
+
assert result.shape == shape
|
| 66 |
+
|
| 67 |
+
def test_empty_pandas_array(self):
|
| 68 |
+
arr = NumpyExtensionArray(np.array([1, 2]))
|
| 69 |
+
dtype = arr.dtype
|
| 70 |
+
|
| 71 |
+
shape = (3, 9)
|
| 72 |
+
result = NumpyExtensionArray._empty(shape, dtype=dtype)
|
| 73 |
+
assert isinstance(result, NumpyExtensionArray)
|
| 74 |
+
assert result.dtype == dtype
|
| 75 |
+
assert result.shape == shape
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_period.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas._libs.tslibs import iNaT
|
| 5 |
+
from pandas._libs.tslibs.period import IncompatibleFrequency
|
| 6 |
+
|
| 7 |
+
from pandas.core.dtypes.base import _registry as registry
|
| 8 |
+
from pandas.core.dtypes.dtypes import PeriodDtype
|
| 9 |
+
|
| 10 |
+
import pandas as pd
|
| 11 |
+
import pandas._testing as tm
|
| 12 |
+
from pandas.core.arrays import PeriodArray
|
| 13 |
+
|
| 14 |
+
# ----------------------------------------------------------------------------
|
| 15 |
+
# Dtype
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def test_registered():
|
| 19 |
+
assert PeriodDtype in registry.dtypes
|
| 20 |
+
result = registry.find("Period[D]")
|
| 21 |
+
expected = PeriodDtype("D")
|
| 22 |
+
assert result == expected
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# ----------------------------------------------------------------------------
|
| 26 |
+
# period_array
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def test_asi8():
|
| 30 |
+
result = PeriodArray._from_sequence(["2000", "2001", None], dtype="period[D]").asi8
|
| 31 |
+
expected = np.array([10957, 11323, iNaT])
|
| 32 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def test_take_raises():
|
| 36 |
+
arr = PeriodArray._from_sequence(["2000", "2001"], dtype="period[D]")
|
| 37 |
+
with pytest.raises(IncompatibleFrequency, match="freq"):
|
| 38 |
+
arr.take([0, -1], allow_fill=True, fill_value=pd.Period("2000", freq="W"))
|
| 39 |
+
|
| 40 |
+
msg = "value should be a 'Period' or 'NaT'. Got 'str' instead"
|
| 41 |
+
with pytest.raises(TypeError, match=msg):
|
| 42 |
+
arr.take([0, -1], allow_fill=True, fill_value="foo")
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def test_fillna_raises():
|
| 46 |
+
arr = PeriodArray._from_sequence(["2000", "2001", "2002"], dtype="period[D]")
|
| 47 |
+
with pytest.raises(ValueError, match="Length"):
|
| 48 |
+
arr.fillna(arr[:2])
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def test_fillna_copies():
|
| 52 |
+
arr = PeriodArray._from_sequence(["2000", "2001", "2002"], dtype="period[D]")
|
| 53 |
+
result = arr.fillna(pd.Period("2000", "D"))
|
| 54 |
+
assert result is not arr
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# ----------------------------------------------------------------------------
|
| 58 |
+
# setitem
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
@pytest.mark.parametrize(
|
| 62 |
+
"key, value, expected",
|
| 63 |
+
[
|
| 64 |
+
([0], pd.Period("2000", "D"), [10957, 1, 2]),
|
| 65 |
+
([0], None, [iNaT, 1, 2]),
|
| 66 |
+
([0], np.nan, [iNaT, 1, 2]),
|
| 67 |
+
([0, 1, 2], pd.Period("2000", "D"), [10957] * 3),
|
| 68 |
+
(
|
| 69 |
+
[0, 1, 2],
|
| 70 |
+
[pd.Period("2000", "D"), pd.Period("2001", "D"), pd.Period("2002", "D")],
|
| 71 |
+
[10957, 11323, 11688],
|
| 72 |
+
),
|
| 73 |
+
],
|
| 74 |
+
)
|
| 75 |
+
def test_setitem(key, value, expected):
|
| 76 |
+
arr = PeriodArray(np.arange(3), dtype="period[D]")
|
| 77 |
+
expected = PeriodArray(expected, dtype="period[D]")
|
| 78 |
+
arr[key] = value
|
| 79 |
+
tm.assert_period_array_equal(arr, expected)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def test_setitem_raises_incompatible_freq():
|
| 83 |
+
arr = PeriodArray(np.arange(3), dtype="period[D]")
|
| 84 |
+
with pytest.raises(IncompatibleFrequency, match="freq"):
|
| 85 |
+
arr[0] = pd.Period("2000", freq="Y")
|
| 86 |
+
|
| 87 |
+
other = PeriodArray._from_sequence(["2000", "2001"], dtype="period[Y]")
|
| 88 |
+
with pytest.raises(IncompatibleFrequency, match="freq"):
|
| 89 |
+
arr[[0, 1]] = other
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def test_setitem_raises_length():
|
| 93 |
+
arr = PeriodArray(np.arange(3), dtype="period[D]")
|
| 94 |
+
with pytest.raises(ValueError, match="length"):
|
| 95 |
+
arr[[0, 1]] = [pd.Period("2000", freq="D")]
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def test_setitem_raises_type():
|
| 99 |
+
arr = PeriodArray(np.arange(3), dtype="period[D]")
|
| 100 |
+
with pytest.raises(TypeError, match="int"):
|
| 101 |
+
arr[0] = 1
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
# ----------------------------------------------------------------------------
|
| 105 |
+
# Ops
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def test_sub_period():
|
| 109 |
+
arr = PeriodArray._from_sequence(["2000", "2001"], dtype="period[D]")
|
| 110 |
+
other = pd.Period("2000", freq="M")
|
| 111 |
+
with pytest.raises(IncompatibleFrequency, match="freq"):
|
| 112 |
+
arr - other
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def test_sub_period_overflow():
|
| 116 |
+
# GH#47538
|
| 117 |
+
dti = pd.date_range("1677-09-22", periods=2, freq="D")
|
| 118 |
+
pi = dti.to_period("ns")
|
| 119 |
+
|
| 120 |
+
per = pd.Period._from_ordinal(10**14, pi.freq)
|
| 121 |
+
|
| 122 |
+
with pytest.raises(OverflowError, match="Overflow in int64 addition"):
|
| 123 |
+
pi - per
|
| 124 |
+
|
| 125 |
+
with pytest.raises(OverflowError, match="Overflow in int64 addition"):
|
| 126 |
+
per - pi
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
# ----------------------------------------------------------------------------
|
| 130 |
+
# Methods
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
@pytest.mark.parametrize(
|
| 134 |
+
"other",
|
| 135 |
+
[
|
| 136 |
+
pd.Period("2000", freq="h"),
|
| 137 |
+
PeriodArray._from_sequence(["2000", "2001", "2000"], dtype="period[h]"),
|
| 138 |
+
],
|
| 139 |
+
)
|
| 140 |
+
def test_where_different_freq_raises(other):
|
| 141 |
+
# GH#45768 The PeriodArray method raises, the Series method coerces
|
| 142 |
+
ser = pd.Series(
|
| 143 |
+
PeriodArray._from_sequence(["2000", "2001", "2002"], dtype="period[D]")
|
| 144 |
+
)
|
| 145 |
+
cond = np.array([True, False, True])
|
| 146 |
+
|
| 147 |
+
with pytest.raises(IncompatibleFrequency, match="freq"):
|
| 148 |
+
ser.array._where(cond, other)
|
| 149 |
+
|
| 150 |
+
res = ser.where(cond, other)
|
| 151 |
+
expected = ser.astype(object).where(cond, other)
|
| 152 |
+
tm.assert_series_equal(res, expected)
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
# ----------------------------------------------------------------------------
|
| 156 |
+
# Printing
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def test_repr_small():
|
| 160 |
+
arr = PeriodArray._from_sequence(["2000", "2001"], dtype="period[D]")
|
| 161 |
+
result = str(arr)
|
| 162 |
+
expected = (
|
| 163 |
+
"<PeriodArray>\n['2000-01-01', '2001-01-01']\nLength: 2, dtype: period[D]"
|
| 164 |
+
)
|
| 165 |
+
assert result == expected
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def test_repr_large():
|
| 169 |
+
arr = PeriodArray._from_sequence(["2000", "2001"] * 500, dtype="period[D]")
|
| 170 |
+
result = str(arr)
|
| 171 |
+
expected = (
|
| 172 |
+
"<PeriodArray>\n"
|
| 173 |
+
"['2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
|
| 174 |
+
"'2000-01-01',\n"
|
| 175 |
+
" '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
|
| 176 |
+
"'2001-01-01',\n"
|
| 177 |
+
" ...\n"
|
| 178 |
+
" '2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
|
| 179 |
+
"'2000-01-01',\n"
|
| 180 |
+
" '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
|
| 181 |
+
"'2001-01-01']\n"
|
| 182 |
+
"Length: 1000, dtype: period[D]"
|
| 183 |
+
)
|
| 184 |
+
assert result == expected
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_timedeltas.py
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import timedelta
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from pandas import Timedelta
|
| 8 |
+
import pandas._testing as tm
|
| 9 |
+
from pandas.core.arrays import (
|
| 10 |
+
DatetimeArray,
|
| 11 |
+
TimedeltaArray,
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class TestNonNano:
|
| 16 |
+
@pytest.fixture(params=["s", "ms", "us"])
|
| 17 |
+
def unit(self, request):
|
| 18 |
+
return request.param
|
| 19 |
+
|
| 20 |
+
@pytest.fixture
|
| 21 |
+
def tda(self, unit):
|
| 22 |
+
arr = np.arange(5, dtype=np.int64).view(f"m8[{unit}]")
|
| 23 |
+
return TimedeltaArray._simple_new(arr, dtype=arr.dtype)
|
| 24 |
+
|
| 25 |
+
def test_non_nano(self, unit):
|
| 26 |
+
arr = np.arange(5, dtype=np.int64).view(f"m8[{unit}]")
|
| 27 |
+
tda = TimedeltaArray._simple_new(arr, dtype=arr.dtype)
|
| 28 |
+
|
| 29 |
+
assert tda.dtype == arr.dtype
|
| 30 |
+
assert tda[0].unit == unit
|
| 31 |
+
|
| 32 |
+
def test_as_unit_raises(self, tda):
|
| 33 |
+
# GH#50616
|
| 34 |
+
with pytest.raises(ValueError, match="Supported units"):
|
| 35 |
+
tda.as_unit("D")
|
| 36 |
+
|
| 37 |
+
tdi = pd.Index(tda)
|
| 38 |
+
with pytest.raises(ValueError, match="Supported units"):
|
| 39 |
+
tdi.as_unit("D")
|
| 40 |
+
|
| 41 |
+
@pytest.mark.parametrize("field", TimedeltaArray._field_ops)
|
| 42 |
+
def test_fields(self, tda, field):
|
| 43 |
+
as_nano = tda._ndarray.astype("m8[ns]")
|
| 44 |
+
tda_nano = TimedeltaArray._simple_new(as_nano, dtype=as_nano.dtype)
|
| 45 |
+
|
| 46 |
+
result = getattr(tda, field)
|
| 47 |
+
expected = getattr(tda_nano, field)
|
| 48 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 49 |
+
|
| 50 |
+
def test_to_pytimedelta(self, tda):
|
| 51 |
+
as_nano = tda._ndarray.astype("m8[ns]")
|
| 52 |
+
tda_nano = TimedeltaArray._simple_new(as_nano, dtype=as_nano.dtype)
|
| 53 |
+
|
| 54 |
+
result = tda.to_pytimedelta()
|
| 55 |
+
expected = tda_nano.to_pytimedelta()
|
| 56 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 57 |
+
|
| 58 |
+
def test_total_seconds(self, unit, tda):
|
| 59 |
+
as_nano = tda._ndarray.astype("m8[ns]")
|
| 60 |
+
tda_nano = TimedeltaArray._simple_new(as_nano, dtype=as_nano.dtype)
|
| 61 |
+
|
| 62 |
+
result = tda.total_seconds()
|
| 63 |
+
expected = tda_nano.total_seconds()
|
| 64 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 65 |
+
|
| 66 |
+
def test_timedelta_array_total_seconds(self):
|
| 67 |
+
# GH34290
|
| 68 |
+
expected = Timedelta("2 min").total_seconds()
|
| 69 |
+
|
| 70 |
+
result = pd.array([Timedelta("2 min")]).total_seconds()[0]
|
| 71 |
+
assert result == expected
|
| 72 |
+
|
| 73 |
+
def test_total_seconds_nanoseconds(self):
|
| 74 |
+
# issue #48521
|
| 75 |
+
start_time = pd.Series(["2145-11-02 06:00:00"]).astype("datetime64[ns]")
|
| 76 |
+
end_time = pd.Series(["2145-11-02 07:06:00"]).astype("datetime64[ns]")
|
| 77 |
+
expected = (end_time - start_time).values / np.timedelta64(1, "s")
|
| 78 |
+
result = (end_time - start_time).dt.total_seconds().values
|
| 79 |
+
assert result == expected
|
| 80 |
+
|
| 81 |
+
@pytest.mark.parametrize(
|
| 82 |
+
"nat", [np.datetime64("NaT", "ns"), np.datetime64("NaT", "us")]
|
| 83 |
+
)
|
| 84 |
+
def test_add_nat_datetimelike_scalar(self, nat, tda):
|
| 85 |
+
result = tda + nat
|
| 86 |
+
assert isinstance(result, DatetimeArray)
|
| 87 |
+
assert result._creso == tda._creso
|
| 88 |
+
assert result.isna().all()
|
| 89 |
+
|
| 90 |
+
result = nat + tda
|
| 91 |
+
assert isinstance(result, DatetimeArray)
|
| 92 |
+
assert result._creso == tda._creso
|
| 93 |
+
assert result.isna().all()
|
| 94 |
+
|
| 95 |
+
def test_add_pdnat(self, tda):
|
| 96 |
+
result = tda + pd.NaT
|
| 97 |
+
assert isinstance(result, TimedeltaArray)
|
| 98 |
+
assert result._creso == tda._creso
|
| 99 |
+
assert result.isna().all()
|
| 100 |
+
|
| 101 |
+
result = pd.NaT + tda
|
| 102 |
+
assert isinstance(result, TimedeltaArray)
|
| 103 |
+
assert result._creso == tda._creso
|
| 104 |
+
assert result.isna().all()
|
| 105 |
+
|
| 106 |
+
# TODO: 2022-07-11 this is the only test that gets to DTA.tz_convert
|
| 107 |
+
# or tz_localize with non-nano; implement tests specific to that.
|
| 108 |
+
def test_add_datetimelike_scalar(self, tda, tz_naive_fixture):
|
| 109 |
+
ts = pd.Timestamp("2016-01-01", tz=tz_naive_fixture).as_unit("ns")
|
| 110 |
+
|
| 111 |
+
expected = tda.as_unit("ns") + ts
|
| 112 |
+
res = tda + ts
|
| 113 |
+
tm.assert_extension_array_equal(res, expected)
|
| 114 |
+
res = ts + tda
|
| 115 |
+
tm.assert_extension_array_equal(res, expected)
|
| 116 |
+
|
| 117 |
+
ts += Timedelta(1) # case where we can't cast losslessly
|
| 118 |
+
|
| 119 |
+
exp_values = tda._ndarray + ts.asm8
|
| 120 |
+
expected = (
|
| 121 |
+
DatetimeArray._simple_new(exp_values, dtype=exp_values.dtype)
|
| 122 |
+
.tz_localize("UTC")
|
| 123 |
+
.tz_convert(ts.tz)
|
| 124 |
+
)
|
| 125 |
+
|
| 126 |
+
result = tda + ts
|
| 127 |
+
tm.assert_extension_array_equal(result, expected)
|
| 128 |
+
|
| 129 |
+
result = ts + tda
|
| 130 |
+
tm.assert_extension_array_equal(result, expected)
|
| 131 |
+
|
| 132 |
+
def test_mul_scalar(self, tda):
|
| 133 |
+
other = 2
|
| 134 |
+
result = tda * other
|
| 135 |
+
expected = TimedeltaArray._simple_new(tda._ndarray * other, dtype=tda.dtype)
|
| 136 |
+
tm.assert_extension_array_equal(result, expected)
|
| 137 |
+
assert result._creso == tda._creso
|
| 138 |
+
|
| 139 |
+
def test_mul_listlike(self, tda):
|
| 140 |
+
other = np.arange(len(tda))
|
| 141 |
+
result = tda * other
|
| 142 |
+
expected = TimedeltaArray._simple_new(tda._ndarray * other, dtype=tda.dtype)
|
| 143 |
+
tm.assert_extension_array_equal(result, expected)
|
| 144 |
+
assert result._creso == tda._creso
|
| 145 |
+
|
| 146 |
+
def test_mul_listlike_object(self, tda):
|
| 147 |
+
other = np.arange(len(tda))
|
| 148 |
+
result = tda * other.astype(object)
|
| 149 |
+
expected = TimedeltaArray._simple_new(tda._ndarray * other, dtype=tda.dtype)
|
| 150 |
+
tm.assert_extension_array_equal(result, expected)
|
| 151 |
+
assert result._creso == tda._creso
|
| 152 |
+
|
| 153 |
+
def test_div_numeric_scalar(self, tda):
|
| 154 |
+
other = 2
|
| 155 |
+
result = tda / other
|
| 156 |
+
expected = TimedeltaArray._simple_new(tda._ndarray / other, dtype=tda.dtype)
|
| 157 |
+
tm.assert_extension_array_equal(result, expected)
|
| 158 |
+
assert result._creso == tda._creso
|
| 159 |
+
|
| 160 |
+
def test_div_td_scalar(self, tda):
|
| 161 |
+
other = timedelta(seconds=1)
|
| 162 |
+
result = tda / other
|
| 163 |
+
expected = tda._ndarray / np.timedelta64(1, "s")
|
| 164 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 165 |
+
|
| 166 |
+
def test_div_numeric_array(self, tda):
|
| 167 |
+
other = np.arange(len(tda))
|
| 168 |
+
result = tda / other
|
| 169 |
+
expected = TimedeltaArray._simple_new(tda._ndarray / other, dtype=tda.dtype)
|
| 170 |
+
tm.assert_extension_array_equal(result, expected)
|
| 171 |
+
assert result._creso == tda._creso
|
| 172 |
+
|
| 173 |
+
def test_div_td_array(self, tda):
|
| 174 |
+
other = tda._ndarray + tda._ndarray[-1]
|
| 175 |
+
result = tda / other
|
| 176 |
+
expected = tda._ndarray / other
|
| 177 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 178 |
+
|
| 179 |
+
def test_add_timedeltaarraylike(self, tda):
|
| 180 |
+
tda_nano = tda.astype("m8[ns]")
|
| 181 |
+
|
| 182 |
+
expected = tda_nano * 2
|
| 183 |
+
res = tda_nano + tda
|
| 184 |
+
tm.assert_extension_array_equal(res, expected)
|
| 185 |
+
res = tda + tda_nano
|
| 186 |
+
tm.assert_extension_array_equal(res, expected)
|
| 187 |
+
|
| 188 |
+
expected = tda_nano * 0
|
| 189 |
+
res = tda - tda_nano
|
| 190 |
+
tm.assert_extension_array_equal(res, expected)
|
| 191 |
+
|
| 192 |
+
res = tda_nano - tda
|
| 193 |
+
tm.assert_extension_array_equal(res, expected)
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
class TestTimedeltaArray:
|
| 197 |
+
@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
|
| 198 |
+
def test_astype_int(self, dtype):
|
| 199 |
+
arr = TimedeltaArray._from_sequence(
|
| 200 |
+
[Timedelta("1h"), Timedelta("2h")], dtype="m8[ns]"
|
| 201 |
+
)
|
| 202 |
+
|
| 203 |
+
if np.dtype(dtype) != np.int64:
|
| 204 |
+
with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"):
|
| 205 |
+
arr.astype(dtype)
|
| 206 |
+
return
|
| 207 |
+
|
| 208 |
+
result = arr.astype(dtype)
|
| 209 |
+
expected = arr._ndarray.view("i8")
|
| 210 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 211 |
+
|
| 212 |
+
def test_setitem_clears_freq(self):
|
| 213 |
+
a = pd.timedelta_range("1h", periods=2, freq="h")._data
|
| 214 |
+
a[0] = Timedelta("1h")
|
| 215 |
+
assert a.freq is None
|
| 216 |
+
|
| 217 |
+
@pytest.mark.parametrize(
|
| 218 |
+
"obj",
|
| 219 |
+
[
|
| 220 |
+
Timedelta(seconds=1),
|
| 221 |
+
Timedelta(seconds=1).to_timedelta64(),
|
| 222 |
+
Timedelta(seconds=1).to_pytimedelta(),
|
| 223 |
+
],
|
| 224 |
+
)
|
| 225 |
+
def test_setitem_objects(self, obj):
|
| 226 |
+
# make sure we accept timedelta64 and timedelta in addition to Timedelta
|
| 227 |
+
tdi = pd.timedelta_range("2 Days", periods=4, freq="h")
|
| 228 |
+
arr = tdi._data
|
| 229 |
+
|
| 230 |
+
arr[0] = obj
|
| 231 |
+
assert arr[0] == Timedelta(seconds=1)
|
| 232 |
+
|
| 233 |
+
@pytest.mark.parametrize(
|
| 234 |
+
"other",
|
| 235 |
+
[
|
| 236 |
+
1,
|
| 237 |
+
np.int64(1),
|
| 238 |
+
1.0,
|
| 239 |
+
np.datetime64("NaT"),
|
| 240 |
+
pd.Timestamp("2021-01-01"),
|
| 241 |
+
"invalid",
|
| 242 |
+
np.arange(10, dtype="i8") * 24 * 3600 * 10**9,
|
| 243 |
+
(np.arange(10) * 24 * 3600 * 10**9).view("datetime64[ns]"),
|
| 244 |
+
pd.Timestamp("2021-01-01").to_period("D"),
|
| 245 |
+
],
|
| 246 |
+
)
|
| 247 |
+
@pytest.mark.parametrize("index", [True, False])
|
| 248 |
+
def test_searchsorted_invalid_types(self, other, index):
|
| 249 |
+
data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
|
| 250 |
+
arr = pd.TimedeltaIndex(data, freq="D")._data
|
| 251 |
+
if index:
|
| 252 |
+
arr = pd.Index(arr)
|
| 253 |
+
|
| 254 |
+
msg = "|".join(
|
| 255 |
+
[
|
| 256 |
+
"searchsorted requires compatible dtype or scalar",
|
| 257 |
+
"value should be a 'Timedelta', 'NaT', or array of those. Got",
|
| 258 |
+
]
|
| 259 |
+
)
|
| 260 |
+
with pytest.raises(TypeError, match=msg):
|
| 261 |
+
arr.searchsorted(other)
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
class TestUnaryOps:
|
| 265 |
+
def test_abs(self):
|
| 266 |
+
vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
|
| 267 |
+
arr = TimedeltaArray._from_sequence(vals)
|
| 268 |
+
|
| 269 |
+
evals = np.array([3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
|
| 270 |
+
expected = TimedeltaArray._from_sequence(evals)
|
| 271 |
+
|
| 272 |
+
result = abs(arr)
|
| 273 |
+
tm.assert_timedelta_array_equal(result, expected)
|
| 274 |
+
|
| 275 |
+
result2 = np.abs(arr)
|
| 276 |
+
tm.assert_timedelta_array_equal(result2, expected)
|
| 277 |
+
|
| 278 |
+
def test_pos(self):
|
| 279 |
+
vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
|
| 280 |
+
arr = TimedeltaArray._from_sequence(vals)
|
| 281 |
+
|
| 282 |
+
result = +arr
|
| 283 |
+
tm.assert_timedelta_array_equal(result, arr)
|
| 284 |
+
assert not tm.shares_memory(result, arr)
|
| 285 |
+
|
| 286 |
+
result2 = np.positive(arr)
|
| 287 |
+
tm.assert_timedelta_array_equal(result2, arr)
|
| 288 |
+
assert not tm.shares_memory(result2, arr)
|
| 289 |
+
|
| 290 |
+
def test_neg(self):
|
| 291 |
+
vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
|
| 292 |
+
arr = TimedeltaArray._from_sequence(vals)
|
| 293 |
+
|
| 294 |
+
evals = np.array([3600 * 10**9, "NaT", -7200 * 10**9], dtype="m8[ns]")
|
| 295 |
+
expected = TimedeltaArray._from_sequence(evals)
|
| 296 |
+
|
| 297 |
+
result = -arr
|
| 298 |
+
tm.assert_timedelta_array_equal(result, expected)
|
| 299 |
+
|
| 300 |
+
result2 = np.negative(arr)
|
| 301 |
+
tm.assert_timedelta_array_equal(result2, expected)
|
| 302 |
+
|
| 303 |
+
def test_neg_freq(self):
|
| 304 |
+
tdi = pd.timedelta_range("2 Days", periods=4, freq="h")
|
| 305 |
+
arr = tdi._data
|
| 306 |
+
|
| 307 |
+
expected = -tdi._data
|
| 308 |
+
|
| 309 |
+
result = -arr
|
| 310 |
+
tm.assert_timedelta_array_equal(result, expected)
|
| 311 |
+
|
| 312 |
+
result2 = np.negative(arr)
|
| 313 |
+
tm.assert_timedelta_array_equal(result2, expected)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/__init__.py
ADDED
|
File without changes
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/common.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any
|
| 2 |
+
|
| 3 |
+
from pandas import Index
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def allow_na_ops(obj: Any) -> bool:
|
| 7 |
+
"""Whether to skip test cases including NaN"""
|
| 8 |
+
is_bool_index = isinstance(obj, Index) and obj.inferred_type == "boolean"
|
| 9 |
+
return not is_bool_index and obj._can_hold_na
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_constructors.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
import sys
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pytest
|
| 6 |
+
|
| 7 |
+
from pandas.compat import PYPY
|
| 8 |
+
|
| 9 |
+
import pandas as pd
|
| 10 |
+
from pandas import (
|
| 11 |
+
DataFrame,
|
| 12 |
+
Index,
|
| 13 |
+
Series,
|
| 14 |
+
)
|
| 15 |
+
import pandas._testing as tm
|
| 16 |
+
from pandas.core.accessor import PandasDelegate
|
| 17 |
+
from pandas.core.base import (
|
| 18 |
+
NoNewAttributesMixin,
|
| 19 |
+
PandasObject,
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def series_via_frame_from_dict(x, **kwargs):
|
| 24 |
+
return DataFrame({"a": x}, **kwargs)["a"]
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def series_via_frame_from_scalar(x, **kwargs):
|
| 28 |
+
return DataFrame(x, **kwargs)[0]
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
@pytest.fixture(
|
| 32 |
+
params=[
|
| 33 |
+
Series,
|
| 34 |
+
series_via_frame_from_dict,
|
| 35 |
+
series_via_frame_from_scalar,
|
| 36 |
+
Index,
|
| 37 |
+
],
|
| 38 |
+
ids=["Series", "DataFrame-dict", "DataFrame-array", "Index"],
|
| 39 |
+
)
|
| 40 |
+
def constructor(request):
|
| 41 |
+
return request.param
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class TestPandasDelegate:
|
| 45 |
+
class Delegator:
|
| 46 |
+
_properties = ["prop"]
|
| 47 |
+
_methods = ["test_method"]
|
| 48 |
+
|
| 49 |
+
def _set_prop(self, value):
|
| 50 |
+
self.prop = value
|
| 51 |
+
|
| 52 |
+
def _get_prop(self):
|
| 53 |
+
return self.prop
|
| 54 |
+
|
| 55 |
+
prop = property(_get_prop, _set_prop, doc="foo property")
|
| 56 |
+
|
| 57 |
+
def test_method(self, *args, **kwargs):
|
| 58 |
+
"""a test method"""
|
| 59 |
+
|
| 60 |
+
class Delegate(PandasDelegate, PandasObject):
|
| 61 |
+
def __init__(self, obj) -> None:
|
| 62 |
+
self.obj = obj
|
| 63 |
+
|
| 64 |
+
def test_invalid_delegation(self):
|
| 65 |
+
# these show that in order for the delegation to work
|
| 66 |
+
# the _delegate_* methods need to be overridden to not raise
|
| 67 |
+
# a TypeError
|
| 68 |
+
|
| 69 |
+
self.Delegate._add_delegate_accessors(
|
| 70 |
+
delegate=self.Delegator,
|
| 71 |
+
accessors=self.Delegator._properties,
|
| 72 |
+
typ="property",
|
| 73 |
+
)
|
| 74 |
+
self.Delegate._add_delegate_accessors(
|
| 75 |
+
delegate=self.Delegator, accessors=self.Delegator._methods, typ="method"
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
+
delegate = self.Delegate(self.Delegator())
|
| 79 |
+
|
| 80 |
+
msg = "You cannot access the property prop"
|
| 81 |
+
with pytest.raises(TypeError, match=msg):
|
| 82 |
+
delegate.prop
|
| 83 |
+
|
| 84 |
+
msg = "The property prop cannot be set"
|
| 85 |
+
with pytest.raises(TypeError, match=msg):
|
| 86 |
+
delegate.prop = 5
|
| 87 |
+
|
| 88 |
+
msg = "You cannot access the property prop"
|
| 89 |
+
with pytest.raises(TypeError, match=msg):
|
| 90 |
+
delegate.prop
|
| 91 |
+
|
| 92 |
+
@pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
|
| 93 |
+
def test_memory_usage(self):
|
| 94 |
+
# Delegate does not implement memory_usage.
|
| 95 |
+
# Check that we fall back to in-built `__sizeof__`
|
| 96 |
+
# GH 12924
|
| 97 |
+
delegate = self.Delegate(self.Delegator())
|
| 98 |
+
sys.getsizeof(delegate)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
class TestNoNewAttributesMixin:
|
| 102 |
+
def test_mixin(self):
|
| 103 |
+
class T(NoNewAttributesMixin):
|
| 104 |
+
pass
|
| 105 |
+
|
| 106 |
+
t = T()
|
| 107 |
+
assert not hasattr(t, "__frozen")
|
| 108 |
+
|
| 109 |
+
t.a = "test"
|
| 110 |
+
assert t.a == "test"
|
| 111 |
+
|
| 112 |
+
t._freeze()
|
| 113 |
+
assert "__frozen" in dir(t)
|
| 114 |
+
assert getattr(t, "__frozen")
|
| 115 |
+
msg = "You cannot add any new attribute"
|
| 116 |
+
with pytest.raises(AttributeError, match=msg):
|
| 117 |
+
t.b = "test"
|
| 118 |
+
|
| 119 |
+
assert not hasattr(t, "b")
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
class TestConstruction:
|
| 123 |
+
# test certain constructor behaviours on dtype inference across Series,
|
| 124 |
+
# Index and DataFrame
|
| 125 |
+
|
| 126 |
+
@pytest.mark.parametrize(
|
| 127 |
+
"a",
|
| 128 |
+
[
|
| 129 |
+
np.array(["2263-01-01"], dtype="datetime64[D]"),
|
| 130 |
+
np.array([datetime(2263, 1, 1)], dtype=object),
|
| 131 |
+
np.array([np.datetime64("2263-01-01", "D")], dtype=object),
|
| 132 |
+
np.array(["2263-01-01"], dtype=object),
|
| 133 |
+
],
|
| 134 |
+
ids=[
|
| 135 |
+
"datetime64[D]",
|
| 136 |
+
"object-datetime.datetime",
|
| 137 |
+
"object-numpy-scalar",
|
| 138 |
+
"object-string",
|
| 139 |
+
],
|
| 140 |
+
)
|
| 141 |
+
def test_constructor_datetime_outofbound(
|
| 142 |
+
self, a, constructor, request, using_infer_string
|
| 143 |
+
):
|
| 144 |
+
# GH-26853 (+ bug GH-26206 out of bound non-ns unit)
|
| 145 |
+
|
| 146 |
+
# No dtype specified (dtype inference)
|
| 147 |
+
# datetime64[non-ns] raise error, other cases result in object dtype
|
| 148 |
+
# and preserve original data
|
| 149 |
+
if a.dtype.kind == "M":
|
| 150 |
+
# Can't fit in nanosecond bounds -> get the nearest supported unit
|
| 151 |
+
result = constructor(a)
|
| 152 |
+
assert result.dtype == "M8[s]"
|
| 153 |
+
else:
|
| 154 |
+
result = constructor(a)
|
| 155 |
+
if using_infer_string and "object-string" in request.node.callspec.id:
|
| 156 |
+
assert result.dtype == "string"
|
| 157 |
+
else:
|
| 158 |
+
assert result.dtype == "object"
|
| 159 |
+
tm.assert_numpy_array_equal(result.to_numpy(), a)
|
| 160 |
+
|
| 161 |
+
# Explicit dtype specified
|
| 162 |
+
# Forced conversion fails for all -> all cases raise error
|
| 163 |
+
msg = "Out of bounds|Out of bounds .* present at position 0"
|
| 164 |
+
with pytest.raises(pd.errors.OutOfBoundsDatetime, match=msg):
|
| 165 |
+
constructor(a, dtype="datetime64[ns]")
|
| 166 |
+
|
| 167 |
+
def test_constructor_datetime_nonns(self, constructor):
|
| 168 |
+
arr = np.array(["2020-01-01T00:00:00.000000"], dtype="datetime64[us]")
|
| 169 |
+
dta = pd.core.arrays.DatetimeArray._simple_new(arr, dtype=arr.dtype)
|
| 170 |
+
expected = constructor(dta)
|
| 171 |
+
assert expected.dtype == arr.dtype
|
| 172 |
+
|
| 173 |
+
result = constructor(arr)
|
| 174 |
+
tm.assert_equal(result, expected)
|
| 175 |
+
|
| 176 |
+
# https://github.com/pandas-dev/pandas/issues/34843
|
| 177 |
+
arr.flags.writeable = False
|
| 178 |
+
result = constructor(arr)
|
| 179 |
+
tm.assert_equal(result, expected)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_conversion.py
ADDED
|
@@ -0,0 +1,562 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from pandas import (
|
| 8 |
+
CategoricalIndex,
|
| 9 |
+
Series,
|
| 10 |
+
Timedelta,
|
| 11 |
+
Timestamp,
|
| 12 |
+
date_range,
|
| 13 |
+
)
|
| 14 |
+
import pandas._testing as tm
|
| 15 |
+
from pandas.core.arrays import (
|
| 16 |
+
DatetimeArray,
|
| 17 |
+
IntervalArray,
|
| 18 |
+
NumpyExtensionArray,
|
| 19 |
+
PeriodArray,
|
| 20 |
+
SparseArray,
|
| 21 |
+
TimedeltaArray,
|
| 22 |
+
)
|
| 23 |
+
from pandas.core.arrays.string_arrow import ArrowStringArrayNumpySemantics
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class TestToIterable:
|
| 27 |
+
# test that we convert an iterable to python types
|
| 28 |
+
|
| 29 |
+
dtypes = [
|
| 30 |
+
("int8", int),
|
| 31 |
+
("int16", int),
|
| 32 |
+
("int32", int),
|
| 33 |
+
("int64", int),
|
| 34 |
+
("uint8", int),
|
| 35 |
+
("uint16", int),
|
| 36 |
+
("uint32", int),
|
| 37 |
+
("uint64", int),
|
| 38 |
+
("float16", float),
|
| 39 |
+
("float32", float),
|
| 40 |
+
("float64", float),
|
| 41 |
+
("datetime64[ns]", Timestamp),
|
| 42 |
+
("datetime64[ns, US/Eastern]", Timestamp),
|
| 43 |
+
("timedelta64[ns]", Timedelta),
|
| 44 |
+
]
|
| 45 |
+
|
| 46 |
+
@pytest.mark.parametrize("dtype, rdtype", dtypes)
|
| 47 |
+
@pytest.mark.parametrize(
|
| 48 |
+
"method",
|
| 49 |
+
[
|
| 50 |
+
lambda x: x.tolist(),
|
| 51 |
+
lambda x: x.to_list(),
|
| 52 |
+
lambda x: list(x),
|
| 53 |
+
lambda x: list(x.__iter__()),
|
| 54 |
+
],
|
| 55 |
+
ids=["tolist", "to_list", "list", "iter"],
|
| 56 |
+
)
|
| 57 |
+
def test_iterable(self, index_or_series, method, dtype, rdtype):
|
| 58 |
+
# gh-10904
|
| 59 |
+
# gh-13258
|
| 60 |
+
# coerce iteration to underlying python / pandas types
|
| 61 |
+
typ = index_or_series
|
| 62 |
+
if dtype == "float16" and issubclass(typ, pd.Index):
|
| 63 |
+
with pytest.raises(NotImplementedError, match="float16 indexes are not "):
|
| 64 |
+
typ([1], dtype=dtype)
|
| 65 |
+
return
|
| 66 |
+
s = typ([1], dtype=dtype)
|
| 67 |
+
result = method(s)[0]
|
| 68 |
+
assert isinstance(result, rdtype)
|
| 69 |
+
|
| 70 |
+
@pytest.mark.parametrize(
|
| 71 |
+
"dtype, rdtype, obj",
|
| 72 |
+
[
|
| 73 |
+
("object", object, "a"),
|
| 74 |
+
("object", int, 1),
|
| 75 |
+
("category", object, "a"),
|
| 76 |
+
("category", int, 1),
|
| 77 |
+
],
|
| 78 |
+
)
|
| 79 |
+
@pytest.mark.parametrize(
|
| 80 |
+
"method",
|
| 81 |
+
[
|
| 82 |
+
lambda x: x.tolist(),
|
| 83 |
+
lambda x: x.to_list(),
|
| 84 |
+
lambda x: list(x),
|
| 85 |
+
lambda x: list(x.__iter__()),
|
| 86 |
+
],
|
| 87 |
+
ids=["tolist", "to_list", "list", "iter"],
|
| 88 |
+
)
|
| 89 |
+
def test_iterable_object_and_category(
|
| 90 |
+
self, index_or_series, method, dtype, rdtype, obj
|
| 91 |
+
):
|
| 92 |
+
# gh-10904
|
| 93 |
+
# gh-13258
|
| 94 |
+
# coerce iteration to underlying python / pandas types
|
| 95 |
+
typ = index_or_series
|
| 96 |
+
s = typ([obj], dtype=dtype)
|
| 97 |
+
result = method(s)[0]
|
| 98 |
+
assert isinstance(result, rdtype)
|
| 99 |
+
|
| 100 |
+
@pytest.mark.parametrize("dtype, rdtype", dtypes)
|
| 101 |
+
def test_iterable_items(self, dtype, rdtype):
|
| 102 |
+
# gh-13258
|
| 103 |
+
# test if items yields the correct boxed scalars
|
| 104 |
+
# this only applies to series
|
| 105 |
+
s = Series([1], dtype=dtype)
|
| 106 |
+
_, result = next(iter(s.items()))
|
| 107 |
+
assert isinstance(result, rdtype)
|
| 108 |
+
|
| 109 |
+
_, result = next(iter(s.items()))
|
| 110 |
+
assert isinstance(result, rdtype)
|
| 111 |
+
|
| 112 |
+
@pytest.mark.parametrize(
|
| 113 |
+
"dtype, rdtype", dtypes + [("object", int), ("category", int)]
|
| 114 |
+
)
|
| 115 |
+
def test_iterable_map(self, index_or_series, dtype, rdtype):
|
| 116 |
+
# gh-13236
|
| 117 |
+
# coerce iteration to underlying python / pandas types
|
| 118 |
+
typ = index_or_series
|
| 119 |
+
if dtype == "float16" and issubclass(typ, pd.Index):
|
| 120 |
+
with pytest.raises(NotImplementedError, match="float16 indexes are not "):
|
| 121 |
+
typ([1], dtype=dtype)
|
| 122 |
+
return
|
| 123 |
+
s = typ([1], dtype=dtype)
|
| 124 |
+
result = s.map(type)[0]
|
| 125 |
+
if not isinstance(rdtype, tuple):
|
| 126 |
+
rdtype = (rdtype,)
|
| 127 |
+
assert result in rdtype
|
| 128 |
+
|
| 129 |
+
@pytest.mark.parametrize(
|
| 130 |
+
"method",
|
| 131 |
+
[
|
| 132 |
+
lambda x: x.tolist(),
|
| 133 |
+
lambda x: x.to_list(),
|
| 134 |
+
lambda x: list(x),
|
| 135 |
+
lambda x: list(x.__iter__()),
|
| 136 |
+
],
|
| 137 |
+
ids=["tolist", "to_list", "list", "iter"],
|
| 138 |
+
)
|
| 139 |
+
def test_categorial_datetimelike(self, method):
|
| 140 |
+
i = CategoricalIndex([Timestamp("1999-12-31"), Timestamp("2000-12-31")])
|
| 141 |
+
|
| 142 |
+
result = method(i)[0]
|
| 143 |
+
assert isinstance(result, Timestamp)
|
| 144 |
+
|
| 145 |
+
def test_iter_box_dt64(self, unit):
|
| 146 |
+
vals = [Timestamp("2011-01-01"), Timestamp("2011-01-02")]
|
| 147 |
+
ser = Series(vals).dt.as_unit(unit)
|
| 148 |
+
assert ser.dtype == f"datetime64[{unit}]"
|
| 149 |
+
for res, exp in zip(ser, vals):
|
| 150 |
+
assert isinstance(res, Timestamp)
|
| 151 |
+
assert res.tz is None
|
| 152 |
+
assert res == exp
|
| 153 |
+
assert res.unit == unit
|
| 154 |
+
|
| 155 |
+
def test_iter_box_dt64tz(self, unit):
|
| 156 |
+
vals = [
|
| 157 |
+
Timestamp("2011-01-01", tz="US/Eastern"),
|
| 158 |
+
Timestamp("2011-01-02", tz="US/Eastern"),
|
| 159 |
+
]
|
| 160 |
+
ser = Series(vals).dt.as_unit(unit)
|
| 161 |
+
|
| 162 |
+
assert ser.dtype == f"datetime64[{unit}, US/Eastern]"
|
| 163 |
+
for res, exp in zip(ser, vals):
|
| 164 |
+
assert isinstance(res, Timestamp)
|
| 165 |
+
assert res.tz == exp.tz
|
| 166 |
+
assert res == exp
|
| 167 |
+
assert res.unit == unit
|
| 168 |
+
|
| 169 |
+
def test_iter_box_timedelta64(self, unit):
|
| 170 |
+
# timedelta
|
| 171 |
+
vals = [Timedelta("1 days"), Timedelta("2 days")]
|
| 172 |
+
ser = Series(vals).dt.as_unit(unit)
|
| 173 |
+
assert ser.dtype == f"timedelta64[{unit}]"
|
| 174 |
+
for res, exp in zip(ser, vals):
|
| 175 |
+
assert isinstance(res, Timedelta)
|
| 176 |
+
assert res == exp
|
| 177 |
+
assert res.unit == unit
|
| 178 |
+
|
| 179 |
+
def test_iter_box_period(self):
|
| 180 |
+
# period
|
| 181 |
+
vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
|
| 182 |
+
s = Series(vals)
|
| 183 |
+
assert s.dtype == "Period[M]"
|
| 184 |
+
for res, exp in zip(s, vals):
|
| 185 |
+
assert isinstance(res, pd.Period)
|
| 186 |
+
assert res.freq == "ME"
|
| 187 |
+
assert res == exp
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
@pytest.mark.parametrize(
|
| 191 |
+
"arr, expected_type, dtype",
|
| 192 |
+
[
|
| 193 |
+
(np.array([0, 1], dtype=np.int64), np.ndarray, "int64"),
|
| 194 |
+
(np.array(["a", "b"]), np.ndarray, "object"),
|
| 195 |
+
(pd.Categorical(["a", "b"]), pd.Categorical, "category"),
|
| 196 |
+
(
|
| 197 |
+
pd.DatetimeIndex(["2017", "2018"], tz="US/Central"),
|
| 198 |
+
DatetimeArray,
|
| 199 |
+
"datetime64[ns, US/Central]",
|
| 200 |
+
),
|
| 201 |
+
(
|
| 202 |
+
pd.PeriodIndex([2018, 2019], freq="Y"),
|
| 203 |
+
PeriodArray,
|
| 204 |
+
pd.core.dtypes.dtypes.PeriodDtype("Y-DEC"),
|
| 205 |
+
),
|
| 206 |
+
(pd.IntervalIndex.from_breaks([0, 1, 2]), IntervalArray, "interval"),
|
| 207 |
+
(
|
| 208 |
+
pd.DatetimeIndex(["2017", "2018"]),
|
| 209 |
+
DatetimeArray,
|
| 210 |
+
"datetime64[ns]",
|
| 211 |
+
),
|
| 212 |
+
(
|
| 213 |
+
pd.TimedeltaIndex([10**10]),
|
| 214 |
+
TimedeltaArray,
|
| 215 |
+
"m8[ns]",
|
| 216 |
+
),
|
| 217 |
+
],
|
| 218 |
+
)
|
| 219 |
+
def test_values_consistent(arr, expected_type, dtype, using_infer_string):
|
| 220 |
+
if using_infer_string and dtype == "object":
|
| 221 |
+
expected_type = ArrowStringArrayNumpySemantics
|
| 222 |
+
l_values = Series(arr)._values
|
| 223 |
+
r_values = pd.Index(arr)._values
|
| 224 |
+
assert type(l_values) is expected_type
|
| 225 |
+
assert type(l_values) is type(r_values)
|
| 226 |
+
|
| 227 |
+
tm.assert_equal(l_values, r_values)
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
@pytest.mark.parametrize("arr", [np.array([1, 2, 3])])
|
| 231 |
+
def test_numpy_array(arr):
|
| 232 |
+
ser = Series(arr)
|
| 233 |
+
result = ser.array
|
| 234 |
+
expected = NumpyExtensionArray(arr)
|
| 235 |
+
tm.assert_extension_array_equal(result, expected)
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
def test_numpy_array_all_dtypes(any_numpy_dtype):
|
| 239 |
+
ser = Series(dtype=any_numpy_dtype)
|
| 240 |
+
result = ser.array
|
| 241 |
+
if np.dtype(any_numpy_dtype).kind == "M":
|
| 242 |
+
assert isinstance(result, DatetimeArray)
|
| 243 |
+
elif np.dtype(any_numpy_dtype).kind == "m":
|
| 244 |
+
assert isinstance(result, TimedeltaArray)
|
| 245 |
+
else:
|
| 246 |
+
assert isinstance(result, NumpyExtensionArray)
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
@pytest.mark.parametrize(
|
| 250 |
+
"arr, attr",
|
| 251 |
+
[
|
| 252 |
+
(pd.Categorical(["a", "b"]), "_codes"),
|
| 253 |
+
(PeriodArray._from_sequence(["2000", "2001"], dtype="period[D]"), "_ndarray"),
|
| 254 |
+
(pd.array([0, np.nan], dtype="Int64"), "_data"),
|
| 255 |
+
(IntervalArray.from_breaks([0, 1]), "_left"),
|
| 256 |
+
(SparseArray([0, 1]), "_sparse_values"),
|
| 257 |
+
(
|
| 258 |
+
DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[ns]")),
|
| 259 |
+
"_ndarray",
|
| 260 |
+
),
|
| 261 |
+
# tz-aware Datetime
|
| 262 |
+
(
|
| 263 |
+
DatetimeArray._from_sequence(
|
| 264 |
+
np.array(
|
| 265 |
+
["2000-01-01T12:00:00", "2000-01-02T12:00:00"], dtype="M8[ns]"
|
| 266 |
+
),
|
| 267 |
+
dtype=DatetimeTZDtype(tz="US/Central"),
|
| 268 |
+
),
|
| 269 |
+
"_ndarray",
|
| 270 |
+
),
|
| 271 |
+
],
|
| 272 |
+
)
|
| 273 |
+
def test_array(arr, attr, index_or_series, request):
|
| 274 |
+
box = index_or_series
|
| 275 |
+
|
| 276 |
+
result = box(arr, copy=False).array
|
| 277 |
+
|
| 278 |
+
if attr:
|
| 279 |
+
arr = getattr(arr, attr)
|
| 280 |
+
result = getattr(result, attr)
|
| 281 |
+
|
| 282 |
+
assert result is arr
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
def test_array_multiindex_raises():
|
| 286 |
+
idx = pd.MultiIndex.from_product([["A"], ["a", "b"]])
|
| 287 |
+
msg = "MultiIndex has no single backing array"
|
| 288 |
+
with pytest.raises(ValueError, match=msg):
|
| 289 |
+
idx.array
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
@pytest.mark.parametrize(
|
| 293 |
+
"arr, expected",
|
| 294 |
+
[
|
| 295 |
+
(np.array([1, 2], dtype=np.int64), np.array([1, 2], dtype=np.int64)),
|
| 296 |
+
(pd.Categorical(["a", "b"]), np.array(["a", "b"], dtype=object)),
|
| 297 |
+
(
|
| 298 |
+
pd.core.arrays.period_array(["2000", "2001"], freq="D"),
|
| 299 |
+
np.array([pd.Period("2000", freq="D"), pd.Period("2001", freq="D")]),
|
| 300 |
+
),
|
| 301 |
+
(pd.array([0, np.nan], dtype="Int64"), np.array([0, np.nan])),
|
| 302 |
+
(
|
| 303 |
+
IntervalArray.from_breaks([0, 1, 2]),
|
| 304 |
+
np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object),
|
| 305 |
+
),
|
| 306 |
+
(SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)),
|
| 307 |
+
# tz-naive datetime
|
| 308 |
+
(
|
| 309 |
+
DatetimeArray._from_sequence(np.array(["2000", "2001"], dtype="M8[ns]")),
|
| 310 |
+
np.array(["2000", "2001"], dtype="M8[ns]"),
|
| 311 |
+
),
|
| 312 |
+
# tz-aware stays tz`-aware
|
| 313 |
+
(
|
| 314 |
+
DatetimeArray._from_sequence(
|
| 315 |
+
np.array(["2000-01-01T06:00:00", "2000-01-02T06:00:00"], dtype="M8[ns]")
|
| 316 |
+
)
|
| 317 |
+
.tz_localize("UTC")
|
| 318 |
+
.tz_convert("US/Central"),
|
| 319 |
+
np.array(
|
| 320 |
+
[
|
| 321 |
+
Timestamp("2000-01-01", tz="US/Central"),
|
| 322 |
+
Timestamp("2000-01-02", tz="US/Central"),
|
| 323 |
+
]
|
| 324 |
+
),
|
| 325 |
+
),
|
| 326 |
+
# Timedelta
|
| 327 |
+
(
|
| 328 |
+
TimedeltaArray._from_sequence(
|
| 329 |
+
np.array([0, 3600000000000], dtype="i8").view("m8[ns]")
|
| 330 |
+
),
|
| 331 |
+
np.array([0, 3600000000000], dtype="m8[ns]"),
|
| 332 |
+
),
|
| 333 |
+
# GH#26406 tz is preserved in Categorical[dt64tz]
|
| 334 |
+
(
|
| 335 |
+
pd.Categorical(date_range("2016-01-01", periods=2, tz="US/Pacific")),
|
| 336 |
+
np.array(
|
| 337 |
+
[
|
| 338 |
+
Timestamp("2016-01-01", tz="US/Pacific"),
|
| 339 |
+
Timestamp("2016-01-02", tz="US/Pacific"),
|
| 340 |
+
]
|
| 341 |
+
),
|
| 342 |
+
),
|
| 343 |
+
],
|
| 344 |
+
)
|
| 345 |
+
def test_to_numpy(arr, expected, index_or_series_or_array, request):
|
| 346 |
+
box = index_or_series_or_array
|
| 347 |
+
|
| 348 |
+
with tm.assert_produces_warning(None):
|
| 349 |
+
thing = box(arr)
|
| 350 |
+
|
| 351 |
+
result = thing.to_numpy()
|
| 352 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 353 |
+
|
| 354 |
+
result = np.asarray(thing)
|
| 355 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
@pytest.mark.parametrize("as_series", [True, False])
|
| 359 |
+
@pytest.mark.parametrize(
|
| 360 |
+
"arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)]
|
| 361 |
+
)
|
| 362 |
+
def test_to_numpy_copy(arr, as_series, using_infer_string):
|
| 363 |
+
obj = pd.Index(arr, copy=False)
|
| 364 |
+
if as_series:
|
| 365 |
+
obj = Series(obj.values, copy=False)
|
| 366 |
+
|
| 367 |
+
# no copy by default
|
| 368 |
+
result = obj.to_numpy()
|
| 369 |
+
if using_infer_string and arr.dtype == object:
|
| 370 |
+
assert np.shares_memory(arr, result) is False
|
| 371 |
+
else:
|
| 372 |
+
assert np.shares_memory(arr, result) is True
|
| 373 |
+
|
| 374 |
+
result = obj.to_numpy(copy=False)
|
| 375 |
+
if using_infer_string and arr.dtype == object:
|
| 376 |
+
assert np.shares_memory(arr, result) is False
|
| 377 |
+
else:
|
| 378 |
+
assert np.shares_memory(arr, result) is True
|
| 379 |
+
|
| 380 |
+
# copy=True
|
| 381 |
+
result = obj.to_numpy(copy=True)
|
| 382 |
+
assert np.shares_memory(arr, result) is False
|
| 383 |
+
|
| 384 |
+
|
| 385 |
+
@pytest.mark.parametrize("as_series", [True, False])
|
| 386 |
+
def test_to_numpy_dtype(as_series, unit):
|
| 387 |
+
tz = "US/Eastern"
|
| 388 |
+
obj = pd.DatetimeIndex(["2000", "2001"], tz=tz)
|
| 389 |
+
if as_series:
|
| 390 |
+
obj = Series(obj)
|
| 391 |
+
|
| 392 |
+
# preserve tz by default
|
| 393 |
+
result = obj.to_numpy()
|
| 394 |
+
expected = np.array(
|
| 395 |
+
[Timestamp("2000", tz=tz), Timestamp("2001", tz=tz)], dtype=object
|
| 396 |
+
)
|
| 397 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 398 |
+
|
| 399 |
+
result = obj.to_numpy(dtype="object")
|
| 400 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 401 |
+
|
| 402 |
+
result = obj.to_numpy(dtype="M8[ns]")
|
| 403 |
+
expected = np.array(["2000-01-01T05", "2001-01-01T05"], dtype="M8[ns]")
|
| 404 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 405 |
+
|
| 406 |
+
|
| 407 |
+
@pytest.mark.parametrize(
|
| 408 |
+
"values, dtype, na_value, expected",
|
| 409 |
+
[
|
| 410 |
+
([1, 2, None], "float64", 0, [1.0, 2.0, 0.0]),
|
| 411 |
+
(
|
| 412 |
+
[Timestamp("2000"), Timestamp("2000"), pd.NaT],
|
| 413 |
+
None,
|
| 414 |
+
Timestamp("2000"),
|
| 415 |
+
[np.datetime64("2000-01-01T00:00:00.000000000")] * 3,
|
| 416 |
+
),
|
| 417 |
+
],
|
| 418 |
+
)
|
| 419 |
+
def test_to_numpy_na_value_numpy_dtype(
|
| 420 |
+
index_or_series, values, dtype, na_value, expected
|
| 421 |
+
):
|
| 422 |
+
obj = index_or_series(values)
|
| 423 |
+
result = obj.to_numpy(dtype=dtype, na_value=na_value)
|
| 424 |
+
expected = np.array(expected)
|
| 425 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 426 |
+
|
| 427 |
+
|
| 428 |
+
@pytest.mark.parametrize(
|
| 429 |
+
"data, multiindex, dtype, na_value, expected",
|
| 430 |
+
[
|
| 431 |
+
(
|
| 432 |
+
[1, 2, None, 4],
|
| 433 |
+
[(0, "a"), (0, "b"), (1, "b"), (1, "c")],
|
| 434 |
+
float,
|
| 435 |
+
None,
|
| 436 |
+
[1.0, 2.0, np.nan, 4.0],
|
| 437 |
+
),
|
| 438 |
+
(
|
| 439 |
+
[1, 2, None, 4],
|
| 440 |
+
[(0, "a"), (0, "b"), (1, "b"), (1, "c")],
|
| 441 |
+
float,
|
| 442 |
+
np.nan,
|
| 443 |
+
[1.0, 2.0, np.nan, 4.0],
|
| 444 |
+
),
|
| 445 |
+
(
|
| 446 |
+
[1.0, 2.0, np.nan, 4.0],
|
| 447 |
+
[("a", 0), ("a", 1), ("a", 2), ("b", 0)],
|
| 448 |
+
int,
|
| 449 |
+
0,
|
| 450 |
+
[1, 2, 0, 4],
|
| 451 |
+
),
|
| 452 |
+
(
|
| 453 |
+
[Timestamp("2000"), Timestamp("2000"), pd.NaT],
|
| 454 |
+
[(0, Timestamp("2021")), (0, Timestamp("2022")), (1, Timestamp("2000"))],
|
| 455 |
+
None,
|
| 456 |
+
Timestamp("2000"),
|
| 457 |
+
[np.datetime64("2000-01-01T00:00:00.000000000")] * 3,
|
| 458 |
+
),
|
| 459 |
+
],
|
| 460 |
+
)
|
| 461 |
+
def test_to_numpy_multiindex_series_na_value(
|
| 462 |
+
data, multiindex, dtype, na_value, expected
|
| 463 |
+
):
|
| 464 |
+
index = pd.MultiIndex.from_tuples(multiindex)
|
| 465 |
+
series = Series(data, index=index)
|
| 466 |
+
result = series.to_numpy(dtype=dtype, na_value=na_value)
|
| 467 |
+
expected = np.array(expected)
|
| 468 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
def test_to_numpy_kwargs_raises():
|
| 472 |
+
# numpy
|
| 473 |
+
s = Series([1, 2, 3])
|
| 474 |
+
msg = r"to_numpy\(\) got an unexpected keyword argument 'foo'"
|
| 475 |
+
with pytest.raises(TypeError, match=msg):
|
| 476 |
+
s.to_numpy(foo=True)
|
| 477 |
+
|
| 478 |
+
# extension
|
| 479 |
+
s = Series([1, 2, 3], dtype="Int64")
|
| 480 |
+
with pytest.raises(TypeError, match=msg):
|
| 481 |
+
s.to_numpy(foo=True)
|
| 482 |
+
|
| 483 |
+
|
| 484 |
+
@pytest.mark.parametrize(
|
| 485 |
+
"data",
|
| 486 |
+
[
|
| 487 |
+
{"a": [1, 2, 3], "b": [1, 2, None]},
|
| 488 |
+
{"a": np.array([1, 2, 3]), "b": np.array([1, 2, np.nan])},
|
| 489 |
+
{"a": pd.array([1, 2, 3]), "b": pd.array([1, 2, None])},
|
| 490 |
+
],
|
| 491 |
+
)
|
| 492 |
+
@pytest.mark.parametrize("dtype, na_value", [(float, np.nan), (object, None)])
|
| 493 |
+
def test_to_numpy_dataframe_na_value(data, dtype, na_value):
|
| 494 |
+
# https://github.com/pandas-dev/pandas/issues/33820
|
| 495 |
+
df = pd.DataFrame(data)
|
| 496 |
+
result = df.to_numpy(dtype=dtype, na_value=na_value)
|
| 497 |
+
expected = np.array([[1, 1], [2, 2], [3, na_value]], dtype=dtype)
|
| 498 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 499 |
+
|
| 500 |
+
|
| 501 |
+
@pytest.mark.parametrize(
|
| 502 |
+
"data, expected",
|
| 503 |
+
[
|
| 504 |
+
(
|
| 505 |
+
{"a": pd.array([1, 2, None])},
|
| 506 |
+
np.array([[1.0], [2.0], [np.nan]], dtype=float),
|
| 507 |
+
),
|
| 508 |
+
(
|
| 509 |
+
{"a": [1, 2, 3], "b": [1, 2, 3]},
|
| 510 |
+
np.array([[1, 1], [2, 2], [3, 3]], dtype=float),
|
| 511 |
+
),
|
| 512 |
+
],
|
| 513 |
+
)
|
| 514 |
+
def test_to_numpy_dataframe_single_block(data, expected):
|
| 515 |
+
# https://github.com/pandas-dev/pandas/issues/33820
|
| 516 |
+
df = pd.DataFrame(data)
|
| 517 |
+
result = df.to_numpy(dtype=float, na_value=np.nan)
|
| 518 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 519 |
+
|
| 520 |
+
|
| 521 |
+
def test_to_numpy_dataframe_single_block_no_mutate():
|
| 522 |
+
# https://github.com/pandas-dev/pandas/issues/33820
|
| 523 |
+
result = pd.DataFrame(np.array([1.0, 2.0, np.nan]))
|
| 524 |
+
expected = pd.DataFrame(np.array([1.0, 2.0, np.nan]))
|
| 525 |
+
result.to_numpy(na_value=0.0)
|
| 526 |
+
tm.assert_frame_equal(result, expected)
|
| 527 |
+
|
| 528 |
+
|
| 529 |
+
class TestAsArray:
|
| 530 |
+
@pytest.mark.parametrize("tz", [None, "US/Central"])
|
| 531 |
+
def test_asarray_object_dt64(self, tz):
|
| 532 |
+
ser = Series(date_range("2000", periods=2, tz=tz))
|
| 533 |
+
|
| 534 |
+
with tm.assert_produces_warning(None):
|
| 535 |
+
# Future behavior (for tzaware case) with no warning
|
| 536 |
+
result = np.asarray(ser, dtype=object)
|
| 537 |
+
|
| 538 |
+
expected = np.array(
|
| 539 |
+
[Timestamp("2000-01-01", tz=tz), Timestamp("2000-01-02", tz=tz)]
|
| 540 |
+
)
|
| 541 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 542 |
+
|
| 543 |
+
def test_asarray_tz_naive(self):
|
| 544 |
+
# This shouldn't produce a warning.
|
| 545 |
+
ser = Series(date_range("2000", periods=2))
|
| 546 |
+
expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")
|
| 547 |
+
result = np.asarray(ser)
|
| 548 |
+
|
| 549 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 550 |
+
|
| 551 |
+
def test_asarray_tz_aware(self):
|
| 552 |
+
tz = "US/Central"
|
| 553 |
+
ser = Series(date_range("2000", periods=2, tz=tz))
|
| 554 |
+
expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]")
|
| 555 |
+
result = np.asarray(ser, dtype="datetime64[ns]")
|
| 556 |
+
|
| 557 |
+
tm.assert_numpy_array_equal(result, expected)
|
| 558 |
+
|
| 559 |
+
# Old behavior with no warning
|
| 560 |
+
result = np.asarray(ser, dtype="M8[ns]")
|
| 561 |
+
|
| 562 |
+
tm.assert_numpy_array_equal(result, expected)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_fillna.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Though Index.fillna and Series.fillna has separate impl,
|
| 3 |
+
test here to confirm these works as the same
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pytest
|
| 8 |
+
|
| 9 |
+
from pandas import MultiIndex
|
| 10 |
+
import pandas._testing as tm
|
| 11 |
+
from pandas.tests.base.common import allow_na_ops
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def test_fillna(index_or_series_obj):
|
| 15 |
+
# GH 11343
|
| 16 |
+
obj = index_or_series_obj
|
| 17 |
+
|
| 18 |
+
if isinstance(obj, MultiIndex):
|
| 19 |
+
msg = "isna is not defined for MultiIndex"
|
| 20 |
+
with pytest.raises(NotImplementedError, match=msg):
|
| 21 |
+
obj.fillna(0)
|
| 22 |
+
return
|
| 23 |
+
|
| 24 |
+
# values will not be changed
|
| 25 |
+
fill_value = obj.values[0] if len(obj) > 0 else 0
|
| 26 |
+
result = obj.fillna(fill_value)
|
| 27 |
+
|
| 28 |
+
tm.assert_equal(obj, result)
|
| 29 |
+
|
| 30 |
+
# check shallow_copied
|
| 31 |
+
assert obj is not result
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
@pytest.mark.parametrize("null_obj", [np.nan, None])
|
| 35 |
+
def test_fillna_null(null_obj, index_or_series_obj):
|
| 36 |
+
# GH 11343
|
| 37 |
+
obj = index_or_series_obj
|
| 38 |
+
klass = type(obj)
|
| 39 |
+
|
| 40 |
+
if not allow_na_ops(obj):
|
| 41 |
+
pytest.skip(f"{klass} doesn't allow for NA operations")
|
| 42 |
+
elif len(obj) < 1:
|
| 43 |
+
pytest.skip("Test doesn't make sense on empty data")
|
| 44 |
+
elif isinstance(obj, MultiIndex):
|
| 45 |
+
pytest.skip(f"MultiIndex can't hold '{null_obj}'")
|
| 46 |
+
|
| 47 |
+
values = obj._values
|
| 48 |
+
fill_value = values[0]
|
| 49 |
+
expected = values.copy()
|
| 50 |
+
values[0:2] = null_obj
|
| 51 |
+
expected[0:2] = fill_value
|
| 52 |
+
|
| 53 |
+
expected = klass(expected)
|
| 54 |
+
obj = klass(values)
|
| 55 |
+
|
| 56 |
+
result = obj.fillna(fill_value)
|
| 57 |
+
tm.assert_equal(result, expected)
|
| 58 |
+
|
| 59 |
+
# check shallow_copied
|
| 60 |
+
assert obj is not result
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_misc.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
from pandas._config import using_pyarrow_string_dtype
|
| 7 |
+
|
| 8 |
+
from pandas.compat import PYPY
|
| 9 |
+
|
| 10 |
+
from pandas.core.dtypes.common import (
|
| 11 |
+
is_dtype_equal,
|
| 12 |
+
is_object_dtype,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
import pandas as pd
|
| 16 |
+
from pandas import (
|
| 17 |
+
Index,
|
| 18 |
+
Series,
|
| 19 |
+
)
|
| 20 |
+
import pandas._testing as tm
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def test_isnull_notnull_docstrings():
|
| 24 |
+
# GH#41855 make sure its clear these are aliases
|
| 25 |
+
doc = pd.DataFrame.notnull.__doc__
|
| 26 |
+
assert doc.startswith("\nDataFrame.notnull is an alias for DataFrame.notna.\n")
|
| 27 |
+
doc = pd.DataFrame.isnull.__doc__
|
| 28 |
+
assert doc.startswith("\nDataFrame.isnull is an alias for DataFrame.isna.\n")
|
| 29 |
+
|
| 30 |
+
doc = Series.notnull.__doc__
|
| 31 |
+
assert doc.startswith("\nSeries.notnull is an alias for Series.notna.\n")
|
| 32 |
+
doc = Series.isnull.__doc__
|
| 33 |
+
assert doc.startswith("\nSeries.isnull is an alias for Series.isna.\n")
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@pytest.mark.parametrize(
    "op_name, op",
    [
        ("add", "+"),
        ("sub", "-"),
        ("mul", "*"),
        ("mod", "%"),
        ("pow", "**"),
        ("truediv", "/"),
        ("floordiv", "//"),
    ],
)
def test_binary_ops_docstring(frame_or_series, op_name, op):
    """Check that arithmetic method docstrings spell out the operator expression.

    The forward method (e.g. ``add``) must mention ``<klass> <op> other`` and
    the reflected method (e.g. ``radd``) must mention ``other <op> <klass>``,
    where ``<klass>`` is the lower-cased class name.
    """
    # not using the all_arithmetic_functions fixture with _get_opstr
    # as _get_opstr is used internally in the dynamic implementation of the docstring
    own_name = frame_or_series.__name__.lower()

    forward_doc = getattr(frame_or_series, op_name).__doc__
    assert f"{own_name} {op} other" in forward_doc

    # reverse version of the binary ops
    reflected_doc = getattr(frame_or_series, f"r{op_name}").__doc__
    assert f"other {op} {own_name}" in reflected_doc
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def test_ndarray_compat_properties(index_or_series_obj):
    """Check which ndarray-style attributes an Index/Series exposes."""
    obj = index_or_series_obj

    # Attributes that must be present and non-None.
    for attr in ("shape", "dtype", "T", "nbytes"):
        assert getattr(obj, attr, None) is not None

    # deprecated properties: these must not be exposed at all
    for attr in ("strides", "itemsize", "base", "data"):
        assert not hasattr(obj, attr)

    # .item() is only meaningful for exactly one element
    with pytest.raises(
        ValueError, match="can only convert an array of size 1 to a Python scalar"
    ):
        obj.item()

    assert obj.ndim == 1
    assert len(obj) == obj.size

    # a single-element container unwraps to its scalar
    for klass in (Index, Series):
        assert klass([1]).item() == 1
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
@pytest.mark.skipif(
    PYPY or using_pyarrow_string_dtype(),
    reason="not relevant for PyPy doesn't work properly for arrow strings",
)
def test_memory_usage(index_or_series_memory_obj):
    """Compare shallow and deep ``memory_usage`` and ``sys.getsizeof``.

    Empty objects must report zero for both. Objects whose own dtype (or, for
    a Series, whose index dtype) is object, categorical or ``string[python]``
    must report strictly more with ``deep=True``. Everything else must report
    the same figure either way.
    """
    obj = index_or_series_memory_obj
    is_ser = isinstance(obj, Series)

    # Clear index caches so that len(obj) == 0 report 0 memory usage
    index_like = obj.index if is_ser else obj
    index_like._engine.clear_mapping()

    shallow = obj.memory_usage()
    deep = obj.memory_usage(deep=True)

    # True when only deep=True can see the full footprint of the values
    deep_only = (
        is_object_dtype(obj)
        or isinstance(obj.dtype, pd.CategoricalDtype)
        or is_dtype_equal(obj, "string[python]")
    )
    if is_ser and not deep_only:
        # a Series also counts its index, so the index dtype matters too
        deep_only = (
            is_object_dtype(obj.index)
            or isinstance(obj.index.dtype, pd.CategoricalDtype)
            or is_dtype_equal(obj.index.dtype, "string[python]")
        )

    if len(obj) == 0:
        assert deep == shallow == 0
    elif deep_only:
        # only deep will pick them up
        assert deep > shallow
    else:
        assert shallow == deep

    # sys.getsizeof will call the .memory_usage with
    # deep=True, and add on some GC overhead
    assert abs(deep - sys.getsizeof(obj)) < 100
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def test_memory_usage_components_series(series_with_simple_index):
    """Check that Series memory usage equals values usage plus index usage."""
    ser = series_with_simple_index
    with_index = ser.memory_usage(index=True)
    values_only = ser.memory_usage(index=False)
    index_only = ser.index.memory_usage()
    assert with_index == values_only + index_only
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
@pytest.mark.parametrize("dtype", tm.NARROW_NP_DTYPES)
def test_memory_usage_components_narrow_series(dtype):
    """Check values-plus-index memory accounting for a five-row narrow-dtype Series."""
    labels = [f"i-{i}" for i in range(5)]
    ser = Series(range(5), dtype=dtype, index=labels, name="a")

    with_index = ser.memory_usage(index=True)
    values_only = ser.memory_usage(index=False)
    index_only = ser.index.memory_usage()
    assert with_index == values_only + index_only
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def test_searchsorted(request, index_or_series_obj):
    """Check that ``np.searchsorted`` on an Index/Series returns an in-range position.

    MultiIndex inputs and complex-dtype Index inputs are marked as expected
    failures before the calls are made.
    """
    # numpy.searchsorted calls obj.searchsorted under the hood.
    # See gh-12238
    obj = index_or_series_obj

    xfail_reason = None
    if isinstance(obj, pd.MultiIndex):
        # See gh-14833
        xfail_reason = "np.searchsorted doesn't work on pd.MultiIndex: GH 14833"
    elif isinstance(obj, Index) and obj.dtype.kind == "c":
        # TODO: Should Series cases also raise? Looks like they use numpy
        # comparison semantics https://github.com/numpy/numpy/issues/15981
        xfail_reason = "complex objects are not comparable"
    if xfail_reason is not None:
        request.applymarker(pytest.mark.xfail(reason=xfail_reason))

    length = len(obj)
    largest = max(obj, default=0)

    # once without and once with an explicit (identity) sorter
    for extra in ({}, {"sorter": range(length)}):
        position = np.searchsorted(obj, largest, **extra)
        assert 0 <= position <= length
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def test_access_by_position(index_flat):
    """Check positional access on an Index against ``Series.iloc``.

    Empty indexes are skipped. Position 5 is read unconditionally, so this
    assumes every non-empty fixture holds at least six elements.
    """
    index = index_flat
    size = len(index)

    if size == 0:
        pytest.skip("Test doesn't make sense on empty data")

    series = Series(index)
    for position in (0, 5, -1):
        assert index[position] == series.iloc[position]

    # negative indexing wraps around to the last element
    assert index[-1] == index[size - 1]

    # pyarrow-backed string indexes word the out-of-bounds error differently
    arrow_backed = any(
        is_dtype_equal(index.dtype, alias)
        for alias in ("string[pyarrow]", "string[pyarrow_numpy]")
    )
    if arrow_backed:
        index_msg = "index out of bounds"
    else:
        index_msg = f"index {size} is out of bounds for axis 0 with size {size}"

    with pytest.raises(IndexError, match=index_msg):
        index[size]
    with pytest.raises(IndexError, match="single positional indexer is out-of-bounds"):
        series.iloc[size]
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_transpose.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas import (
|
| 5 |
+
CategoricalDtype,
|
| 6 |
+
DataFrame,
|
| 7 |
+
)
|
| 8 |
+
import pandas._testing as tm
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def test_transpose(index_or_series_obj):
    """Check that transposing an Index/Series yields an equal object."""
    transposed = index_or_series_obj.transpose()
    tm.assert_equal(transposed, index_or_series_obj)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def test_transpose_non_default_axes(index_or_series_obj):
    """Check that ``transpose`` rejects an ``axes`` argument, positional or keyword."""
    obj = index_or_series_obj
    unsupported = "the 'axes' parameter is not supported"

    for args, kwargs in (((1,), {}), ((), {"axes": 1})):
        with pytest.raises(ValueError, match=unsupported):
            obj.transpose(*args, **kwargs)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def test_numpy_transpose(index_or_series_obj):
    """Check ``np.transpose`` on an Index/Series: equal result, ``axes`` rejected."""
    obj = index_or_series_obj

    via_numpy = np.transpose(obj)
    tm.assert_equal(via_numpy, obj)

    with pytest.raises(ValueError, match="the 'axes' parameter is not supported"):
        np.transpose(obj, axes=1)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
@pytest.mark.parametrize(
    "data, transposed_data, index, columns, dtype",
    [
        ([[1], [2]], [[1, 2]], ["a", "a"], ["b"], int),
        ([[1], [2]], [[1, 2]], ["a", "a"], ["b"], CategoricalDtype([1, 2])),
        ([[1, 2]], [[1], [2]], ["b"], ["a", "a"], int),
        ([[1, 2]], [[1], [2]], ["b"], ["a", "a"], CategoricalDtype([1, 2])),
        ([[1, 2], [3, 4]], [[1, 3], [2, 4]], ["a", "a"], ["b", "b"], int),
        (
            [[1, 2], [3, 4]],
            [[1, 3], [2, 4]],
            ["a", "a"],
            ["b", "b"],
            CategoricalDtype([1, 2, 3, 4]),
        ),
    ],
)
def test_duplicate_labels(data, transposed_data, index, columns, dtype):
    """Check ``DataFrame.T`` when row and/or column labels are duplicated.

    The expected frame is built from the transposed data with the row and
    column labels swapped and the same dtype.
    """
    # GH 42380
    frame = DataFrame(data, index=index, columns=columns, dtype=dtype)
    swapped = DataFrame(transposed_data, index=columns, columns=index, dtype=dtype)
    tm.assert_frame_equal(frame.T, swapped)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_unique.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pytest
|
| 3 |
+
|
| 4 |
+
from pandas._config import using_pyarrow_string_dtype
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import pandas._testing as tm
|
| 8 |
+
from pandas.tests.base.common import allow_na_ops
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_unique(index_or_series_obj):
    """Check ``unique()`` against an order-preserving de-duplication of the values.

    The fixture is expanded with ``np.repeat`` using repeat counts
    ``1..len(obj)`` before ``unique()`` is called.
    """
    repeat_counts = range(1, len(index_or_series_obj) + 1)
    obj = np.repeat(index_or_series_obj, repeat_counts)
    result = obj.unique()

    # dict.fromkeys preserves the order
    ordered_uniques = list(dict.fromkeys(obj.values))

    if not isinstance(obj, pd.Index):
        # Series.unique() is compared against a plain ndarray
        tm.assert_numpy_array_equal(result, np.array(ordered_uniques))
        return

    if isinstance(obj, pd.MultiIndex):
        expected = pd.MultiIndex.from_tuples(ordered_uniques)
        expected.names = obj.names
    else:
        expected = pd.Index(ordered_uniques, dtype=obj.dtype)
        if isinstance(obj.dtype, pd.DatetimeTZDtype):
            expected = expected.normalize()
    tm.assert_index_equal(result, expected, exact=True)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
@pytest.mark.parametrize("null_obj", [np.nan, None])
def test_unique_null(null_obj, index_or_series_obj):
    """Check ``unique()`` when the first two values are replaced by a null.

    Skipped when ``allow_na_ops`` rejects the object, when it is empty, or
    when it is a MultiIndex. The expected result is the null marker followed
    by the non-null values in first-seen order.
    """
    obj = index_or_series_obj

    # pytest.skip raises, so the checks below run strictly in this order
    if not allow_na_ops(obj):
        pytest.skip("type doesn't allow for NA operations")
    if len(obj) < 1:
        pytest.skip("Test doesn't make sense on empty data")
    if isinstance(obj, pd.MultiIndex):
        pytest.skip(f"MultiIndex can't hold '{null_obj}'")

    raw = obj._values
    raw[0:2] = null_obj

    repeated = np.repeat(raw, range(1, len(raw) + 1))
    obj = type(obj)(repeated, dtype=obj.dtype)
    result = obj.unique()

    # because np.nan == np.nan is False, but None == None is True
    # np.nan would be duplicated, whereas None wouldn't
    # so drop every null from the de-duplicated keys and put one back in front
    non_null = [val for val in dict.fromkeys(obj.values) if not pd.isnull(val)]
    unique_values = [null_obj, *non_null]

    if isinstance(obj, pd.Index):
        expected = pd.Index(unique_values, dtype=obj.dtype)
        if isinstance(obj.dtype, pd.DatetimeTZDtype):
            result = result.normalize()
            expected = expected.normalize()
        tm.assert_index_equal(result, expected, exact=True)
    else:
        expected = np.array(unique_values, dtype=obj.dtype)
        tm.assert_numpy_array_equal(result, expected)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def test_nunique(index_or_series_obj):
    """Check that ``nunique(dropna=False)`` matches the length of ``unique()``."""
    repeat_counts = range(1, len(index_or_series_obj) + 1)
    obj = np.repeat(index_or_series_obj, repeat_counts)
    n_distinct = len(obj.unique())
    assert obj.nunique(dropna=False) == n_distinct
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
@pytest.mark.parametrize("null_obj", [np.nan, None])
def test_nunique_null(null_obj, index_or_series_obj):
    """Check ``nunique`` with and without ``dropna`` after injecting nulls.

    Skipped when ``allow_na_ops`` rejects the object or when it is a
    MultiIndex. For a CategoricalIndex the counts are derived from the
    categories; otherwise from ``len(obj.unique())``.
    """
    obj = index_or_series_obj

    if not allow_na_ops(obj):
        pytest.skip("type doesn't allow for NA operations")
    if isinstance(obj, pd.MultiIndex):
        pytest.skip(f"MultiIndex can't hold '{null_obj}'")

    raw = obj._values
    raw[0:2] = null_obj

    repeated = np.repeat(raw, range(1, len(raw) + 1))
    obj = type(obj)(repeated, dtype=obj.dtype)

    if isinstance(obj, pd.CategoricalIndex):
        without_null = len(obj.categories)
        with_null = len(obj.categories) + 1
    else:
        n_distinct = len(obj.unique())
        # the null counts as one distinct value; clamp at zero
        without_null = max(0, n_distinct - 1)
        with_null = max(0, n_distinct)

    assert obj.nunique() == without_null
    assert obj.nunique(dropna=False) == with_null
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
@pytest.mark.single_cpu
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="decoding fails")
def test_unique_bad_unicode(index_or_series):
    """Check ``unique()`` on a string that is a lone surrogate code point."""
    # regression test for #34550
    # U+D83D is an unpaired high surrogate, so it is not a complete character
    lone_surrogate = "\ud83d"

    obj = index_or_series([lone_surrogate] * 2)
    result = obj.unique()

    if isinstance(obj, pd.Index):
        tm.assert_index_equal(
            result, pd.Index([lone_surrogate], dtype=object), exact=True
        )
    else:
        tm.assert_numpy_array_equal(
            result, np.array([lone_surrogate], dtype=object)
        )
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
@pytest.mark.parametrize("dropna", [True, False])
def test_nunique_dropna(dropna):
    """Check ``Series.nunique`` over a mix of different null markers.

    With ``dropna=True`` only ``"yes"`` is counted. With ``dropna=False``
    each of ``pd.NA``, ``np.nan``, ``None`` and ``pd.NaT`` is counted as
    its own value, giving five.
    """
    # GH37566
    # dtype=object is pinned so the four null markers stay distinct objects
    # and are not coerced by string-dtype inference.
    ser = pd.Series(["yes", "yes", pd.NA, np.nan, None, pd.NaT], dtype=object)
    res = ser.nunique(dropna)

    # Compute the expectation first: ``assert res == 1 if dropna else 5``
    # parses as ``assert ((res == 1) if dropna else 5)``, which always passes
    # for dropna=False because 5 is truthy.
    expected = 1 if dropna else 5
    assert res == expected
|