BryanW committed on
Commit
a838e8c
·
verified ·
1 Parent(s): bfeb483

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/__init__.cpython-312.pyc +0 -0
  2. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/_typing.cpython-312.pyc +0 -0
  3. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/_version.cpython-312.pyc +0 -0
  4. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/_version_meson.cpython-312.pyc +0 -0
  5. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/conftest.cpython-312.pyc +0 -0
  6. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/testing.cpython-312.pyc +0 -0
  7. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/__init__.py +639 -0
  8. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/_hypothesis.py +93 -0
  9. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/_io.py +170 -0
  10. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/_warnings.py +232 -0
  11. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/asserters.py +1435 -0
  12. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/compat.py +29 -0
  13. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/contexts.py +257 -0
  14. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/arrays/__init__.py +53 -0
  15. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/errors/__init__.py +850 -0
  16. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/feather_format.py +143 -0
  17. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/gbq.py +255 -0
  18. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/html.py +1259 -0
  19. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/pickle.py +210 -0
  20. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/pytables.py +0 -0
  21. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/sql.py +0 -0
  22. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/__init__.cpython-312.pyc +0 -0
  23. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_aggregation.cpython-312.pyc +0 -0
  24. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_common.cpython-312.pyc +0 -0
  25. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_downstream.cpython-312.pyc +0 -0
  26. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_errors.cpython-312.pyc +0 -0
  27. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_expressions.cpython-312.pyc +0 -0
  28. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_flags.cpython-312.pyc +0 -0
  29. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_multilevel.cpython-312.pyc +0 -0
  30. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_nanops.cpython-312.pyc +0 -0
  31. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_optional_dependency.cpython-312.pyc +0 -0
  32. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_register_accessor.cpython-312.pyc +0 -0
  33. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_sorting.cpython-312.pyc +0 -0
  34. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_take.cpython-312.pyc +0 -0
  35. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/__init__.py +0 -0
  36. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/masked_shared.py +154 -0
  37. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_array.py +478 -0
  38. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_datetimelike.py +1344 -0
  39. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_datetimes.py +840 -0
  40. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_ndarray_backed.py +75 -0
  41. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_period.py +184 -0
  42. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_timedeltas.py +313 -0
  43. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/__init__.py +0 -0
  44. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/common.py +9 -0
  45. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_constructors.py +179 -0
  46. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_conversion.py +562 -0
  47. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_fillna.py +60 -0
  48. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_misc.py +191 -0
  49. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_transpose.py +56 -0
  50. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_unique.py +124 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (7.71 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/_typing.cpython-312.pyc ADDED
Binary file (14.7 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/_version.cpython-312.pyc ADDED
Binary file (22 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/_version_meson.cpython-312.pyc ADDED
Binary file (312 Bytes). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/conftest.cpython-312.pyc ADDED
Binary file (67.4 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/__pycache__/testing.cpython-312.pyc ADDED
Binary file (478 Bytes). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/__init__.py ADDED
@@ -0,0 +1,639 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from decimal import Decimal
4
+ import operator
5
+ import os
6
+ from sys import byteorder
7
+ from typing import (
8
+ TYPE_CHECKING,
9
+ Callable,
10
+ ContextManager,
11
+ cast,
12
+ )
13
+ import warnings
14
+
15
+ import numpy as np
16
+
17
+ from pandas._config.localization import (
18
+ can_set_locale,
19
+ get_locales,
20
+ set_locale,
21
+ )
22
+
23
+ from pandas.compat import pa_version_under10p1
24
+
25
+ from pandas.core.dtypes.common import is_string_dtype
26
+
27
+ import pandas as pd
28
+ from pandas import (
29
+ ArrowDtype,
30
+ DataFrame,
31
+ Index,
32
+ MultiIndex,
33
+ RangeIndex,
34
+ Series,
35
+ )
36
+ from pandas._testing._io import (
37
+ round_trip_localpath,
38
+ round_trip_pathlib,
39
+ round_trip_pickle,
40
+ write_to_compressed,
41
+ )
42
+ from pandas._testing._warnings import (
43
+ assert_produces_warning,
44
+ maybe_produces_warning,
45
+ )
46
+ from pandas._testing.asserters import (
47
+ assert_almost_equal,
48
+ assert_attr_equal,
49
+ assert_categorical_equal,
50
+ assert_class_equal,
51
+ assert_contains_all,
52
+ assert_copy,
53
+ assert_datetime_array_equal,
54
+ assert_dict_equal,
55
+ assert_equal,
56
+ assert_extension_array_equal,
57
+ assert_frame_equal,
58
+ assert_index_equal,
59
+ assert_indexing_slices_equivalent,
60
+ assert_interval_array_equal,
61
+ assert_is_sorted,
62
+ assert_is_valid_plot_return_object,
63
+ assert_metadata_equivalent,
64
+ assert_numpy_array_equal,
65
+ assert_period_array_equal,
66
+ assert_series_equal,
67
+ assert_sp_array_equal,
68
+ assert_timedelta_array_equal,
69
+ raise_assert_detail,
70
+ )
71
+ from pandas._testing.compat import (
72
+ get_dtype,
73
+ get_obj,
74
+ )
75
+ from pandas._testing.contexts import (
76
+ assert_cow_warning,
77
+ decompress_file,
78
+ ensure_clean,
79
+ raises_chained_assignment_error,
80
+ set_timezone,
81
+ use_numexpr,
82
+ with_csv_dialect,
83
+ )
84
+ from pandas.core.arrays import (
85
+ BaseMaskedArray,
86
+ ExtensionArray,
87
+ NumpyExtensionArray,
88
+ )
89
+ from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
90
+ from pandas.core.construction import extract_array
91
+
92
+ if TYPE_CHECKING:
93
+ from pandas._typing import (
94
+ Dtype,
95
+ NpDtype,
96
+ )
97
+
98
+ from pandas.core.arrays import ArrowExtensionArray
99
+
100
+ UNSIGNED_INT_NUMPY_DTYPES: list[NpDtype] = ["uint8", "uint16", "uint32", "uint64"]
101
+ UNSIGNED_INT_EA_DTYPES: list[Dtype] = ["UInt8", "UInt16", "UInt32", "UInt64"]
102
+ SIGNED_INT_NUMPY_DTYPES: list[NpDtype] = [int, "int8", "int16", "int32", "int64"]
103
+ SIGNED_INT_EA_DTYPES: list[Dtype] = ["Int8", "Int16", "Int32", "Int64"]
104
+ ALL_INT_NUMPY_DTYPES = UNSIGNED_INT_NUMPY_DTYPES + SIGNED_INT_NUMPY_DTYPES
105
+ ALL_INT_EA_DTYPES = UNSIGNED_INT_EA_DTYPES + SIGNED_INT_EA_DTYPES
106
+ ALL_INT_DTYPES: list[Dtype] = [*ALL_INT_NUMPY_DTYPES, *ALL_INT_EA_DTYPES]
107
+
108
+ FLOAT_NUMPY_DTYPES: list[NpDtype] = [float, "float32", "float64"]
109
+ FLOAT_EA_DTYPES: list[Dtype] = ["Float32", "Float64"]
110
+ ALL_FLOAT_DTYPES: list[Dtype] = [*FLOAT_NUMPY_DTYPES, *FLOAT_EA_DTYPES]
111
+
112
+ COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"]
113
+ STRING_DTYPES: list[Dtype] = [str, "str", "U"]
114
+ COMPLEX_FLOAT_DTYPES: list[Dtype] = [*COMPLEX_DTYPES, *FLOAT_NUMPY_DTYPES]
115
+
116
+ DATETIME64_DTYPES: list[Dtype] = ["datetime64[ns]", "M8[ns]"]
117
+ TIMEDELTA64_DTYPES: list[Dtype] = ["timedelta64[ns]", "m8[ns]"]
118
+
119
+ BOOL_DTYPES: list[Dtype] = [bool, "bool"]
120
+ BYTES_DTYPES: list[Dtype] = [bytes, "bytes"]
121
+ OBJECT_DTYPES: list[Dtype] = [object, "object"]
122
+
123
+ ALL_REAL_NUMPY_DTYPES = FLOAT_NUMPY_DTYPES + ALL_INT_NUMPY_DTYPES
124
+ ALL_REAL_EXTENSION_DTYPES = FLOAT_EA_DTYPES + ALL_INT_EA_DTYPES
125
+ ALL_REAL_DTYPES: list[Dtype] = [*ALL_REAL_NUMPY_DTYPES, *ALL_REAL_EXTENSION_DTYPES]
126
+ ALL_NUMERIC_DTYPES: list[Dtype] = [*ALL_REAL_DTYPES, *COMPLEX_DTYPES]
127
+
128
+ ALL_NUMPY_DTYPES = (
129
+ ALL_REAL_NUMPY_DTYPES
130
+ + COMPLEX_DTYPES
131
+ + STRING_DTYPES
132
+ + DATETIME64_DTYPES
133
+ + TIMEDELTA64_DTYPES
134
+ + BOOL_DTYPES
135
+ + OBJECT_DTYPES
136
+ + BYTES_DTYPES
137
+ )
138
+
139
+ NARROW_NP_DTYPES = [
140
+ np.float16,
141
+ np.float32,
142
+ np.int8,
143
+ np.int16,
144
+ np.int32,
145
+ np.uint8,
146
+ np.uint16,
147
+ np.uint32,
148
+ ]
149
+
150
+ PYTHON_DATA_TYPES = [
151
+ str,
152
+ int,
153
+ float,
154
+ complex,
155
+ list,
156
+ tuple,
157
+ range,
158
+ dict,
159
+ set,
160
+ frozenset,
161
+ bool,
162
+ bytes,
163
+ bytearray,
164
+ memoryview,
165
+ ]
166
+
167
+ ENDIAN = {"little": "<", "big": ">"}[byteorder]
168
+
169
+ NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")]
170
+ NP_NAT_OBJECTS = [
171
+ cls("NaT", unit)
172
+ for cls in [np.datetime64, np.timedelta64]
173
+ for unit in [
174
+ "Y",
175
+ "M",
176
+ "W",
177
+ "D",
178
+ "h",
179
+ "m",
180
+ "s",
181
+ "ms",
182
+ "us",
183
+ "ns",
184
+ "ps",
185
+ "fs",
186
+ "as",
187
+ ]
188
+ ]
189
+
190
+ if not pa_version_under10p1:
191
+ import pyarrow as pa
192
+
193
+ UNSIGNED_INT_PYARROW_DTYPES = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()]
194
+ SIGNED_INT_PYARROW_DTYPES = [pa.int8(), pa.int16(), pa.int32(), pa.int64()]
195
+ ALL_INT_PYARROW_DTYPES = UNSIGNED_INT_PYARROW_DTYPES + SIGNED_INT_PYARROW_DTYPES
196
+ ALL_INT_PYARROW_DTYPES_STR_REPR = [
197
+ str(ArrowDtype(typ)) for typ in ALL_INT_PYARROW_DTYPES
198
+ ]
199
+
200
+ # pa.float16 doesn't seem supported
201
+ # https://github.com/apache/arrow/blob/master/python/pyarrow/src/arrow/python/helpers.cc#L86
202
+ FLOAT_PYARROW_DTYPES = [pa.float32(), pa.float64()]
203
+ FLOAT_PYARROW_DTYPES_STR_REPR = [
204
+ str(ArrowDtype(typ)) for typ in FLOAT_PYARROW_DTYPES
205
+ ]
206
+ DECIMAL_PYARROW_DTYPES = [pa.decimal128(7, 3)]
207
+ STRING_PYARROW_DTYPES = [pa.string()]
208
+ BINARY_PYARROW_DTYPES = [pa.binary()]
209
+
210
+ TIME_PYARROW_DTYPES = [
211
+ pa.time32("s"),
212
+ pa.time32("ms"),
213
+ pa.time64("us"),
214
+ pa.time64("ns"),
215
+ ]
216
+ DATE_PYARROW_DTYPES = [pa.date32(), pa.date64()]
217
+ DATETIME_PYARROW_DTYPES = [
218
+ pa.timestamp(unit=unit, tz=tz)
219
+ for unit in ["s", "ms", "us", "ns"]
220
+ for tz in [None, "UTC", "US/Pacific", "US/Eastern"]
221
+ ]
222
+ TIMEDELTA_PYARROW_DTYPES = [pa.duration(unit) for unit in ["s", "ms", "us", "ns"]]
223
+
224
+ BOOL_PYARROW_DTYPES = [pa.bool_()]
225
+
226
+ # TODO: Add container like pyarrow types:
227
+ # https://arrow.apache.org/docs/python/api/datatypes.html#factory-functions
228
+ ALL_PYARROW_DTYPES = (
229
+ ALL_INT_PYARROW_DTYPES
230
+ + FLOAT_PYARROW_DTYPES
231
+ + DECIMAL_PYARROW_DTYPES
232
+ + STRING_PYARROW_DTYPES
233
+ + BINARY_PYARROW_DTYPES
234
+ + TIME_PYARROW_DTYPES
235
+ + DATE_PYARROW_DTYPES
236
+ + DATETIME_PYARROW_DTYPES
237
+ + TIMEDELTA_PYARROW_DTYPES
238
+ + BOOL_PYARROW_DTYPES
239
+ )
240
+ ALL_REAL_PYARROW_DTYPES_STR_REPR = (
241
+ ALL_INT_PYARROW_DTYPES_STR_REPR + FLOAT_PYARROW_DTYPES_STR_REPR
242
+ )
243
+ else:
244
+ FLOAT_PYARROW_DTYPES_STR_REPR = []
245
+ ALL_INT_PYARROW_DTYPES_STR_REPR = []
246
+ ALL_PYARROW_DTYPES = []
247
+ ALL_REAL_PYARROW_DTYPES_STR_REPR = []
248
+
249
+ ALL_REAL_NULLABLE_DTYPES = (
250
+ FLOAT_NUMPY_DTYPES + ALL_REAL_EXTENSION_DTYPES + ALL_REAL_PYARROW_DTYPES_STR_REPR
251
+ )
252
+
253
+ arithmetic_dunder_methods = [
254
+ "__add__",
255
+ "__radd__",
256
+ "__sub__",
257
+ "__rsub__",
258
+ "__mul__",
259
+ "__rmul__",
260
+ "__floordiv__",
261
+ "__rfloordiv__",
262
+ "__truediv__",
263
+ "__rtruediv__",
264
+ "__pow__",
265
+ "__rpow__",
266
+ "__mod__",
267
+ "__rmod__",
268
+ ]
269
+
270
+ comparison_dunder_methods = ["__eq__", "__ne__", "__le__", "__lt__", "__ge__", "__gt__"]
271
+
272
+
273
+ # -----------------------------------------------------------------------------
274
+ # Comparators
275
+
276
+
277
+ def box_expected(expected, box_cls, transpose: bool = True):
278
+ """
279
+ Helper function to wrap the expected output of a test in a given box_class.
280
+
281
+ Parameters
282
+ ----------
283
+ expected : np.ndarray, Index, Series
284
+ box_cls : {Index, Series, DataFrame}
285
+
286
+ Returns
287
+ -------
288
+ subclass of box_cls
289
+ """
290
+ if box_cls is pd.array:
291
+ if isinstance(expected, RangeIndex):
292
+ # pd.array would return an IntegerArray
293
+ expected = NumpyExtensionArray(np.asarray(expected._values))
294
+ else:
295
+ expected = pd.array(expected, copy=False)
296
+ elif box_cls is Index:
297
+ with warnings.catch_warnings():
298
+ warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
299
+ expected = Index(expected)
300
+ elif box_cls is Series:
301
+ with warnings.catch_warnings():
302
+ warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
303
+ expected = Series(expected)
304
+ elif box_cls is DataFrame:
305
+ with warnings.catch_warnings():
306
+ warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
307
+ expected = Series(expected).to_frame()
308
+ if transpose:
309
+ # for vector operations, we need a DataFrame to be a single-row,
310
+ # not a single-column, in order to operate against non-DataFrame
311
+ # vectors of the same length. But convert to two rows to avoid
312
+ # single-row special cases in datetime arithmetic
313
+ expected = expected.T
314
+ expected = pd.concat([expected] * 2, ignore_index=True)
315
+ elif box_cls is np.ndarray or box_cls is np.array:
316
+ expected = np.array(expected)
317
+ elif box_cls is to_array:
318
+ expected = to_array(expected)
319
+ else:
320
+ raise NotImplementedError(box_cls)
321
+ return expected
322
+
323
+
324
def to_array(obj):
    """
    Similar to pd.array, but does not cast numpy dtypes to nullable dtypes.

    Objects without a ``dtype`` attribute (or whose ``dtype`` is None) go
    through ``np.asarray``; everything else is passed to ``extract_array``
    with ``extract_numpy=True``.
    """
    # temporary implementation until we get pd.array in place
    if getattr(obj, "dtype", None) is None:
        return np.asarray(obj)
    return extract_array(obj, extract_numpy=True)
335
+
336
+
337
class SubclassedSeries(Series):
    """Series subclass used in tests; declares ``testattr`` and ``name`` metadata."""

    _metadata = ["testattr", "name"]

    @property
    def _constructor(self):
        # For testing, those properties return a generic callable, and not
        # the actual class. In this case that is equivalent, but it is to
        # ensure we don't rely on the property returning a class
        # See https://github.com/pandas-dev/pandas/pull/46018 and
        # https://github.com/pandas-dev/pandas/issues/32638 and linked issues
        def build_series(*args, **kwargs):
            return SubclassedSeries(*args, **kwargs)

        return build_series

    @property
    def _constructor_expanddim(self):
        # Same idea: a plain callable that builds the subclassed frame.
        def build_frame(*args, **kwargs):
            return SubclassedDataFrame(*args, **kwargs)

        return build_frame
352
+
353
+
354
class SubclassedDataFrame(DataFrame):
    """DataFrame subclass used in tests; declares ``testattr`` metadata."""

    _metadata = ["testattr"]

    @property
    def _constructor(self):
        # A plain callable rather than the class itself.
        def build_frame(*args, **kwargs):
            return SubclassedDataFrame(*args, **kwargs)

        return build_frame

    @property
    def _constructor_sliced(self):
        # A plain callable that builds the subclassed series.
        def build_series(*args, **kwargs):
            return SubclassedSeries(*args, **kwargs)

        return build_series
364
+
365
+
366
def convert_rows_list_to_csv_str(rows_list: list[str]) -> str:
    """
    Join CSV rows into one string using the current OS line separator.

    Used to build the expected value of ``to_csv()`` output.

    Parameters
    ----------
    rows_list : List[str]
        Each element represents the row of csv.

    Returns
    -------
    str
        The rows joined by ``os.linesep``, followed by one trailing
        ``os.linesep``.
    """
    return f"{os.linesep.join(rows_list)}{os.linesep}"
384
+
385
+
386
def external_error_raised(expected_exception: type[Exception]) -> ContextManager:
    """
    Helper function to mark pytest.raises that have an external error message.

    Parameters
    ----------
    expected_exception : Exception
        Expected error to raise.

    Returns
    -------
    ContextManager
        The result of ``pytest.raises(expected_exception, match=None)``,
        i.e. no check is made on the error message.
    """
    # pytest is imported here, inside the function, not at module level.
    import pytest

    return pytest.raises(expected_exception, match=None)
403
+
404
+
405
+ cython_table = pd.core.common._cython_table.items()
406
+
407
+
408
def get_cython_table_params(ndframe, func_names_and_expected):
    """
    Pair a frame with function names, their ``cython_table`` aliases and
    the expected results.

    Parameters
    ----------
    ndframe : DataFrame or Series
    func_names_and_expected : Sequence of two items
        The first item is a name of a NDFrame method ('sum', 'prod') etc.
        The second item is the expected return value.

    Returns
    -------
    list
        List of three-item tuples (ndframe, function, expected result).
        For each input pair, the entry with the function name comes first,
        followed by one entry per ``cython_table`` key mapped to that name.
    """
    params = []
    for func_name, expected in func_names_and_expected:
        params.append((ndframe, func_name, expected))
        params.extend(
            (ndframe, func, expected)
            for func, name in cython_table
            if name == func_name
        )
    return params
434
+
435
+
436
def get_op_from_name(op_name: str) -> Callable:
    """
    The operator function for a given op name.

    Parameters
    ----------
    op_name : str
        The op name, in form of "add" or "__add__". Reversed names such
        as "radd" or "__radd__" are also accepted.

    Returns
    -------
    function
        A function performing the operation. For a reversed name, the
        returned function swaps its two arguments before applying the
        forward operator.

    Raises
    ------
    AttributeError
        If neither the name nor (for an ``r``-prefixed name) its forward
        counterpart exists in the ``operator`` module.
    """
    short_opname = op_name.strip("_")

    forward = getattr(operator, short_opname, None)
    if forward is not None:
        return forward

    # Only names that really carry the reverse-operator prefix are treated
    # as reversed; previously any unknown name had its first character
    # dropped, so e.g. "xadd" silently resolved to a reversed ``add``.
    base_name = short_opname[1:]
    if not short_opname.startswith("r") or not hasattr(operator, base_name):
        raise AttributeError(
            f"module 'operator' has no attribute matching {op_name!r}"
        )
    rop = getattr(operator, base_name)

    def reversed_op(x, y):
        return rop(y, x)

    return reversed_op
459
+
460
+
461
+ # -----------------------------------------------------------------------------
462
+ # Indexing test helpers
463
+
464
+
465
def getitem(x):
    """Return ``x`` itself, unchanged."""
    return x


def setitem(x):
    """Return ``x`` itself, unchanged."""
    return x


def loc(x):
    """Return the ``loc`` attribute of ``x``."""
    return x.loc


def iloc(x):
    """Return the ``iloc`` attribute of ``x``."""
    return x.iloc


def at(x):
    """Return the ``at`` attribute of ``x``."""
    return x.at


def iat(x):
    """Return the ``iat`` attribute of ``x``."""
    return x.iat
487
+
488
+
489
+ # -----------------------------------------------------------------------------
490
+
491
# Supported units, ordered from coarsest to finest.
_UNITS = ["s", "ms", "us", "ns"]


def get_finest_unit(left: str, right: str):
    """
    Return whichever of two datetime64 units appears later in ``_UNITS``.

    When both units are the same, ``left`` is returned. A unit that is
    not in ``_UNITS`` raises ValueError from the list lookup.
    """
    # max() keeps the first argument on ties, so equal units yield ``left``.
    return max(left, right, key=_UNITS.index)
501
+
502
+
503
def shares_memory(left, right) -> bool:
    """
    Pandas-compat for np.shares_memory.

    ``left`` is unwrapped step by step (index, series, extension array,
    single-array DataFrame) until the comparison can be handed to
    ``np.shares_memory`` or answered directly.

    Parameters
    ----------
    left, right : object
        Objects to compare. If only ``left`` is an ndarray, the arguments
        are swapped so that the unwrapping applies to the other object.

    Returns
    -------
    bool

    Raises
    ------
    NotImplementedError
        If no branch below handles the combination of types.
    """
    if isinstance(left, np.ndarray) and isinstance(right, np.ndarray):
        return np.shares_memory(left, right)
    elif isinstance(left, np.ndarray):
        # Call with reversed args to get to unpacking logic below.
        return shares_memory(right, left)

    # A RangeIndex is always reported as not sharing memory.
    if isinstance(left, RangeIndex):
        return False
    if isinstance(left, MultiIndex):
        return shares_memory(left._codes, right)
    if isinstance(left, (Index, Series)):
        return shares_memory(left._values, right)

    if isinstance(left, NDArrayBackedExtensionArray):
        return shares_memory(left._ndarray, right)
    if isinstance(left, pd.core.arrays.SparseArray):
        return shares_memory(left.sp_values, right)
    if isinstance(left, pd.core.arrays.IntervalArray):
        # Sharing either side of the intervals counts.
        return shares_memory(left._left, right) or shares_memory(left._right, right)

    if (
        isinstance(left, ExtensionArray)
        and is_string_dtype(left.dtype)
        and left.dtype.storage in ("pyarrow", "pyarrow_numpy")  # type: ignore[attr-defined]
    ):
        # https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
        left = cast("ArrowExtensionArray", left)
        if (
            isinstance(right, ExtensionArray)
            and is_string_dtype(right.dtype)
            and right.dtype.storage in ("pyarrow", "pyarrow_numpy")  # type: ignore[attr-defined]
        ):
            right = cast("ArrowExtensionArray", right)
            left_pa_data = left._pa_array
            right_pa_data = right._pa_array
            # Only chunk 0 of each array is inspected, and only the buffer
            # at index 1 of that chunk is compared.
            # NOTE(review): presumably this requires at least one chunk
            # on both sides -- confirm.
            left_buf1 = left_pa_data.chunk(0).buffers()[1]
            right_buf1 = right_pa_data.chunk(0).buffers()[1]
            return left_buf1 == right_buf1
        # If ``right`` is not a pyarrow-backed string array, control falls
        # through to the checks below.

    if isinstance(left, BaseMaskedArray) and isinstance(right, BaseMaskedArray):
        # By convention, we'll say these share memory if they share *either*
        # the _data or the _mask
        return np.shares_memory(left._data, right._data) or np.shares_memory(
            left._mask, right._mask
        )

    # Only DataFrames whose manager holds exactly one array are supported.
    if isinstance(left, DataFrame) and len(left._mgr.arrays) == 1:
        arr = left._mgr.arrays[0]
        return shares_memory(arr, right)

    raise NotImplementedError(type(left), type(right))
558
+
559
+
560
+ __all__ = [
561
+ "ALL_INT_EA_DTYPES",
562
+ "ALL_INT_NUMPY_DTYPES",
563
+ "ALL_NUMPY_DTYPES",
564
+ "ALL_REAL_NUMPY_DTYPES",
565
+ "assert_almost_equal",
566
+ "assert_attr_equal",
567
+ "assert_categorical_equal",
568
+ "assert_class_equal",
569
+ "assert_contains_all",
570
+ "assert_copy",
571
+ "assert_datetime_array_equal",
572
+ "assert_dict_equal",
573
+ "assert_equal",
574
+ "assert_extension_array_equal",
575
+ "assert_frame_equal",
576
+ "assert_index_equal",
577
+ "assert_indexing_slices_equivalent",
578
+ "assert_interval_array_equal",
579
+ "assert_is_sorted",
580
+ "assert_is_valid_plot_return_object",
581
+ "assert_metadata_equivalent",
582
+ "assert_numpy_array_equal",
583
+ "assert_period_array_equal",
584
+ "assert_produces_warning",
585
+ "assert_series_equal",
586
+ "assert_sp_array_equal",
587
+ "assert_timedelta_array_equal",
588
+ "assert_cow_warning",
589
+ "at",
590
+ "BOOL_DTYPES",
591
+ "box_expected",
592
+ "BYTES_DTYPES",
593
+ "can_set_locale",
594
+ "COMPLEX_DTYPES",
595
+ "convert_rows_list_to_csv_str",
596
+ "DATETIME64_DTYPES",
597
+ "decompress_file",
598
+ "ENDIAN",
599
+ "ensure_clean",
600
+ "external_error_raised",
601
+ "FLOAT_EA_DTYPES",
602
+ "FLOAT_NUMPY_DTYPES",
603
+ "get_cython_table_params",
604
+ "get_dtype",
605
+ "getitem",
606
+ "get_locales",
607
+ "get_finest_unit",
608
+ "get_obj",
609
+ "get_op_from_name",
610
+ "iat",
611
+ "iloc",
612
+ "loc",
613
+ "maybe_produces_warning",
614
+ "NARROW_NP_DTYPES",
615
+ "NP_NAT_OBJECTS",
616
+ "NULL_OBJECTS",
617
+ "OBJECT_DTYPES",
618
+ "raise_assert_detail",
619
+ "raises_chained_assignment_error",
620
+ "round_trip_localpath",
621
+ "round_trip_pathlib",
622
+ "round_trip_pickle",
623
+ "setitem",
624
+ "set_locale",
625
+ "set_timezone",
626
+ "shares_memory",
627
+ "SIGNED_INT_EA_DTYPES",
628
+ "SIGNED_INT_NUMPY_DTYPES",
629
+ "STRING_DTYPES",
630
+ "SubclassedDataFrame",
631
+ "SubclassedSeries",
632
+ "TIMEDELTA64_DTYPES",
633
+ "to_array",
634
+ "UNSIGNED_INT_EA_DTYPES",
635
+ "UNSIGNED_INT_NUMPY_DTYPES",
636
+ "use_numexpr",
637
+ "with_csv_dialect",
638
+ "write_to_compressed",
639
+ ]
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/_hypothesis.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hypothesis data generator helpers.
3
+ """
4
+ from datetime import datetime
5
+
6
+ from hypothesis import strategies as st
7
+ from hypothesis.extra.dateutil import timezones as dateutil_timezones
8
+ from hypothesis.extra.pytz import timezones as pytz_timezones
9
+
10
+ from pandas.compat import is_platform_windows
11
+
12
+ import pandas as pd
13
+
14
+ from pandas.tseries.offsets import (
15
+ BMonthBegin,
16
+ BMonthEnd,
17
+ BQuarterBegin,
18
+ BQuarterEnd,
19
+ BYearBegin,
20
+ BYearEnd,
21
+ MonthBegin,
22
+ MonthEnd,
23
+ QuarterBegin,
24
+ QuarterEnd,
25
+ YearBegin,
26
+ YearEnd,
27
+ )
28
+
29
+ OPTIONAL_INTS = st.lists(st.one_of(st.integers(), st.none()), max_size=10, min_size=3)
30
+
31
+ OPTIONAL_FLOATS = st.lists(st.one_of(st.floats(), st.none()), max_size=10, min_size=3)
32
+
33
+ OPTIONAL_TEXT = st.lists(st.one_of(st.none(), st.text()), max_size=10, min_size=3)
34
+
35
+ OPTIONAL_DICTS = st.lists(
36
+ st.one_of(st.none(), st.dictionaries(st.text(), st.integers())),
37
+ max_size=10,
38
+ min_size=3,
39
+ )
40
+
41
+ OPTIONAL_LISTS = st.lists(
42
+ st.one_of(st.none(), st.lists(st.text(), max_size=10, min_size=3)),
43
+ max_size=10,
44
+ min_size=3,
45
+ )
46
+
47
+ OPTIONAL_ONE_OF_ALL = st.one_of(
48
+ OPTIONAL_DICTS, OPTIONAL_FLOATS, OPTIONAL_INTS, OPTIONAL_LISTS, OPTIONAL_TEXT
49
+ )
50
+
51
+ if is_platform_windows():
52
+ DATETIME_NO_TZ = st.datetimes(min_value=datetime(1900, 1, 1))
53
+ else:
54
+ DATETIME_NO_TZ = st.datetimes()
55
+
56
+ DATETIME_JAN_1_1900_OPTIONAL_TZ = st.datetimes(
57
+ min_value=pd.Timestamp(
58
+ 1900, 1, 1
59
+ ).to_pydatetime(), # pyright: ignore[reportGeneralTypeIssues]
60
+ max_value=pd.Timestamp(
61
+ 1900, 1, 1
62
+ ).to_pydatetime(), # pyright: ignore[reportGeneralTypeIssues]
63
+ timezones=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()),
64
+ )
65
+
66
+ DATETIME_IN_PD_TIMESTAMP_RANGE_NO_TZ = st.datetimes(
67
+ min_value=pd.Timestamp.min.to_pydatetime(warn=False),
68
+ max_value=pd.Timestamp.max.to_pydatetime(warn=False),
69
+ )
70
+
71
+ INT_NEG_999_TO_POS_999 = st.integers(-999, 999)
72
+
73
+ # The strategy for each type is registered in conftest.py, as they don't carry
74
+ # enough runtime information (e.g. type hints) to infer how to build them.
75
+ YQM_OFFSET = st.one_of(
76
+ *map(
77
+ st.from_type,
78
+ [
79
+ MonthBegin,
80
+ MonthEnd,
81
+ BMonthBegin,
82
+ BMonthEnd,
83
+ QuarterBegin,
84
+ QuarterEnd,
85
+ BQuarterBegin,
86
+ BQuarterEnd,
87
+ YearBegin,
88
+ YearEnd,
89
+ BYearBegin,
90
+ BYearEnd,
91
+ ],
92
+ )
93
+ )
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/_io.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import gzip
4
+ import io
5
+ import pathlib
6
+ import tarfile
7
+ from typing import (
8
+ TYPE_CHECKING,
9
+ Any,
10
+ Callable,
11
+ )
12
+ import uuid
13
+ import zipfile
14
+
15
+ from pandas.compat import (
16
+ get_bz2_file,
17
+ get_lzma_file,
18
+ )
19
+ from pandas.compat._optional import import_optional_dependency
20
+
21
+ import pandas as pd
22
+ from pandas._testing.contexts import ensure_clean
23
+
24
+ if TYPE_CHECKING:
25
+ from pandas._typing import (
26
+ FilePath,
27
+ ReadPickleBuffer,
28
+ )
29
+
30
+ from pandas import (
31
+ DataFrame,
32
+ Series,
33
+ )
34
+
35
+ # ------------------------------------------------------------------
36
+ # File-IO
37
+
38
+
39
def round_trip_pickle(
    obj: Any, path: FilePath | ReadPickleBuffer | None = None
) -> DataFrame | Series:
    """
    Write an object with ``pd.to_pickle`` and read it back with
    ``pd.read_pickle``.

    Parameters
    ----------
    obj : any object
        The object to pickle and then re-read.
    path : str, path object or file-like object, default None
        The path where the pickled object is written and then read.
        When None, a name of the form ``__<uuid4>__.pickle`` is generated.

    Returns
    -------
    pandas object
        The object read back from the pickle file.
    """
    target = f"__{uuid.uuid4()}__.pickle" if path is None else path
    with ensure_clean(target) as temp_path:
        pd.to_pickle(obj, temp_path)
        return pd.read_pickle(temp_path)
63
+
64
+
65
def round_trip_pathlib(writer, reader, path: str | None = None):
    """
    Write an object to a file given as a ``pathlib.Path``, then read it back.

    Parameters
    ----------
    writer : callable bound to pandas object
        IO writing function (e.g. DataFrame.to_csv )
    reader : callable
        IO reading function (e.g. pd.read_csv )
    path : str, default None
        The path where the object is written and then read.
        When None, ``"___pathlib___"`` is used.

    Returns
    -------
    pandas object
        Whatever ``reader`` returns for the written file.
    """
    target = "___pathlib___" if path is None else path
    with ensure_clean(target) as clean_path:
        # writer and reader each receive their own Path instance.
        writer(pathlib.Path(clean_path))  # type: ignore[arg-type]
        result = reader(pathlib.Path(clean_path))  # type: ignore[arg-type]
    return result
90
+
91
+
92
def round_trip_localpath(writer, reader, path: str | None = None):
    """
    Write an object to a file given as a py.path LocalPath, then read it back.

    Parameters
    ----------
    writer : callable bound to pandas object
        IO writing function (e.g. DataFrame.to_csv )
    reader : callable
        IO reading function (e.g. pd.read_csv )
    path : str, default None
        The path where the object is written and then read.
        When None, ``"___localpath___"`` is used.

    Returns
    -------
    pandas object
        Whatever ``reader`` returns for the written file.
    """
    import pytest

    # importorskip is used to obtain py.path, so its skip behavior applies
    # when the module cannot be imported.
    local_path_cls = pytest.importorskip("py.path").local
    target = "___localpath___" if path is None else path
    with ensure_clean(target) as clean_path:
        writer(local_path_cls(clean_path))
        result = reader(local_path_cls(clean_path))
    return result
119
+
120
+
121
+ def write_to_compressed(compression, path, data, dest: str = "test") -> None:
122
+ """
123
+ Write data to a compressed file.
124
+
125
+ Parameters
126
+ ----------
127
+ compression : {'gzip', 'bz2', 'zip', 'xz', 'zstd'}
128
+ The compression type to use.
129
+ path : str
130
+ The file path to write the data.
131
+ data : str
132
+ The data to write.
133
+ dest : str, default "test"
134
+ The destination file (for ZIP only)
135
+
136
+ Raises
137
+ ------
138
+ ValueError : An invalid compression value was passed in.
139
+ """
140
+ args: tuple[Any, ...] = (data,)
141
+ mode = "wb"
142
+ method = "write"
143
+ compress_method: Callable
144
+
145
+ if compression == "zip":
146
+ compress_method = zipfile.ZipFile
147
+ mode = "w"
148
+ args = (dest, data)
149
+ method = "writestr"
150
+ elif compression == "tar":
151
+ compress_method = tarfile.TarFile
152
+ mode = "w"
153
+ file = tarfile.TarInfo(name=dest)
154
+ bytes = io.BytesIO(data)
155
+ file.size = len(data)
156
+ args = (file, bytes)
157
+ method = "addfile"
158
+ elif compression == "gzip":
159
+ compress_method = gzip.GzipFile
160
+ elif compression == "bz2":
161
+ compress_method = get_bz2_file()
162
+ elif compression == "zstd":
163
+ compress_method = import_optional_dependency("zstandard").open
164
+ elif compression == "xz":
165
+ compress_method = get_lzma_file()
166
+ else:
167
+ raise ValueError(f"Unrecognized compression type: {compression}")
168
+
169
+ with compress_method(path, mode=mode) as f:
170
+ getattr(f, method)(*args)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/_warnings.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from contextlib import (
4
+ contextmanager,
5
+ nullcontext,
6
+ )
7
+ import inspect
8
+ import re
9
+ import sys
10
+ from typing import (
11
+ TYPE_CHECKING,
12
+ Literal,
13
+ cast,
14
+ )
15
+ import warnings
16
+
17
+ from pandas.compat import PY311
18
+
19
+ if TYPE_CHECKING:
20
+ from collections.abc import (
21
+ Generator,
22
+ Sequence,
23
+ )
24
+
25
+
26
@contextmanager
def assert_produces_warning(
    expected_warning: type[Warning] | bool | tuple[type[Warning], ...] | None = Warning,
    filter_level: Literal[
        "error", "ignore", "always", "default", "module", "once"
    ] = "always",
    check_stacklevel: bool = True,
    raise_on_extra_warnings: bool = True,
    match: str | None = None,
) -> Generator[list[warnings.WarningMessage], None, None]:
    """
    Context manager asserting which warnings a block of code emits.

    The wrapped code may be expected to emit one specific warning, one of
    several warnings, or no warning at all. On exit the recorded warnings are
    checked against that expectation and, optionally, any additional warnings
    are reported as failures. It is a thin wrapper around
    ``warnings.catch_warnings``.

    Parameters
    ----------
    expected_warning : {Warning, False, tuple[Warning, ...], None}, default Warning
        The warning class that must be emitted. ``Warning`` is the base
        class for all warnings. Pass a tuple to accept any of several
        classes. Pass ``False`` or ``None`` to assert that no warning is
        emitted.
    filter_level : str, default "always"
        Action installed via ``warnings.simplefilter`` while recording.
        Valid values are:

        * "error" - turns matching warnings into exceptions
        * "ignore" - discard the warning
        * "always" - always emit a warning
        * "default" - print the warning the first time it is generated
          from each location
        * "module" - print the warning the first time it is generated
          from each module
        * "once" - print the warning the first time it is generated

    check_stacklevel : bool, default True
        If True, verify that each expected warning is attributed to the file
        that called the warning-emitting function, i.e. that it was raised
        with the correct ``stacklevel``.
    raise_on_extra_warnings : bool, default True
        Whether extra warnings not of the type `expected_warning` should
        cause the test to fail.
    match : str, optional
        Regular expression searched for in the warning message.

    Examples
    --------
    >>> import warnings
    >>> with assert_produces_warning():
    ...     warnings.warn(UserWarning())
    ...
    >>> with assert_produces_warning(False):
    ...     warnings.warn(RuntimeWarning())
    ...
    Traceback (most recent call last):
        ...
    AssertionError: Caused unexpected warning(s): ['RuntimeWarning'].
    >>> with assert_produces_warning(UserWarning):
    ...     warnings.warn(RuntimeWarning())
    Traceback (most recent call last):
        ...
    AssertionError: Did not see expected warning of class 'UserWarning'.

    ..warn:: This is *not* thread-safe.
    """
    __tracebackhide__ = True

    with warnings.catch_warnings(record=True) as recorded:
        warnings.simplefilter(filter_level)
        try:
            yield recorded
        finally:
            # The checks run in ``finally`` so they are performed even when
            # the wrapped block itself raised.
            if expected_warning:
                expected_warning = cast(type[Warning], expected_warning)
                _assert_caught_expected_warning(
                    caught_warnings=recorded,
                    expected_warning=expected_warning,
                    match=match,
                    check_stacklevel=check_stacklevel,
                )
            if raise_on_extra_warnings:
                _assert_caught_no_extra_warnings(
                    caught_warnings=recorded,
                    expected_warning=expected_warning,
                )
113
+
114
+
115
def maybe_produces_warning(warning: type[Warning], condition: bool, **kwargs):
    """
    Return a context manager that checks for ``warning`` only if ``condition``.

    When ``condition`` is falsy a no-op ``nullcontext`` is returned; otherwise
    the result of ``assert_produces_warning(warning, **kwargs)``.
    """
    if not condition:
        return nullcontext()
    return assert_produces_warning(warning, **kwargs)
123
+
124
+
125
+ def _assert_caught_expected_warning(
126
+ *,
127
+ caught_warnings: Sequence[warnings.WarningMessage],
128
+ expected_warning: type[Warning],
129
+ match: str | None,
130
+ check_stacklevel: bool,
131
+ ) -> None:
132
+ """Assert that there was the expected warning among the caught warnings."""
133
+ saw_warning = False
134
+ matched_message = False
135
+ unmatched_messages = []
136
+
137
+ for actual_warning in caught_warnings:
138
+ if issubclass(actual_warning.category, expected_warning):
139
+ saw_warning = True
140
+
141
+ if check_stacklevel:
142
+ _assert_raised_with_correct_stacklevel(actual_warning)
143
+
144
+ if match is not None:
145
+ if re.search(match, str(actual_warning.message)):
146
+ matched_message = True
147
+ else:
148
+ unmatched_messages.append(actual_warning.message)
149
+
150
+ if not saw_warning:
151
+ raise AssertionError(
152
+ f"Did not see expected warning of class "
153
+ f"{repr(expected_warning.__name__)}"
154
+ )
155
+
156
+ if match and not matched_message:
157
+ raise AssertionError(
158
+ f"Did not see warning {repr(expected_warning.__name__)} "
159
+ f"matching '{match}'. The emitted warning messages are "
160
+ f"{unmatched_messages}"
161
+ )
162
+
163
+
164
def _assert_caught_no_extra_warnings(
    *,
    caught_warnings: Sequence[warnings.WarningMessage],
    expected_warning: type[Warning] | bool | tuple[type[Warning], ...] | None,
) -> None:
    """
    Assert that no extra warnings apart from the expected ones are caught.

    Raises
    ------
    AssertionError
        Listing ``(category name, message, filename, lineno)`` for every
        unexpected warning that is not deliberately ignored below.
    """
    extra_warnings = []

    for actual_warning in caught_warnings:
        if not _is_unexpected_warning(actual_warning, expected_warning):
            continue

        category = actual_warning.category
        # GH#38630 pytest.filterwarnings does not suppress these.
        if category == ResourceWarning:
            # GH 44732: Don't make the CI flaky by filtering SSL-related
            # ResourceWarning from dependencies
            if "unclosed <ssl.SSLSocket" in str(actual_warning.message):
                continue
            # GH 44844: Matplotlib leaves font files open during the entire
            # process upon import. Don't make CI flaky if ResourceWarning
            # raised due to these open files.
            if any("matplotlib" in mod for mod in sys.modules):
                continue
        if PY311 and category == EncodingWarning:
            # EncodingWarnings are checked in the CI
            # pyproject.toml errors on EncodingWarnings in pandas
            # Ignore EncodingWarnings from other libraries
            continue

        details = (
            category.__name__,
            actual_warning.message,
            actual_warning.filename,
            actual_warning.lineno,
        )
        extra_warnings.append(details)

    if extra_warnings:
        raise AssertionError(f"Caused unexpected warning(s): {repr(extra_warnings)}")
201
+
202
+
203
+ def _is_unexpected_warning(
204
+ actual_warning: warnings.WarningMessage,
205
+ expected_warning: type[Warning] | bool | tuple[type[Warning], ...] | None,
206
+ ) -> bool:
207
+ """Check if the actual warning issued is unexpected."""
208
+ if actual_warning and not expected_warning:
209
+ return True
210
+ expected_warning = cast(type[Warning], expected_warning)
211
+ return bool(not issubclass(actual_warning.category, expected_warning))
212
+
213
+
214
+ def _assert_raised_with_correct_stacklevel(
215
+ actual_warning: warnings.WarningMessage,
216
+ ) -> None:
217
+ # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow
218
+ frame = inspect.currentframe()
219
+ for _ in range(4):
220
+ frame = frame.f_back # type: ignore[union-attr]
221
+ try:
222
+ caller_filename = inspect.getfile(frame) # type: ignore[arg-type]
223
+ finally:
224
+ # See note in
225
+ # https://docs.python.org/3/library/inspect.html#inspect.Traceback
226
+ del frame
227
+ msg = (
228
+ "Warning not set with correct stacklevel. "
229
+ f"File where warning is raised: {actual_warning.filename} != "
230
+ f"{caller_filename}. Warning message: {actual_warning.message}"
231
+ )
232
+ assert actual_warning.filename == caller_filename, msg
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/asserters.py ADDED
@@ -0,0 +1,1435 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import operator
4
+ from typing import (
5
+ TYPE_CHECKING,
6
+ Literal,
7
+ NoReturn,
8
+ cast,
9
+ )
10
+
11
+ import numpy as np
12
+
13
+ from pandas._libs import lib
14
+ from pandas._libs.missing import is_matching_na
15
+ from pandas._libs.sparse import SparseIndex
16
+ import pandas._libs.testing as _testing
17
+ from pandas._libs.tslibs.np_datetime import compare_mismatched_resolutions
18
+
19
+ from pandas.core.dtypes.common import (
20
+ is_bool,
21
+ is_float_dtype,
22
+ is_integer_dtype,
23
+ is_number,
24
+ is_numeric_dtype,
25
+ needs_i8_conversion,
26
+ )
27
+ from pandas.core.dtypes.dtypes import (
28
+ CategoricalDtype,
29
+ DatetimeTZDtype,
30
+ ExtensionDtype,
31
+ NumpyEADtype,
32
+ )
33
+ from pandas.core.dtypes.missing import array_equivalent
34
+
35
+ import pandas as pd
36
+ from pandas import (
37
+ Categorical,
38
+ DataFrame,
39
+ DatetimeIndex,
40
+ Index,
41
+ IntervalDtype,
42
+ IntervalIndex,
43
+ MultiIndex,
44
+ PeriodIndex,
45
+ RangeIndex,
46
+ Series,
47
+ TimedeltaIndex,
48
+ )
49
+ from pandas.core.arrays import (
50
+ DatetimeArray,
51
+ ExtensionArray,
52
+ IntervalArray,
53
+ PeriodArray,
54
+ TimedeltaArray,
55
+ )
56
+ from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
57
+ from pandas.core.arrays.string_ import StringDtype
58
+ from pandas.core.indexes.api import safe_sort_index
59
+
60
+ from pandas.io.formats.printing import pprint_thing
61
+
62
+ if TYPE_CHECKING:
63
+ from pandas._typing import DtypeObj
64
+
65
+
66
def assert_almost_equal(
    left,
    right,
    check_dtype: bool | Literal["equiv"] = "equiv",
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    **kwargs,
) -> None:
    """
    Check that the left and right objects are approximately equal.

    "Approximately equal" applies to numbers, or to objects containing
    numbers, that agree within the given relative and absolute tolerances.
    The comparison is dispatched on the type of ``left``.

    Parameters
    ----------
    left : object
    right : object
    check_dtype : bool or {'equiv'}, default 'equiv'
        Check dtype if both a and b are the same type. If 'equiv' is passed in,
        then `RangeIndex` and `Index` with int64 dtype are also considered
        equivalent when doing type checking.
    rtol : float, default 1e-5
        Relative tolerance.
    atol : float, default 1e-8
        Absolute tolerance.
    **kwargs
        Forwarded unchanged to the comparison function that is dispatched to.
    """
    if isinstance(left, Index):
        assert_index_equal(
            left,
            right,
            check_exact=False,
            exact=check_dtype,
            rtol=rtol,
            atol=atol,
            **kwargs,
        )
        return

    if isinstance(left, Series):
        assert_series_equal(
            left,
            right,
            check_exact=False,
            check_dtype=check_dtype,
            rtol=rtol,
            atol=atol,
            **kwargs,
        )
        return

    if isinstance(left, DataFrame):
        assert_frame_equal(
            left,
            right,
            check_exact=False,
            check_dtype=check_dtype,
            rtol=rtol,
            atol=atol,
            **kwargs,
        )
        return

    # Other sequences and scalars.
    if check_dtype:
        # Do not compare numeric classes (like np.float64 and float) or
        # bool classes (like np.bool_ and bool).
        same_kind_scalars = (is_number(left) and is_number(right)) or (
            is_bool(left) and is_bool(right)
        )
        if not same_kind_scalars:
            involves_ndarray = isinstance(left, np.ndarray) or isinstance(
                right, np.ndarray
            )
            obj = "numpy array" if involves_ndarray else "Input"
            assert_class_equal(left, right, obj=obj)

    # if we have "equiv", this becomes True
    _testing.assert_almost_equal(
        left, right, check_dtype=bool(check_dtype), rtol=rtol, atol=atol, **kwargs
    )
146
+
147
+
148
+ def _check_isinstance(left, right, cls) -> None:
149
+ """
150
+ Helper method for our assert_* methods that ensures that
151
+ the two objects being compared have the right type before
152
+ proceeding with the comparison.
153
+
154
+ Parameters
155
+ ----------
156
+ left : The first object being compared.
157
+ right : The second object being compared.
158
+ cls : The class type to check against.
159
+
160
+ Raises
161
+ ------
162
+ AssertionError : Either `left` or `right` is not an instance of `cls`.
163
+ """
164
+ cls_name = cls.__name__
165
+
166
+ if not isinstance(left, cls):
167
+ raise AssertionError(
168
+ f"{cls_name} Expected type {cls}, found {type(left)} instead"
169
+ )
170
+ if not isinstance(right, cls):
171
+ raise AssertionError(
172
+ f"{cls_name} Expected type {cls}, found {type(right)} instead"
173
+ )
174
+
175
+
176
def assert_dict_equal(left, right, compare_keys: bool = True) -> None:
    """
    Check that two dicts are equal.

    Both arguments must be ``dict`` instances; the comparison itself is
    delegated to ``pandas._libs.testing.assert_dict_equal``.
    """
    _check_isinstance(left, right, dict)
    _testing.assert_dict_equal(left, right, compare_keys=compare_keys)
179
+
180
+
181
def assert_index_equal(
    left: Index,
    right: Index,
    exact: bool | str = "equiv",
    check_names: bool = True,
    check_exact: bool = True,
    check_categorical: bool = True,
    check_order: bool = True,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    obj: str = "Index",
) -> None:
    """
    Check that left and right Index are equal.

    Parameters
    ----------
    left : Index
    right : Index
    exact : bool or {'equiv'}, default 'equiv'
        Whether to check the Index class, dtype and inferred_type
        are identical. If 'equiv', then RangeIndex can be substituted for
        Index with an int64 dtype as well.
    check_names : bool, default True
        Whether to check the names attribute.
    check_exact : bool, default True
        Whether to compare number exactly.
    check_categorical : bool, default True
        Whether to compare internal Categorical exactly.
    check_order : bool, default True
        Whether to compare the order of index entries as well as their values.
        If True, both indexes must contain the same elements, in the same order.
        If False, both indexes must contain the same elements, but in any order.
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.
    obj : str, default 'Index'
        Specify object name being compared, internally used to show appropriate
        assertion message.

    Examples
    --------
    >>> from pandas import testing as tm
    >>> a = pd.Index([1, 2, 3])
    >>> b = pd.Index([1, 2, 3])
    >>> tm.assert_index_equal(a, b)
    """
    __tracebackhide__ = True

    def _check_types(left, right, obj: str = "Index") -> None:
        # Class, inferred_type and dtype checks; all skipped when ``exact``
        # is falsy.
        if not exact:
            return

        assert_class_equal(left, right, exact=exact, obj=obj)
        assert_attr_equal("inferred_type", left, right, obj=obj)

        both_categorical = isinstance(left.dtype, CategoricalDtype) and isinstance(
            right.dtype, CategoricalDtype
        )
        # Skip exact dtype checking when `check_categorical` is False
        if both_categorical and not check_categorical:
            return

        assert_attr_equal("dtype", left, right, obj=obj)
        if both_categorical:
            assert_index_equal(left.categories, right.categories, exact=exact)

    # instance validation
    _check_isinstance(left, right, Index)

    # class / dtype comparison
    _check_types(left, right, obj=obj)

    # level comparison
    if left.nlevels != right.nlevels:
        raise_assert_detail(
            obj,
            f"{obj} levels are different",
            f"{left.nlevels}, {left}",
            f"{right.nlevels}, {right}",
        )

    # length comparison
    if len(left) != len(right):
        raise_assert_detail(
            obj,
            f"{obj} length are different",
            f"{len(left)}, {left}",
            f"{len(right)}, {right}",
        )

    # If order doesn't matter then sort the index entries
    if not check_order:
        left = safe_sort_index(left)
        right = safe_sort_index(right)

    # MultiIndex special comparison for little-friendly error messages
    if isinstance(left, MultiIndex):
        right = cast(MultiIndex, right)

        for level in range(left.nlevels):
            # Options shared by the cheap and the fallback comparison below.
            level_options = {
                "exact": exact,
                "check_names": check_names,
                "check_exact": check_exact,
                "check_categorical": check_categorical,
                "rtol": rtol,
                "atol": atol,
                "obj": f"MultiIndex level [{level}]",
            }
            try:
                # try comparison on levels/codes to avoid densifying MultiIndex
                assert_index_equal(
                    left.levels[level], right.levels[level], **level_options
                )
                assert_numpy_array_equal(left.codes[level], right.codes[level])
            except AssertionError:
                # Fall back to comparing the materialized level values.
                assert_index_equal(
                    left.get_level_values(level),
                    right.get_level_values(level),
                    **level_options,
                )
            # get_level_values may change dtype
            _check_types(left.levels[level], right.levels[level], obj=obj)

    # skip exact index checking when `check_categorical` is False
    elif check_exact and check_categorical:
        if not left.equals(right):
            mismatch = left._values != right._values

            if not isinstance(mismatch, np.ndarray):
                mismatch = cast("ExtensionArray", mismatch).fillna(True)

            diff = np.sum(mismatch.astype(int)) * 100.0 / len(left)
            raise_assert_detail(
                obj, f"{obj} values are different ({np.round(diff, 5)} %)", left, right
            )
    else:
        # if we have "equiv", this becomes True
        _testing.assert_almost_equal(
            left.values,
            right.values,
            rtol=rtol,
            atol=atol,
            check_dtype=bool(exact),
            obj=obj,
            lobj=left,
            robj=right,
        )

    # metadata comparison
    if check_names:
        assert_attr_equal("names", left, right, obj=obj)
    if isinstance(left, PeriodIndex) or isinstance(right, PeriodIndex):
        assert_attr_equal("dtype", left, right, obj=obj)
    if isinstance(left, IntervalIndex) or isinstance(right, IntervalIndex):
        assert_interval_array_equal(left._values, right._values)

    if check_categorical and (
        isinstance(left.dtype, CategoricalDtype)
        or isinstance(right.dtype, CategoricalDtype)
    ):
        assert_categorical_equal(left._values, right._values, obj=f"{obj} category")
350
+
351
+
352
def assert_class_equal(
    left, right, exact: bool | str = True, obj: str = "Input"
) -> None:
    """
    Check that ``left`` and ``right`` have the same class.

    With ``exact="equiv"`` a ``RangeIndex`` (or subclass) and a plain
    ``Index`` are also accepted as equivalent; the int64 dtype requirement is
    checked separately by callers.
    """
    __tracebackhide__ = True

    def repr_class(x):
        # return Index as it is to include values in the error message
        return x if isinstance(x, Index) else type(x).__name__

    def is_class_equiv(idx: Index) -> bool:
        """Classes that are a RangeIndex (sub-)instance or exactly an `Index` .

        This only checks class equivalence. There is a separate check that the
        dtype is int64.
        """
        return type(idx) is Index or isinstance(idx, RangeIndex)

    if type(left) == type(right):
        return

    if exact == "equiv" and is_class_equiv(left) and is_class_equiv(right):
        return

    raise_assert_detail(
        obj, f"{obj} classes are different", repr_class(left), repr_class(right)
    )
384
+
385
+
386
def assert_attr_equal(attr: str, left, right, obj: str = "Attributes") -> None:
    """
    Check attributes are equal. Both objects must have attribute.

    Parameters
    ----------
    attr : str
        Attribute name being compared.
    left : object
    right : object
    obj : str, default 'Attributes'
        Specify object name being compared, internally used to show appropriate
        assertion message
    """
    __tracebackhide__ = True

    left_attr = getattr(left, attr)
    right_attr = getattr(right, attr)

    # Identical objects or matching missing values count as equal,
    # e.g. both np.nan, both NaT, both pd.NA, ...
    if left_attr is right_attr or is_matching_na(left_attr, right_attr):
        return None

    try:
        result = left_attr == right_attr
    except TypeError:
        # datetimetz on rhs may raise TypeError
        result = False

    exactly_one_is_na = (left_attr is pd.NA) != (right_attr is pd.NA)
    if exactly_one_is_na:
        result = False
    elif not isinstance(result, bool):
        # Element-wise comparison result: require every element to be equal.
        result = result.all()

    if not result:
        raise_assert_detail(
            obj, f'Attribute "{attr}" are different', left_attr, right_attr
        )
    return None
423
+
424
+
425
def assert_is_valid_plot_return_object(objs) -> None:
    """
    Assert that ``objs`` is an acceptable return value of a plotting call.

    A ``Series`` or ``numpy.ndarray`` must contain only matplotlib ``Axes``
    instances or dicts; anything else must itself be a matplotlib ``Artist``,
    a tuple, or a dict.

    Raises
    ------
    AssertionError
        If ``objs`` (or one of its elements) has an unexpected type.
    """
    # Imported lazily so that matplotlib is only required when this helper
    # is actually called.
    from matplotlib.artist import Artist
    from matplotlib.axes import Axes

    if isinstance(objs, (Series, np.ndarray)):
        if isinstance(objs, Series):
            objs = objs._values
        for el in objs.ravel():
            msg = (
                "one of 'objs' is not a matplotlib Axes instance, "
                f"type encountered {repr(type(el).__name__)}"
            )
            assert isinstance(el, (Axes, dict)), msg
    else:
        # The message previously read "ArtistArtist" (duplicated word).
        msg = (
            "objs is neither an ndarray of Artist instances nor a single "
            "Artist instance, tuple, or dict, 'objs' is a "
            f"{repr(type(objs).__name__)}"
        )
        assert isinstance(objs, (Artist, tuple, dict)), msg
445
+
446
+
447
def assert_is_sorted(seq) -> None:
    """
    Assert that the sequence is sorted.

    An ``Index`` or ``Series`` is unwrapped to its ``.values`` first. NumPy
    arrays are compared against ``np.sort`` of a copy; anything else is
    compared against itself reordered by ``argsort``.
    """
    if isinstance(seq, (Index, Series)):
        seq = seq.values

    # sorting does not change precisions
    if not isinstance(seq, np.ndarray):
        order = seq.argsort()
        assert_extension_array_equal(seq, seq[order])
        return

    expected = np.sort(np.array(seq))
    assert_numpy_array_equal(seq, expected)
456
+
457
+
458
def assert_categorical_equal(
    left,
    right,
    check_dtype: bool = True,
    check_category_order: bool = True,
    obj: str = "Categorical",
) -> None:
    """
    Test that Categoricals are equivalent.

    Parameters
    ----------
    left : Categorical
    right : Categorical
    check_dtype : bool, default True
        Check that integer dtype of the codes are the same.
    check_category_order : bool, default True
        Whether the order of the categories should be compared, which
        implies identical integer codes.  If False, only the resulting
        values are compared.  The ordered attribute is
        checked regardless.
    obj : str, default 'Categorical'
        Specify object name being compared, internally used to show appropriate
        assertion message.
    """
    _check_isinstance(left, right, Categorical)

    # A RangeIndex on either side is allowed to match an equivalent Index;
    # otherwise we still want to require exact matches for Index.
    has_range_categories = isinstance(left.categories, RangeIndex) or isinstance(
        right.categories, RangeIndex
    )
    exact: bool | str = "equiv" if has_range_categories else True

    categories_label = f"{obj}.categories"

    if check_category_order:
        assert_index_equal(
            left.categories, right.categories, obj=categories_label, exact=exact
        )
        assert_numpy_array_equal(
            left.codes, right.codes, check_dtype=check_dtype, obj=f"{obj}.codes"
        )
    else:
        try:
            lc = left.categories.sort_values()
            rc = right.categories.sort_values()
        except TypeError:
            # e.g. '<' not supported between instances of 'int' and 'str'
            lc, rc = left.categories, right.categories
        assert_index_equal(lc, rc, obj=categories_label, exact=exact)

        # Compare the values each categorical actually represents.
        left_values = left.categories.take(left.codes)
        right_values = right.categories.take(right.codes)
        assert_index_equal(
            left_values, right_values, obj=f"{obj}.values", exact=exact
        )

    assert_attr_equal("ordered", left, right, obj=obj)
517
+
518
+
519
def assert_interval_array_equal(
    left, right, exact: bool | Literal["equiv"] = "equiv", obj: str = "IntervalArray"
) -> None:
    """
    Test that two IntervalArrays are equivalent.

    The left endpoints, the right endpoints and the ``closed`` attribute are
    compared in that order.

    Parameters
    ----------
    left, right : IntervalArray
        The IntervalArrays to compare.
    exact : bool or {'equiv'}, default 'equiv'
        Whether to check the Index class, dtype and inferred_type
        are identical. If 'equiv', then RangeIndex can be substituted for
        Index with an int64 dtype as well.
        NOTE(review): this argument is accepted but not used by the body.
    obj : str, default 'IntervalArray'
        Specify object name being compared, internally used to show appropriate
        assertion message
    """
    _check_isinstance(left, right, IntervalArray)

    kwargs = {}
    if left._left.dtype.kind in "mM":
        # We have a DatetimeArray or TimedeltaArray
        kwargs["check_freq"] = False

    assert_equal(left._left, right._left, obj=f"{obj}.left", **kwargs)
    # Label the right endpoints as ".right" so that a failure on the right
    # bound is not misreported as a difference in the left bound.
    assert_equal(left._right, right._right, obj=f"{obj}.right", **kwargs)

    assert_attr_equal("closed", left, right, obj=obj)
548
+
549
+
550
def assert_period_array_equal(left, right, obj: str = "PeriodArray") -> None:
    """
    Check that two PeriodArrays are equal.

    Compares the backing ``_ndarray`` values and then the ``dtype``.
    """
    _check_isinstance(left, right, PeriodArray)

    left_values, right_values = left._ndarray, right._ndarray
    assert_numpy_array_equal(left_values, right_values, obj=f"{obj}._ndarray")
    assert_attr_equal("dtype", left, right, obj=obj)
555
+
556
+
557
def assert_datetime_array_equal(
    left, right, obj: str = "DatetimeArray", check_freq: bool = True
) -> None:
    """
    Check that two DatetimeArrays are equal.

    Compares the backing ``_ndarray`` values, then ``freq`` (only when
    ``check_freq`` is True), then ``tz``.
    """
    __tracebackhide__ = True
    _check_isinstance(left, right, DatetimeArray)

    assert_numpy_array_equal(left._ndarray, right._ndarray, obj=f"{obj}._ndarray")
    attrs_to_check = ("freq", "tz") if check_freq else ("tz",)
    for attr_name in attrs_to_check:
        assert_attr_equal(attr_name, left, right, obj=obj)
567
+
568
+
569
def assert_timedelta_array_equal(
    left, right, obj: str = "TimedeltaArray", check_freq: bool = True
) -> None:
    """
    Check that two TimedeltaArrays are equal.

    Compares the backing ``_ndarray`` values and, when ``check_freq`` is
    True, the ``freq`` attribute.
    """
    __tracebackhide__ = True
    _check_isinstance(left, right, TimedeltaArray)
    assert_numpy_array_equal(left._ndarray, right._ndarray, obj=f"{obj}._ndarray")
    if not check_freq:
        return
    assert_attr_equal("freq", left, right, obj=obj)
577
+
578
+
579
+ def raise_assert_detail(
580
+ obj, message, left, right, diff=None, first_diff=None, index_values=None
581
+ ) -> NoReturn:
582
+ __tracebackhide__ = True
583
+
584
+ msg = f"""{obj} are different
585
+
586
+ {message}"""
587
+
588
+ if isinstance(index_values, Index):
589
+ index_values = np.asarray(index_values)
590
+
591
+ if isinstance(index_values, np.ndarray):
592
+ msg += f"\n[index]: {pprint_thing(index_values)}"
593
+
594
+ if isinstance(left, np.ndarray):
595
+ left = pprint_thing(left)
596
+ elif isinstance(left, (CategoricalDtype, NumpyEADtype, StringDtype)):
597
+ left = repr(left)
598
+
599
+ if isinstance(right, np.ndarray):
600
+ right = pprint_thing(right)
601
+ elif isinstance(right, (CategoricalDtype, NumpyEADtype, StringDtype)):
602
+ right = repr(right)
603
+
604
+ msg += f"""
605
+ [left]: {left}
606
+ [right]: {right}"""
607
+
608
+ if diff is not None:
609
+ msg += f"\n[diff]: {diff}"
610
+
611
+ if first_diff is not None:
612
+ msg += f"\n{first_diff}"
613
+
614
+ raise AssertionError(msg)
615
+
616
+
617
def assert_numpy_array_equal(
    left,
    right,
    strict_nan: bool = False,
    check_dtype: bool | Literal["equiv"] = True,
    err_msg=None,
    check_same=None,
    obj: str = "numpy array",
    index_values=None,
) -> None:
    """
    Check that 'np.ndarray' is equivalent.

    Parameters
    ----------
    left, right : numpy.ndarray or iterable
        The two arrays to be compared.
    strict_nan : bool, default False
        If True, consider NaN and None to be different.
    check_dtype : bool, default True
        Check dtype if both a and b are np.ndarray.
    err_msg : str, default None
        If provided, used as assertion message.
    check_same : None|'copy'|'same', default None
        Ensure left and right refer/do not refer to the same memory area.
    obj : str, default 'numpy array'
        Specify object name being compared, internally used to show appropriate
        assertion message.
    index_values : Index | numpy.ndarray, default None
        optional index (shared by both left and right), used in output.
    """
    __tracebackhide__ = True

    # instance validation
    # Show a detailed error message when classes are different
    assert_class_equal(left, right, obj=obj)
    # both classes must be an np.ndarray
    _check_isinstance(left, right, np.ndarray)

    def _get_base(arr):
        # The array's ``base`` when it has one, else the array itself.
        base = getattr(arr, "base", None)
        return arr.base if base is not None else arr

    left_base = _get_base(left)
    right_base = _get_base(right)
    shares_base = left_base is right_base

    if check_same == "same":
        if not shares_base:
            raise AssertionError(f"{repr(left_base)} is not {repr(right_base)}")
    elif check_same == "copy":
        if shares_base:
            raise AssertionError(f"{repr(left_base)} is {repr(right_base)}")

    def _raise(left, right, err_msg) -> NoReturn:
        # A caller-supplied message takes precedence over the detailed report.
        if err_msg is not None:
            raise AssertionError(err_msg)

        if left.shape != right.shape:
            raise_assert_detail(
                obj, f"{obj} shapes are different", left.shape, right.shape
            )

        # count up differences
        n_different = sum(
            not array_equivalent(left_arr, right_arr, strict_nan=strict_nan)
            for left_arr, right_arr in zip(left, right)
        )
        diff = n_different * 100.0 / left.size
        msg = f"{obj} values are different ({np.round(diff, 5)} %)"
        raise_assert_detail(obj, msg, left, right, index_values=index_values)

    # compare shape and values
    if not array_equivalent(left, right, strict_nan=strict_nan):
        _raise(left, right, err_msg)

    if check_dtype and isinstance(left, np.ndarray) and isinstance(right, np.ndarray):
        assert_attr_equal("dtype", left, right, obj=obj)
695
+
696
+
697
+ def assert_extension_array_equal(
698
+ left,
699
+ right,
700
+ check_dtype: bool | Literal["equiv"] = True,
701
+ index_values=None,
702
+ check_exact: bool | lib.NoDefault = lib.no_default,
703
+ rtol: float | lib.NoDefault = lib.no_default,
704
+ atol: float | lib.NoDefault = lib.no_default,
705
+ obj: str = "ExtensionArray",
706
+ ) -> None:
707
+ """
708
+ Check that left and right ExtensionArrays are equal.
709
+
710
+ Parameters
711
+ ----------
712
+ left, right : ExtensionArray
713
+ The two arrays to compare.
714
+ check_dtype : bool, default True
715
+ Whether to check if the ExtensionArray dtypes are identical.
716
+ index_values : Index | numpy.ndarray, default None
717
+ Optional index (shared by both left and right), used in output.
718
+ check_exact : bool, default False
719
+ Whether to compare number exactly.
720
+
721
+ .. versionchanged:: 2.2.0
722
+
723
+ Defaults to True for integer dtypes if none of
724
+ ``check_exact``, ``rtol`` and ``atol`` are specified.
725
+ rtol : float, default 1e-5
726
+ Relative tolerance. Only used when check_exact is False.
727
+ atol : float, default 1e-8
728
+ Absolute tolerance. Only used when check_exact is False.
729
+ obj : str, default 'ExtensionArray'
730
+ Specify object name being compared, internally used to show appropriate
731
+ assertion message.
732
+
733
+ .. versionadded:: 2.0.0
734
+
735
+ Notes
736
+ -----
737
+ Missing values are checked separately from valid values.
738
+ A mask of missing values is computed for each and checked to match.
739
+ The remaining all-valid values are cast to object dtype and checked.
740
+
741
+ Examples
742
+ --------
743
+ >>> from pandas import testing as tm
744
+ >>> a = pd.Series([1, 2, 3, 4])
745
+ >>> b, c = a.array, a.array
746
+ >>> tm.assert_extension_array_equal(b, c)
747
+ """
748
+ if (
749
+ check_exact is lib.no_default
750
+ and rtol is lib.no_default
751
+ and atol is lib.no_default
752
+ ):
753
+ check_exact = (
754
+ is_numeric_dtype(left.dtype)
755
+ and not is_float_dtype(left.dtype)
756
+ or is_numeric_dtype(right.dtype)
757
+ and not is_float_dtype(right.dtype)
758
+ )
759
+ elif check_exact is lib.no_default:
760
+ check_exact = False
761
+
762
+ rtol = rtol if rtol is not lib.no_default else 1.0e-5
763
+ atol = atol if atol is not lib.no_default else 1.0e-8
764
+
765
+ assert isinstance(left, ExtensionArray), "left is not an ExtensionArray"
766
+ assert isinstance(right, ExtensionArray), "right is not an ExtensionArray"
767
+ if check_dtype:
768
+ assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")
769
+
770
+ if (
771
+ isinstance(left, DatetimeLikeArrayMixin)
772
+ and isinstance(right, DatetimeLikeArrayMixin)
773
+ and type(right) == type(left)
774
+ ):
775
+ # GH 52449
776
+ if not check_dtype and left.dtype.kind in "mM":
777
+ if not isinstance(left.dtype, np.dtype):
778
+ l_unit = cast(DatetimeTZDtype, left.dtype).unit
779
+ else:
780
+ l_unit = np.datetime_data(left.dtype)[0]
781
+ if not isinstance(right.dtype, np.dtype):
782
+ r_unit = cast(DatetimeTZDtype, right.dtype).unit
783
+ else:
784
+ r_unit = np.datetime_data(right.dtype)[0]
785
+ if (
786
+ l_unit != r_unit
787
+ and compare_mismatched_resolutions(
788
+ left._ndarray, right._ndarray, operator.eq
789
+ ).all()
790
+ ):
791
+ return
792
+ # Avoid slow object-dtype comparisons
793
+ # np.asarray for case where we have a np.MaskedArray
794
+ assert_numpy_array_equal(
795
+ np.asarray(left.asi8),
796
+ np.asarray(right.asi8),
797
+ index_values=index_values,
798
+ obj=obj,
799
+ )
800
+ return
801
+
802
+ left_na = np.asarray(left.isna())
803
+ right_na = np.asarray(right.isna())
804
+ assert_numpy_array_equal(
805
+ left_na, right_na, obj=f"{obj} NA mask", index_values=index_values
806
+ )
807
+
808
+ left_valid = left[~left_na].to_numpy(dtype=object)
809
+ right_valid = right[~right_na].to_numpy(dtype=object)
810
+ if check_exact:
811
+ assert_numpy_array_equal(
812
+ left_valid, right_valid, obj=obj, index_values=index_values
813
+ )
814
+ else:
815
+ _testing.assert_almost_equal(
816
+ left_valid,
817
+ right_valid,
818
+ check_dtype=bool(check_dtype),
819
+ rtol=rtol,
820
+ atol=atol,
821
+ obj=obj,
822
+ index_values=index_values,
823
+ )
824
+
825
+
826
+ # This could be refactored to use the NDFrame.equals method
827
+ def assert_series_equal(
828
+ left,
829
+ right,
830
+ check_dtype: bool | Literal["equiv"] = True,
831
+ check_index_type: bool | Literal["equiv"] = "equiv",
832
+ check_series_type: bool = True,
833
+ check_names: bool = True,
834
+ check_exact: bool | lib.NoDefault = lib.no_default,
835
+ check_datetimelike_compat: bool = False,
836
+ check_categorical: bool = True,
837
+ check_category_order: bool = True,
838
+ check_freq: bool = True,
839
+ check_flags: bool = True,
840
+ rtol: float | lib.NoDefault = lib.no_default,
841
+ atol: float | lib.NoDefault = lib.no_default,
842
+ obj: str = "Series",
843
+ *,
844
+ check_index: bool = True,
845
+ check_like: bool = False,
846
+ ) -> None:
847
+ """
848
+ Check that left and right Series are equal.
849
+
850
+ Parameters
851
+ ----------
852
+ left : Series
853
+ right : Series
854
+ check_dtype : bool, default True
855
+ Whether to check the Series dtype is identical.
856
+ check_index_type : bool or {'equiv'}, default 'equiv'
857
+ Whether to check the Index class, dtype and inferred_type
858
+ are identical.
859
+ check_series_type : bool, default True
860
+ Whether to check the Series class is identical.
861
+ check_names : bool, default True
862
+ Whether to check the Series and Index names attribute.
863
+ check_exact : bool, default False
864
+ Whether to compare number exactly.
865
+
866
+ .. versionchanged:: 2.2.0
867
+
868
+ Defaults to True for integer dtypes if none of
869
+ ``check_exact``, ``rtol`` and ``atol`` are specified.
870
+ check_datetimelike_compat : bool, default False
871
+ Compare datetime-like which is comparable ignoring dtype.
872
+ check_categorical : bool, default True
873
+ Whether to compare internal Categorical exactly.
874
+ check_category_order : bool, default True
875
+ Whether to compare category order of internal Categoricals.
876
+ check_freq : bool, default True
877
+ Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex.
878
+ check_flags : bool, default True
879
+ Whether to check the `flags` attribute.
880
+ rtol : float, default 1e-5
881
+ Relative tolerance. Only used when check_exact is False.
882
+ atol : float, default 1e-8
883
+ Absolute tolerance. Only used when check_exact is False.
884
+ obj : str, default 'Series'
885
+ Specify object name being compared, internally used to show appropriate
886
+ assertion message.
887
+ check_index : bool, default True
888
+ Whether to check index equivalence. If False, then compare only values.
889
+
890
+ .. versionadded:: 1.3.0
891
+ check_like : bool, default False
892
+ If True, ignore the order of the index. Must be False if check_index is False.
893
+ Note: same labels must be with the same data.
894
+
895
+ .. versionadded:: 1.5.0
896
+
897
+ Examples
898
+ --------
899
+ >>> from pandas import testing as tm
900
+ >>> a = pd.Series([1, 2, 3, 4])
901
+ >>> b = pd.Series([1, 2, 3, 4])
902
+ >>> tm.assert_series_equal(a, b)
903
+ """
904
+ __tracebackhide__ = True
905
+ check_exact_index = False if check_exact is lib.no_default else check_exact
906
+ if (
907
+ check_exact is lib.no_default
908
+ and rtol is lib.no_default
909
+ and atol is lib.no_default
910
+ ):
911
+ check_exact = (
912
+ is_numeric_dtype(left.dtype)
913
+ and not is_float_dtype(left.dtype)
914
+ or is_numeric_dtype(right.dtype)
915
+ and not is_float_dtype(right.dtype)
916
+ )
917
+ elif check_exact is lib.no_default:
918
+ check_exact = False
919
+
920
+ rtol = rtol if rtol is not lib.no_default else 1.0e-5
921
+ atol = atol if atol is not lib.no_default else 1.0e-8
922
+
923
+ if not check_index and check_like:
924
+ raise ValueError("check_like must be False if check_index is False")
925
+
926
+ # instance validation
927
+ _check_isinstance(left, right, Series)
928
+
929
+ if check_series_type:
930
+ assert_class_equal(left, right, obj=obj)
931
+
932
+ # length comparison
933
+ if len(left) != len(right):
934
+ msg1 = f"{len(left)}, {left.index}"
935
+ msg2 = f"{len(right)}, {right.index}"
936
+ raise_assert_detail(obj, "Series length are different", msg1, msg2)
937
+
938
+ if check_flags:
939
+ assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}"
940
+
941
+ if check_index:
942
+ # GH #38183
943
+ assert_index_equal(
944
+ left.index,
945
+ right.index,
946
+ exact=check_index_type,
947
+ check_names=check_names,
948
+ check_exact=check_exact_index,
949
+ check_categorical=check_categorical,
950
+ check_order=not check_like,
951
+ rtol=rtol,
952
+ atol=atol,
953
+ obj=f"{obj}.index",
954
+ )
955
+
956
+ if check_like:
957
+ left = left.reindex_like(right)
958
+
959
+ if check_freq and isinstance(left.index, (DatetimeIndex, TimedeltaIndex)):
960
+ lidx = left.index
961
+ ridx = right.index
962
+ assert lidx.freq == ridx.freq, (lidx.freq, ridx.freq)
963
+
964
+ if check_dtype:
965
+ # We want to skip exact dtype checking when `check_categorical`
966
+ # is False. We'll still raise if only one is a `Categorical`,
967
+ # regardless of `check_categorical`
968
+ if (
969
+ isinstance(left.dtype, CategoricalDtype)
970
+ and isinstance(right.dtype, CategoricalDtype)
971
+ and not check_categorical
972
+ ):
973
+ pass
974
+ else:
975
+ assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")
976
+ if check_exact:
977
+ left_values = left._values
978
+ right_values = right._values
979
+ # Only check exact if dtype is numeric
980
+ if isinstance(left_values, ExtensionArray) and isinstance(
981
+ right_values, ExtensionArray
982
+ ):
983
+ assert_extension_array_equal(
984
+ left_values,
985
+ right_values,
986
+ check_dtype=check_dtype,
987
+ index_values=left.index,
988
+ obj=str(obj),
989
+ )
990
+ else:
991
+ # convert both to NumPy if not, check_dtype would raise earlier
992
+ lv, rv = left_values, right_values
993
+ if isinstance(left_values, ExtensionArray):
994
+ lv = left_values.to_numpy()
995
+ if isinstance(right_values, ExtensionArray):
996
+ rv = right_values.to_numpy()
997
+ assert_numpy_array_equal(
998
+ lv,
999
+ rv,
1000
+ check_dtype=check_dtype,
1001
+ obj=str(obj),
1002
+ index_values=left.index,
1003
+ )
1004
+ elif check_datetimelike_compat and (
1005
+ needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype)
1006
+ ):
1007
+ # we want to check only if we have compat dtypes
1008
+ # e.g. integer and M|m are NOT compat, but we can simply check
1009
+ # the values in that case
1010
+
1011
+ # datetimelike may have different objects (e.g. datetime.datetime
1012
+ # vs Timestamp) but will compare equal
1013
+ if not Index(left._values).equals(Index(right._values)):
1014
+ msg = (
1015
+ f"[datetimelike_compat=True] {left._values} "
1016
+ f"is not equal to {right._values}."
1017
+ )
1018
+ raise AssertionError(msg)
1019
+ elif isinstance(left.dtype, IntervalDtype) and isinstance(
1020
+ right.dtype, IntervalDtype
1021
+ ):
1022
+ assert_interval_array_equal(left.array, right.array)
1023
+ elif isinstance(left.dtype, CategoricalDtype) or isinstance(
1024
+ right.dtype, CategoricalDtype
1025
+ ):
1026
+ _testing.assert_almost_equal(
1027
+ left._values,
1028
+ right._values,
1029
+ rtol=rtol,
1030
+ atol=atol,
1031
+ check_dtype=bool(check_dtype),
1032
+ obj=str(obj),
1033
+ index_values=left.index,
1034
+ )
1035
+ elif isinstance(left.dtype, ExtensionDtype) and isinstance(
1036
+ right.dtype, ExtensionDtype
1037
+ ):
1038
+ assert_extension_array_equal(
1039
+ left._values,
1040
+ right._values,
1041
+ rtol=rtol,
1042
+ atol=atol,
1043
+ check_dtype=check_dtype,
1044
+ index_values=left.index,
1045
+ obj=str(obj),
1046
+ )
1047
+ elif is_extension_array_dtype_and_needs_i8_conversion(
1048
+ left.dtype, right.dtype
1049
+ ) or is_extension_array_dtype_and_needs_i8_conversion(right.dtype, left.dtype):
1050
+ assert_extension_array_equal(
1051
+ left._values,
1052
+ right._values,
1053
+ check_dtype=check_dtype,
1054
+ index_values=left.index,
1055
+ obj=str(obj),
1056
+ )
1057
+ elif needs_i8_conversion(left.dtype) and needs_i8_conversion(right.dtype):
1058
+ # DatetimeArray or TimedeltaArray
1059
+ assert_extension_array_equal(
1060
+ left._values,
1061
+ right._values,
1062
+ check_dtype=check_dtype,
1063
+ index_values=left.index,
1064
+ obj=str(obj),
1065
+ )
1066
+ else:
1067
+ _testing.assert_almost_equal(
1068
+ left._values,
1069
+ right._values,
1070
+ rtol=rtol,
1071
+ atol=atol,
1072
+ check_dtype=bool(check_dtype),
1073
+ obj=str(obj),
1074
+ index_values=left.index,
1075
+ )
1076
+
1077
+ # metadata comparison
1078
+ if check_names:
1079
+ assert_attr_equal("name", left, right, obj=obj)
1080
+
1081
+ if check_categorical:
1082
+ if isinstance(left.dtype, CategoricalDtype) or isinstance(
1083
+ right.dtype, CategoricalDtype
1084
+ ):
1085
+ assert_categorical_equal(
1086
+ left._values,
1087
+ right._values,
1088
+ obj=f"{obj} category",
1089
+ check_category_order=check_category_order,
1090
+ )
1091
+
1092
+
1093
+ # This could be refactored to use the NDFrame.equals method
1094
+ def assert_frame_equal(
1095
+ left,
1096
+ right,
1097
+ check_dtype: bool | Literal["equiv"] = True,
1098
+ check_index_type: bool | Literal["equiv"] = "equiv",
1099
+ check_column_type: bool | Literal["equiv"] = "equiv",
1100
+ check_frame_type: bool = True,
1101
+ check_names: bool = True,
1102
+ by_blocks: bool = False,
1103
+ check_exact: bool | lib.NoDefault = lib.no_default,
1104
+ check_datetimelike_compat: bool = False,
1105
+ check_categorical: bool = True,
1106
+ check_like: bool = False,
1107
+ check_freq: bool = True,
1108
+ check_flags: bool = True,
1109
+ rtol: float | lib.NoDefault = lib.no_default,
1110
+ atol: float | lib.NoDefault = lib.no_default,
1111
+ obj: str = "DataFrame",
1112
+ ) -> None:
1113
+ """
1114
+ Check that left and right DataFrame are equal.
1115
+
1116
+ This function is intended to compare two DataFrames and output any
1117
+ differences. It is mostly intended for use in unit tests.
1118
+ Additional parameters allow varying the strictness of the
1119
+ equality checks performed.
1120
+
1121
+ Parameters
1122
+ ----------
1123
+ left : DataFrame
1124
+ First DataFrame to compare.
1125
+ right : DataFrame
1126
+ Second DataFrame to compare.
1127
+ check_dtype : bool, default True
1128
+ Whether to check the DataFrame dtype is identical.
1129
+ check_index_type : bool or {'equiv'}, default 'equiv'
1130
+ Whether to check the Index class, dtype and inferred_type
1131
+ are identical.
1132
+ check_column_type : bool or {'equiv'}, default 'equiv'
1133
+ Whether to check the columns class, dtype and inferred_type
1134
+ are identical. Is passed as the ``exact`` argument of
1135
+ :func:`assert_index_equal`.
1136
+ check_frame_type : bool, default True
1137
+ Whether to check the DataFrame class is identical.
1138
+ check_names : bool, default True
1139
+ Whether to check that the `names` attribute for both the `index`
1140
+ and `column` attributes of the DataFrame is identical.
1141
+ by_blocks : bool, default False
1142
+ Specify how to compare internal data. If False, compare by columns.
1143
+ If True, compare by blocks.
1144
+ check_exact : bool, default False
1145
+ Whether to compare number exactly.
1146
+
1147
+ .. versionchanged:: 2.2.0
1148
+
1149
+ Defaults to True for integer dtypes if none of
1150
+ ``check_exact``, ``rtol`` and ``atol`` are specified.
1151
+ check_datetimelike_compat : bool, default False
1152
+ Compare datetime-like which is comparable ignoring dtype.
1153
+ check_categorical : bool, default True
1154
+ Whether to compare internal Categorical exactly.
1155
+ check_like : bool, default False
1156
+ If True, ignore the order of index & columns.
1157
+ Note: index labels must match their respective rows
1158
+ (same as in columns) - same labels must be with the same data.
1159
+ check_freq : bool, default True
1160
+ Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex.
1161
+ check_flags : bool, default True
1162
+ Whether to check the `flags` attribute.
1163
+ rtol : float, default 1e-5
1164
+ Relative tolerance. Only used when check_exact is False.
1165
+ atol : float, default 1e-8
1166
+ Absolute tolerance. Only used when check_exact is False.
1167
+ obj : str, default 'DataFrame'
1168
+ Specify object name being compared, internally used to show appropriate
1169
+ assertion message.
1170
+
1171
+ See Also
1172
+ --------
1173
+ assert_series_equal : Equivalent method for asserting Series equality.
1174
+ DataFrame.equals : Check DataFrame equality.
1175
+
1176
+ Examples
1177
+ --------
1178
+ This example shows comparing two DataFrames that are equal
1179
+ but with columns of differing dtypes.
1180
+
1181
+ >>> from pandas.testing import assert_frame_equal
1182
+ >>> df1 = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
1183
+ >>> df2 = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})
1184
+
1185
+ df1 equals itself.
1186
+
1187
+ >>> assert_frame_equal(df1, df1)
1188
+
1189
+ df1 differs from df2 as column 'b' is of a different type.
1190
+
1191
+ >>> assert_frame_equal(df1, df2)
1192
+ Traceback (most recent call last):
1193
+ ...
1194
+ AssertionError: Attributes of DataFrame.iloc[:, 1] (column name="b") are different
1195
+
1196
+ Attribute "dtype" are different
1197
+ [left]: int64
1198
+ [right]: float64
1199
+
1200
+ Ignore differing dtypes in columns with check_dtype.
1201
+
1202
+ >>> assert_frame_equal(df1, df2, check_dtype=False)
1203
+ """
1204
+ __tracebackhide__ = True
1205
+ _rtol = rtol if rtol is not lib.no_default else 1.0e-5
1206
+ _atol = atol if atol is not lib.no_default else 1.0e-8
1207
+ _check_exact = check_exact if check_exact is not lib.no_default else False
1208
+
1209
+ # instance validation
1210
+ _check_isinstance(left, right, DataFrame)
1211
+
1212
+ if check_frame_type:
1213
+ assert isinstance(left, type(right))
1214
+ # assert_class_equal(left, right, obj=obj)
1215
+
1216
+ # shape comparison
1217
+ if left.shape != right.shape:
1218
+ raise_assert_detail(
1219
+ obj, f"{obj} shape mismatch", f"{repr(left.shape)}", f"{repr(right.shape)}"
1220
+ )
1221
+
1222
+ if check_flags:
1223
+ assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}"
1224
+
1225
+ # index comparison
1226
+ assert_index_equal(
1227
+ left.index,
1228
+ right.index,
1229
+ exact=check_index_type,
1230
+ check_names=check_names,
1231
+ check_exact=_check_exact,
1232
+ check_categorical=check_categorical,
1233
+ check_order=not check_like,
1234
+ rtol=_rtol,
1235
+ atol=_atol,
1236
+ obj=f"{obj}.index",
1237
+ )
1238
+
1239
+ # column comparison
1240
+ assert_index_equal(
1241
+ left.columns,
1242
+ right.columns,
1243
+ exact=check_column_type,
1244
+ check_names=check_names,
1245
+ check_exact=_check_exact,
1246
+ check_categorical=check_categorical,
1247
+ check_order=not check_like,
1248
+ rtol=_rtol,
1249
+ atol=_atol,
1250
+ obj=f"{obj}.columns",
1251
+ )
1252
+
1253
+ if check_like:
1254
+ left = left.reindex_like(right)
1255
+
1256
+ # compare by blocks
1257
+ if by_blocks:
1258
+ rblocks = right._to_dict_of_blocks()
1259
+ lblocks = left._to_dict_of_blocks()
1260
+ for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))):
1261
+ assert dtype in lblocks
1262
+ assert dtype in rblocks
1263
+ assert_frame_equal(
1264
+ lblocks[dtype], rblocks[dtype], check_dtype=check_dtype, obj=obj
1265
+ )
1266
+
1267
+ # compare by columns
1268
+ else:
1269
+ for i, col in enumerate(left.columns):
1270
+ # We have already checked that columns match, so we can do
1271
+ # fast location-based lookups
1272
+ lcol = left._ixs(i, axis=1)
1273
+ rcol = right._ixs(i, axis=1)
1274
+
1275
+ # GH #38183
1276
+ # use check_index=False, because we do not want to run
1277
+ # assert_index_equal for each column,
1278
+ # as we already checked it for the whole dataframe before.
1279
+ assert_series_equal(
1280
+ lcol,
1281
+ rcol,
1282
+ check_dtype=check_dtype,
1283
+ check_index_type=check_index_type,
1284
+ check_exact=check_exact,
1285
+ check_names=check_names,
1286
+ check_datetimelike_compat=check_datetimelike_compat,
1287
+ check_categorical=check_categorical,
1288
+ check_freq=check_freq,
1289
+ obj=f'{obj}.iloc[:, {i}] (column name="{col}")',
1290
+ rtol=rtol,
1291
+ atol=atol,
1292
+ check_index=False,
1293
+ check_flags=False,
1294
+ )
1295
+
1296
+
1297
def assert_equal(left, right, **kwargs) -> None:
    """
    Dispatch to the tm.assert_*_equal function matching the type of ``left``.

    Parameters
    ----------
    left, right : Index, Series, DataFrame, ExtensionArray, or np.ndarray
        The two items to be compared.
    **kwargs
        All keyword arguments are passed through to the underlying assert
        method. None may be given when ``left`` matches none of the
        listed container types.
    """
    __tracebackhide__ = True

    if isinstance(left, Index):
        assert_index_equal(left, right, **kwargs)
        if isinstance(left, (DatetimeIndex, TimedeltaIndex)):
            assert left.freq == right.freq, (left.freq, right.freq)
        return

    # Order matters: the specific array classes are tested before the
    # generic ExtensionArray entry.
    dispatch = (
        (Series, assert_series_equal),
        (DataFrame, assert_frame_equal),
        (IntervalArray, assert_interval_array_equal),
        (PeriodArray, assert_period_array_equal),
        (DatetimeArray, assert_datetime_array_equal),
        (TimedeltaArray, assert_timedelta_array_equal),
        (ExtensionArray, assert_extension_array_equal),
        (np.ndarray, assert_numpy_array_equal),
    )
    for klass, checker in dispatch:
        if isinstance(left, klass):
            checker(left, right, **kwargs)
            return

    # Strings and everything else accept no keyword arguments.
    assert kwargs == {}
    if isinstance(left, str):
        assert left == right
    else:
        assert_almost_equal(left, right)
1336
+
1337
+
1338
def assert_sp_array_equal(left, right) -> None:
    """
    Assert that two SparseArrays are equal.

    Compares, in order: the stored sparse values, the sparse indexes, the
    ``fill_value`` and ``dtype`` attributes, and the dense values.

    Parameters
    ----------
    left : SparseArray
    right : SparseArray
    """
    _check_isinstance(left, right, pd.arrays.SparseArray)

    assert_numpy_array_equal(left.sp_values, right.sp_values)

    # SparseIndex comparison
    left_index = left.sp_index
    right_index = right.sp_index
    assert isinstance(left_index, SparseIndex)
    assert isinstance(right_index, SparseIndex)

    if not left_index.equals(right_index):
        raise_assert_detail(
            "SparseArray.index", "index are not equal", left_index, right_index
        )

    for attr in ("fill_value", "dtype"):
        assert_attr_equal(attr, left, right)
    assert_numpy_array_equal(left.to_dense(), right.to_dense())
1369
+
1370
+
1371
def assert_contains_all(iterable, dic) -> None:
    """
    Assert that every item produced by ``iterable`` is contained in ``dic``.

    Fails on the first missing item, naming it in the assertion message.
    """
    for item in iterable:
        present = item in dic
        assert present, f"Did not contain item: {item!r}"
1374
+
1375
+
1376
def assert_copy(iter1, iter2, **eql_kwargs) -> None:
    """
    Assert that paired elements are equal but are distinct objects.

    iter1, iter2: iterables that produce elements comparable with
    assert_almost_equal; ``eql_kwargs`` are forwarded to it.

    Only the top-level elements are checked for identity: items nested
    inside sequences are not checked to be different objects.
    """
    for first, second in zip(iter1, iter2):
        assert_almost_equal(first, second, **eql_kwargs)
        assert first is not second, (
            f"Expected object {repr(type(first))} and object {repr(type(second))} to be "
            "different objects, but they were the same object."
        )
1392
+
1393
+
1394
def is_extension_array_dtype_and_needs_i8_conversion(
    left_dtype: DtypeObj, right_dtype: DtypeObj
) -> bool:
    """
    Check for the pairing of an ExtensionDtype on the left with a dtype on
    the right that should be converted to int64.

    Returns
    -------
    bool

    Related to issue #37609
    """
    if not isinstance(left_dtype, ExtensionDtype):
        return False
    return needs_i8_conversion(right_dtype)
1408
+
1409
+
1410
def assert_indexing_slices_equivalent(ser: Series, l_slc: slice, i_slc: slice) -> None:
    """
    Assert that ``ser.loc[l_slc]`` matches ``ser.iloc[i_slc]`` and, when the
    index is not integer-dtype, that ``ser[l_slc]`` matches as well.
    """
    expected = ser.iloc[i_slc]
    assert_series_equal(ser.loc[l_slc], expected)

    if is_integer_dtype(ser.index):
        # Plain ``ser[l_slc]`` is only compared for non-integer indexes.
        return
    assert_series_equal(ser[l_slc], expected)
1422
+
1423
+
1424
+ def assert_metadata_equivalent(
1425
+ left: DataFrame | Series, right: DataFrame | Series | None = None
1426
+ ) -> None:
1427
+ """
1428
+ Check that ._metadata attributes are equivalent.
1429
+ """
1430
+ for attr in left._metadata:
1431
+ val = getattr(left, attr, None)
1432
+ if right is None:
1433
+ assert val is None
1434
+ else:
1435
+ assert val == getattr(right, attr, None)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/compat.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Helpers for sharing tests between DataFrame/Series
3
+ """
4
+ from __future__ import annotations
5
+
6
+ from typing import TYPE_CHECKING
7
+
8
+ from pandas import DataFrame
9
+
10
+ if TYPE_CHECKING:
11
+ from pandas._typing import DtypeObj
12
+
13
+
14
def get_dtype(obj) -> DtypeObj:
    """
    Return the dtype of ``obj``; for a DataFrame, the dtype of its first column.
    """
    if not isinstance(obj, DataFrame):
        return obj.dtype
    # Note: we are assuming only one column
    return obj.dtypes.iat[0]
20
+
21
+
22
def get_obj(df: DataFrame, klass):
    """
    For sharing tests using frame_or_series: return the DataFrame unchanged
    when ``klass`` is DataFrame, otherwise return its first column as a
    Series.
    """
    return df if klass is DataFrame else df._ixs(0, axis=1)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/_testing/contexts.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from contextlib import contextmanager
4
+ import os
5
+ from pathlib import Path
6
+ import tempfile
7
+ from typing import (
8
+ IO,
9
+ TYPE_CHECKING,
10
+ Any,
11
+ )
12
+ import uuid
13
+
14
+ from pandas._config import using_copy_on_write
15
+
16
+ from pandas.compat import PYPY
17
+ from pandas.errors import ChainedAssignmentError
18
+
19
+ from pandas import set_option
20
+
21
+ from pandas.io.common import get_handle
22
+
23
+ if TYPE_CHECKING:
24
+ from collections.abc import Generator
25
+
26
+ from pandas._typing import (
27
+ BaseBuffer,
28
+ CompressionOptions,
29
+ FilePath,
30
+ )
31
+
32
+
33
+ @contextmanager
34
+ def decompress_file(
35
+ path: FilePath | BaseBuffer, compression: CompressionOptions
36
+ ) -> Generator[IO[bytes], None, None]:
37
+ """
38
+ Open a compressed file and return a file object.
39
+
40
+ Parameters
41
+ ----------
42
+ path : str
43
+ The path where the file is read from.
44
+
45
+ compression : {'gzip', 'bz2', 'zip', 'xz', 'zstd', None}
46
+ Name of the decompression to use
47
+
48
+ Returns
49
+ -------
50
+ file object
51
+ """
52
+ with get_handle(path, "rb", compression=compression, is_text=False) as handle:
53
+ yield handle.handle
54
+
55
+
56
@contextmanager
def set_timezone(tz: str) -> Generator[None, None, None]:
    """
    Context manager that temporarily sets the ``TZ`` environment variable.

    The previous value of ``TZ`` (or its absence) is restored on exit.

    Parameters
    ----------
    tz : str
        A string representing a valid timezone.

    Examples
    --------
    >>> from datetime import datetime
    >>> from dateutil.tz import tzlocal
    >>> tzlocal().tzname(datetime(2021, 1, 1))  # doctest: +SKIP
    'IST'

    >>> with set_timezone('US/Eastern'):
    ...     tzlocal().tzname(datetime(2021, 1, 1))
    ...
    'EST'
    """
    import time

    def _apply(zone) -> None:
        if zone is None:
            # Nothing to restore to: drop the variable if it is present.
            os.environ.pop("TZ", None)
            return
        os.environ["TZ"] = zone
        time.tzset()

    previous = os.environ.get("TZ")
    _apply(tz)
    try:
        yield
    finally:
        _apply(previous)
96
+
97
+
98
@contextmanager
def ensure_clean(
    filename=None, return_filelike: bool = False, **kwargs: Any
) -> Generator[Any, None, None]:
    """
    Gets a temporary path and agrees to remove on close.

    This implementation does not use tempfile.mkstemp to avoid having a file handle.
    If the code using the returned path wants to delete the file itself, windows
    requires that no program has a file handle to it.

    Parameters
    ----------
    filename : str (optional)
        suffix of the created file.
    return_filelike : bool (default False)
        if True, returns a file-like which is *always* cleaned. Necessary for
        savefig and other functions which want to append extensions.
    **kwargs
        Additional keywords are passed to open().

    Yields
    ------
    str or file object
        The path as a string, or the opened file when ``return_filelike``
        is True.
    """
    folder = Path(tempfile.gettempdir())

    if filename is None:
        filename = ""
    # A random prefix keeps concurrent callers from colliding.
    filename = str(uuid.uuid4()) + filename
    path = folder / filename

    path.touch()

    handle_or_str: str | IO = str(path)
    encoding = kwargs.pop("encoding", None)

    try:
        if return_filelike:
            kwargs.setdefault("mode", "w+b")
            # Text mode without an explicit encoding defaults to UTF-8.
            if encoding is None and "b" not in kwargs["mode"]:
                encoding = "utf-8"
            # Opened inside the ``try`` so that a failing ``open`` (e.g. an
            # invalid mode) still removes the file created by ``touch``.
            handle_or_str = open(path, encoding=encoding, **kwargs)

        yield handle_or_str
    finally:
        if not isinstance(handle_or_str, str):
            handle_or_str.close()
        if path.is_file():
            path.unlink()
144
+
145
+
146
@contextmanager
def with_csv_dialect(name: str, **kwargs) -> Generator[None, None, None]:
    """
    Context manager that registers a CSV dialect for the duration of the block.

    Parameters
    ----------
    name : str
        The name of the dialect.
    kwargs : mapping
        The parameters for the dialect.

    Raises
    ------
    ValueError : the name of the dialect conflicts with a builtin one.

    See Also
    --------
    csv : Python's CSV library.
    """
    import csv

    if name in {"excel", "excel-tab", "unix"}:
        raise ValueError("Cannot override builtin dialect.")

    csv.register_dialect(name, **kwargs)
    try:
        yield
    finally:
        # Always unregister, even when the body raised.
        csv.unregister_dialect(name)
178
+
179
+
180
+ @contextmanager
181
+ def use_numexpr(use, min_elements=None) -> Generator[None, None, None]:
182
+ from pandas.core.computation import expressions as expr
183
+
184
+ if min_elements is None:
185
+ min_elements = expr._MIN_ELEMENTS
186
+
187
+ olduse = expr.USE_NUMEXPR
188
+ oldmin = expr._MIN_ELEMENTS
189
+ set_option("compute.use_numexpr", use)
190
+ expr._MIN_ELEMENTS = min_elements
191
+ try:
192
+ yield
193
+ finally:
194
+ expr._MIN_ELEMENTS = oldmin
195
+ set_option("compute.use_numexpr", olduse)
196
+
197
+
198
def raises_chained_assignment_error(warn=True, extra_warnings=(), extra_match=()):
    """
    Return a context manager asserting the chained-assignment warning.

    Parameters
    ----------
    warn : bool, default True
        When False, a no-op context manager is returned.
    extra_warnings : tuple of warning classes, default ()
        Additional warning classes that are expected as well.
    extra_match : tuple of str, default ()
        Additional message patterns, joined with ``|`` into the match regex.

    Returns
    -------
    context manager
        ``nullcontext()`` when ``warn`` is False, or on PyPy with no extra
        warnings; otherwise the result of ``assert_produces_warning``.
    """
    from pandas._testing import assert_produces_warning

    if not warn or (PYPY and not extra_warnings):
        from contextlib import nullcontext

        return nullcontext()

    if PYPY:
        # On PyPy only the extra warnings are asserted.
        return assert_produces_warning(
            extra_warnings,
            match="|".join(extra_match),
        )

    if using_copy_on_write():
        warning = ChainedAssignmentError
        match = (
            "A value is trying to be set on a copy of a DataFrame or Series "
            "through chained assignment"
        )
    else:
        warning = FutureWarning  # type: ignore[assignment]
        # TODO update match
        match = "ChainedAssignmentError"

    if extra_warnings:
        warning = (warning, *extra_warnings)  # type: ignore[assignment]
    patterns = (match, *extra_match)
    return assert_produces_warning(
        warning,
        match="|".join(patterns),
    )
232
+
233
+
234
def assert_cow_warning(warn=True, match=None, **kwargs):
    """
    Assert that a warning is raised in the CoW warning mode.

    Parameters
    ----------
    warn : bool, default True
        By default, check that a warning is raised. Passing False returns a
        no-op context manager instead.
    match : str
        The warning message to match against, if different from the default.
    kwargs
        Passed through to assert_produces_warning
    """
    from pandas._testing import assert_produces_warning

    if not warn:
        from contextlib import nullcontext

        return nullcontext()

    # A missing or empty ``match`` falls back to the default message.
    pattern = match or "Setting a value on a view"
    return assert_produces_warning(FutureWarning, match=pattern, **kwargs)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/arrays/__init__.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ All of pandas' ExtensionArrays.
3
+
4
+ See :ref:`extending.extension-types` for more.
5
+ """
6
+ from pandas.core.arrays import (
7
+ ArrowExtensionArray,
8
+ ArrowStringArray,
9
+ BooleanArray,
10
+ Categorical,
11
+ DatetimeArray,
12
+ FloatingArray,
13
+ IntegerArray,
14
+ IntervalArray,
15
+ NumpyExtensionArray,
16
+ PeriodArray,
17
+ SparseArray,
18
+ StringArray,
19
+ TimedeltaArray,
20
+ )
21
+
22
+ __all__ = [
23
+ "ArrowExtensionArray",
24
+ "ArrowStringArray",
25
+ "BooleanArray",
26
+ "Categorical",
27
+ "DatetimeArray",
28
+ "FloatingArray",
29
+ "IntegerArray",
30
+ "IntervalArray",
31
+ "NumpyExtensionArray",
32
+ "PeriodArray",
33
+ "SparseArray",
34
+ "StringArray",
35
+ "TimedeltaArray",
36
+ ]
37
+
38
+
39
+ def __getattr__(name: str) -> type[NumpyExtensionArray]:
40
+ if name == "PandasArray":
41
+ # GH#53694
42
+ import warnings
43
+
44
+ from pandas.util._exceptions import find_stack_level
45
+
46
+ warnings.warn(
47
+ "PandasArray has been renamed NumpyExtensionArray. Use that "
48
+ "instead. This alias will be removed in a future version.",
49
+ FutureWarning,
50
+ stacklevel=find_stack_level(),
51
+ )
52
+ return NumpyExtensionArray
53
+ raise AttributeError(f"module 'pandas.arrays' has no attribute '{name}'")
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/errors/__init__.py ADDED
@@ -0,0 +1,850 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Expose public exceptions & warnings
3
+ """
4
+ from __future__ import annotations
5
+
6
+ import ctypes
7
+
8
+ from pandas._config.config import OptionError
9
+
10
+ from pandas._libs.tslibs import (
11
+ OutOfBoundsDatetime,
12
+ OutOfBoundsTimedelta,
13
+ )
14
+
15
+ from pandas.util.version import InvalidVersion
16
+
17
+
18
+ class IntCastingNaNError(ValueError):
19
+ """
20
+ Exception raised when converting (``astype``) an array with NaN to an integer type.
21
+
22
+ Examples
23
+ --------
24
+ >>> pd.DataFrame(np.array([[1, np.nan], [2, 3]]), dtype="i8")
25
+ Traceback (most recent call last):
26
+ IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer
27
+ """
28
+
29
+
30
+ class NullFrequencyError(ValueError):
31
+ """
32
+ Exception raised when a ``freq`` cannot be null.
33
+
34
+ Particularly ``DatetimeIndex.shift``, ``TimedeltaIndex.shift``,
35
+ ``PeriodIndex.shift``.
36
+
37
+ Examples
38
+ --------
39
+ >>> df = pd.DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None)
40
+ >>> df.shift(2)
41
+ Traceback (most recent call last):
42
+ NullFrequencyError: Cannot shift with no freq
43
+ """
44
+
45
+
46
+ class PerformanceWarning(Warning):
47
+ """
48
+ Warning raised when there is a possible performance impact.
49
+
50
+ Examples
51
+ --------
52
+ >>> df = pd.DataFrame({"jim": [0, 0, 1, 1],
53
+ ... "joe": ["x", "x", "z", "y"],
54
+ ... "jolie": [1, 2, 3, 4]})
55
+ >>> df = df.set_index(["jim", "joe"])
56
+ >>> df
57
+ jolie
58
+ jim joe
59
+ 0 x 1
60
+ x 2
61
+ 1 z 3
62
+ y 4
63
+ >>> df.loc[(1, 'z')] # doctest: +SKIP
64
+ # PerformanceWarning: indexing past lexsort depth may impact performance.
65
+ df.loc[(1, 'z')]
66
+ jolie
67
+ jim joe
68
+ 1 z 3
69
+ """
70
+
71
+
72
+ class UnsupportedFunctionCall(ValueError):
73
+ """
74
+ Exception raised when attempting to call an unsupported numpy function.
75
+
76
+ For example, ``np.cumsum(groupby_object)``.
77
+
78
+ Examples
79
+ --------
80
+ >>> df = pd.DataFrame({"A": [0, 0, 1, 1],
81
+ ... "B": ["x", "x", "z", "y"],
82
+ ... "C": [1, 2, 3, 4]}
83
+ ... )
84
+ >>> np.cumsum(df.groupby(["A"]))
85
+ Traceback (most recent call last):
86
+ UnsupportedFunctionCall: numpy operations are not valid with groupby.
87
+ Use .groupby(...).cumsum() instead
88
+ """
89
+
90
+
91
+ class UnsortedIndexError(KeyError):
92
+ """
93
+ Error raised when slicing a MultiIndex which has not been lexsorted.
94
+
95
+ Subclass of `KeyError`.
96
+
97
+ Examples
98
+ --------
99
+ >>> df = pd.DataFrame({"cat": [0, 0, 1, 1],
100
+ ... "color": ["white", "white", "brown", "black"],
101
+ ... "lives": [4, 4, 3, 7]},
102
+ ... )
103
+ >>> df = df.set_index(["cat", "color"])
104
+ >>> df
105
+ lives
106
+ cat color
107
+ 0 white 4
108
+ white 4
109
+ 1 brown 3
110
+ black 7
111
+ >>> df.loc[(0, "black"):(1, "white")]
112
+ Traceback (most recent call last):
113
+ UnsortedIndexError: 'Key length (2) was greater
114
+ than MultiIndex lexsort depth (1)'
115
+ """
116
+
117
+
118
+ class ParserError(ValueError):
119
+ """
120
+ Exception that is raised by an error encountered in parsing file contents.
121
+
122
+ This is a generic error raised for errors encountered when functions like
123
+ `read_csv` or `read_html` are parsing contents of a file.
124
+
125
+ See Also
126
+ --------
127
+ read_csv : Read CSV (comma-separated) file into a DataFrame.
128
+ read_html : Read HTML table into a DataFrame.
129
+
130
+ Examples
131
+ --------
132
+ >>> data = '''a,b,c
133
+ ... cat,foo,bar
134
+ ... dog,foo,"baz'''
135
+ >>> from io import StringIO
136
+ >>> pd.read_csv(StringIO(data), skipfooter=1, engine='python')
137
+ Traceback (most recent call last):
138
+ ParserError: ',' expected after '"'. Error could possibly be due
139
+ to parsing errors in the skipped footer rows
140
+ """
141
+
142
+
143
+ class DtypeWarning(Warning):
144
+ """
145
+ Warning raised when reading different dtypes in a column from a file.
146
+
147
+ Raised for a dtype incompatibility. This can happen whenever `read_csv`
148
+ or `read_table` encounter non-uniform dtypes in a column(s) of a given
149
+ CSV file.
150
+
151
+ See Also
152
+ --------
153
+ read_csv : Read CSV (comma-separated) file into a DataFrame.
154
+ read_table : Read general delimited file into a DataFrame.
155
+
156
+ Notes
157
+ -----
158
+ This warning is issued when dealing with larger files because the dtype
159
+ checking happens per chunk read.
160
+
161
+ Despite the warning, the CSV file is read with mixed types in a single
162
+ column which will be an object type. See the examples below to better
163
+ understand this issue.
164
+
165
+ Examples
166
+ --------
167
+ This example creates and reads a large CSV file with a column that contains
168
+ `int` and `str`.
169
+
170
+ >>> df = pd.DataFrame({'a': (['1'] * 100000 + ['X'] * 100000 +
171
+ ... ['1'] * 100000),
172
+ ... 'b': ['b'] * 300000}) # doctest: +SKIP
173
+ >>> df.to_csv('test.csv', index=False) # doctest: +SKIP
174
+ >>> df2 = pd.read_csv('test.csv') # doctest: +SKIP
175
+ ... # DtypeWarning: Columns (0) have mixed types
176
+
177
+ Important to notice that ``df2`` will contain both `str` and `int` for the
178
+ same input, '1'.
179
+
180
+ >>> df2.iloc[262140, 0] # doctest: +SKIP
181
+ '1'
182
+ >>> type(df2.iloc[262140, 0]) # doctest: +SKIP
183
+ <class 'str'>
184
+ >>> df2.iloc[262150, 0] # doctest: +SKIP
185
+ 1
186
+ >>> type(df2.iloc[262150, 0]) # doctest: +SKIP
187
+ <class 'int'>
188
+
189
+ One way to solve this issue is using the `dtype` parameter in the
190
+ `read_csv` and `read_table` functions to make the conversion explicit:
191
+
192
+ >>> df2 = pd.read_csv('test.csv', sep=',', dtype={'a': str}) # doctest: +SKIP
193
+
194
+ No warning was issued.
195
+ """
196
+
197
+
198
+ class EmptyDataError(ValueError):
199
+ """
200
+ Exception raised in ``pd.read_csv`` when empty data or header is encountered.
201
+
202
+ Examples
203
+ --------
204
+ >>> from io import StringIO
205
+ >>> empty = StringIO()
206
+ >>> pd.read_csv(empty)
207
+ Traceback (most recent call last):
208
+ EmptyDataError: No columns to parse from file
209
+ """
210
+
211
+
212
+ class ParserWarning(Warning):
213
+ """
214
+ Warning raised when reading a file that doesn't use the default 'c' parser.
215
+
216
+ Raised by `pd.read_csv` and `pd.read_table` when it is necessary to change
217
+ parsers, generally from the default 'c' parser to 'python'.
218
+
219
+ It happens due to a lack of support or functionality for parsing a
220
+ particular attribute of a CSV file with the requested engine.
221
+
222
+ Currently, 'c' unsupported options include the following parameters:
223
+
224
+ 1. `sep` other than a single character (e.g. regex separators)
225
+ 2. `skipfooter` higher than 0
226
+ 3. `sep=None` with `delim_whitespace=False`
227
+
228
+ The warning can be avoided by adding `engine='python'` as a parameter in
229
+ `pd.read_csv` and `pd.read_table` methods.
230
+
231
+ See Also
232
+ --------
233
+ pd.read_csv : Read CSV (comma-separated) file into DataFrame.
234
+ pd.read_table : Read general delimited file into DataFrame.
235
+
236
+ Examples
237
+ --------
238
+ Using a `sep` in `pd.read_csv` other than a single character:
239
+
240
+ >>> import io
241
+ >>> csv = '''a;b;c
242
+ ... 1;1,8
243
+ ... 1;2,1'''
244
+ >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]') # doctest: +SKIP
245
+ ... # ParserWarning: Falling back to the 'python' engine...
246
+
247
+ Adding `engine='python'` to `pd.read_csv` removes the Warning:
248
+
249
+ >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]', engine='python')
250
+ """
251
+
252
+
253
+ class MergeError(ValueError):
254
+ """
255
+ Exception raised when merging data.
256
+
257
+ Subclass of ``ValueError``.
258
+
259
+ Examples
260
+ --------
261
+ >>> left = pd.DataFrame({"a": ["a", "b", "b", "d"],
262
+ ... "b": ["cat", "dog", "weasel", "horse"]},
263
+ ... index=range(4))
264
+ >>> right = pd.DataFrame({"a": ["a", "b", "c", "d"],
265
+ ... "c": ["meow", "bark", "chirp", "nay"]},
266
+ ... index=range(4)).set_index("a")
267
+ >>> left.join(right, on="a", validate="one_to_one",)
268
+ Traceback (most recent call last):
269
+ MergeError: Merge keys are not unique in left dataset; not a one-to-one merge
270
+ """
271
+
272
+
273
class AbstractMethodError(NotImplementedError):
    """
    Raise this error instead of NotImplementedError for abstract methods.

    Parameters
    ----------
    class_instance : object or type
        The object whose class lacks the implementation. When
        ``methodtype="classmethod"`` this must be the class itself, because
        its ``__name__`` is read directly.
    methodtype : str, default "method"
        One of "method", "classmethod", "staticmethod" or "property".

    Raises
    ------
    ValueError
        If ``methodtype`` is not one of the supported kinds.

    Examples
    --------
    >>> class Foo:
    ...     @classmethod
    ...     def classmethod(cls):
    ...         raise pd.errors.AbstractMethodError(cls, methodtype="classmethod")
    ...     def method(self):
    ...         raise pd.errors.AbstractMethodError(self)
    >>> test = Foo.classmethod()
    Traceback (most recent call last):
    AbstractMethodError: This classmethod must be defined in the concrete class Foo

    >>> test2 = Foo().method()
    Traceback (most recent call last):
    AbstractMethodError: This method must be defined in the concrete class Foo
    """

    def __init__(self, class_instance, methodtype: str = "method") -> None:
        types = {"method", "classmethod", "staticmethod", "property"}
        if methodtype not in types:
            # Report the allowed kinds first, then the offending value.
            raise ValueError(
                f"methodtype must be one of {types}, got {methodtype} instead."
            )
        self.methodtype = methodtype
        self.class_instance = class_instance

    def __str__(self) -> str:
        if self.methodtype == "classmethod":
            # For classmethods the class itself was passed in.
            name = self.class_instance.__name__
        else:
            name = type(self.class_instance).__name__
        return f"This {self.methodtype} must be defined in the concrete class {name}"
309
+
310
+
311
+ class NumbaUtilError(Exception):
312
+ """
313
+ Error raised for unsupported Numba engine routines.
314
+
315
+ Examples
316
+ --------
317
+ >>> df = pd.DataFrame({"key": ["a", "a", "b", "b"], "data": [1, 2, 3, 4]},
318
+ ... columns=["key", "data"])
319
+ >>> def incorrect_function(x):
320
+ ... return sum(x) * 2.7
321
+ >>> df.groupby("key").agg(incorrect_function, engine="numba")
322
+ Traceback (most recent call last):
323
+ NumbaUtilError: The first 2 arguments to incorrect_function
324
+ must be ['values', 'index']
325
+ """
326
+
327
+
328
+ class DuplicateLabelError(ValueError):
329
+ """
330
+ Error raised when an operation would introduce duplicate labels.
331
+
332
+ Examples
333
+ --------
334
+ >>> s = pd.Series([0, 1, 2], index=['a', 'b', 'c']).set_flags(
335
+ ... allows_duplicate_labels=False
336
+ ... )
337
+ >>> s.reindex(['a', 'a', 'b'])
338
+ Traceback (most recent call last):
339
+ ...
340
+ DuplicateLabelError: Index has duplicates.
341
+ positions
342
+ label
343
+ a [0, 1]
344
+ """
345
+
346
+
347
+ class InvalidIndexError(Exception):
348
+ """
349
+ Exception raised when attempting to use an invalid index key.
350
+
351
+ Examples
352
+ --------
353
+ >>> idx = pd.MultiIndex.from_product([["x", "y"], [0, 1]])
354
+ >>> df = pd.DataFrame([[1, 1, 2, 2],
355
+ ... [3, 3, 4, 4]], columns=idx)
356
+ >>> df
357
+ x y
358
+ 0 1 0 1
359
+ 0 1 1 2 2
360
+ 1 3 3 4 4
361
+ >>> df[:, 0]
362
+ Traceback (most recent call last):
363
+ InvalidIndexError: (slice(None, None, None), 0)
364
+ """
365
+
366
+
367
+ class DataError(Exception):
368
+ """
369
+ Exception raised when performing an operation on non-numerical data.
370
+
371
+ For example, calling ``ohlc`` on a non-numerical column or a function
372
+ on a rolling window.
373
+
374
+ Examples
375
+ --------
376
+ >>> ser = pd.Series(['a', 'b', 'c'])
377
+ >>> ser.rolling(2).sum()
378
+ Traceback (most recent call last):
379
+ DataError: No numeric types to aggregate
380
+ """
381
+
382
+
383
+ class SpecificationError(Exception):
384
+ """
385
+ Exception raised by ``agg`` when the functions are ill-specified.
386
+
387
+ The exception is raised in two scenarios.
388
+
389
+ The first way is calling ``agg`` on a
390
+ Dataframe or Series using a nested renamer (dict-of-dict).
391
+
392
+ The second way is calling ``agg`` on a Dataframe with duplicated function
393
+ names without assigning column names.
394
+
395
+ Examples
396
+ --------
397
+ >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2],
398
+ ... 'B': range(5),
399
+ ... 'C': range(5)})
400
+ >>> df.groupby('A').B.agg({'foo': 'count'}) # doctest: +SKIP
401
+ ... # SpecificationError: nested renamer is not supported
402
+
403
+ >>> df.groupby('A').agg({'B': {'foo': ['sum', 'max']}}) # doctest: +SKIP
404
+ ... # SpecificationError: nested renamer is not supported
405
+
406
+ >>> df.groupby('A').agg(['min', 'min']) # doctest: +SKIP
407
+ ... # SpecificationError: Function names must be unique if there is no new...
408
+ """
409
+
410
+
411
+ class SettingWithCopyError(ValueError):
412
+ """
413
+ Exception raised when trying to set on a copied slice from a ``DataFrame``.
414
+
415
+ The ``mode.chained_assignment`` option needs to be set to 'raise'. This can
416
+ happen unintentionally when chained indexing.
417
+
418
+ For more information on evaluation order,
419
+ see :ref:`the user guide<indexing.evaluation_order>`.
420
+
421
+ For more information on view vs. copy,
422
+ see :ref:`the user guide<indexing.view_versus_copy>`.
423
+
424
+ Examples
425
+ --------
426
+ >>> pd.options.mode.chained_assignment = 'raise'
427
+ >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A'])
428
+ >>> df.loc[0:3]['A'] = 'a' # doctest: +SKIP
429
+ ... # SettingWithCopyError: A value is trying to be set on a copy of a...
430
+ """
431
+
432
+
433
+ class SettingWithCopyWarning(Warning):
434
+ """
435
+ Warning raised when trying to set on a copied slice from a ``DataFrame``.
436
+
437
+ The ``mode.chained_assignment`` option needs to be set to 'warn'.
438
+ 'Warn' is the default option. This can happen unintentionally when
439
+ chained indexing.
440
+
441
+ For more information on evaluation order,
442
+ see :ref:`the user guide<indexing.evaluation_order>`.
443
+
444
+ For more information on view vs. copy,
445
+ see :ref:`the user guide<indexing.view_versus_copy>`.
446
+
447
+ Examples
448
+ --------
449
+ >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A'])
450
+ >>> df.loc[0:3]['A'] = 'a' # doctest: +SKIP
451
+ ... # SettingWithCopyWarning: A value is trying to be set on a copy of a...
452
+ """
453
+
454
+
455
+ class ChainedAssignmentError(Warning):
456
+ """
457
+ Warning raised when trying to set using chained assignment.
458
+
459
+ When the ``mode.copy_on_write`` option is enabled, chained assignment can
460
+ never work. In such a situation, we are always setting into a temporary
461
+ object that is the result of an indexing operation (getitem), which under
462
+ Copy-on-Write always behaves as a copy. Thus, assigning through a chain
463
+ can never update the original Series or DataFrame.
464
+
465
+ For more information on view vs. copy,
466
+ see :ref:`the user guide<indexing.view_versus_copy>`.
467
+
468
+ Examples
469
+ --------
470
+ >>> pd.options.mode.copy_on_write = True
471
+ >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A'])
472
+ >>> df["A"][0:3] = 10 # doctest: +SKIP
473
+ ... # ChainedAssignmentError: ...
474
+ >>> pd.options.mode.copy_on_write = False
475
+ """
476
+
477
+
478
+ _chained_assignment_msg = (
479
+ "A value is trying to be set on a copy of a DataFrame or Series "
480
+ "through chained assignment.\n"
481
+ "When using the Copy-on-Write mode, such chained assignment never works "
482
+ "to update the original DataFrame or Series, because the intermediate "
483
+ "object on which we are setting values always behaves as a copy.\n\n"
484
+ "Try using '.loc[row_indexer, col_indexer] = value' instead, to perform "
485
+ "the assignment in a single step.\n\n"
486
+ "See the caveats in the documentation: "
487
+ "https://pandas.pydata.org/pandas-docs/stable/user_guide/"
488
+ "indexing.html#returning-a-view-versus-a-copy"
489
+ )
490
+
491
+
492
+ _chained_assignment_method_msg = (
493
+ "A value is trying to be set on a copy of a DataFrame or Series "
494
+ "through chained assignment using an inplace method.\n"
495
+ "When using the Copy-on-Write mode, such inplace method never works "
496
+ "to update the original DataFrame or Series, because the intermediate "
497
+ "object on which we are setting values always behaves as a copy.\n\n"
498
+ "For example, when doing 'df[col].method(value, inplace=True)', try "
499
+ "using 'df.method({col: value}, inplace=True)' instead, to perform "
500
+ "the operation inplace on the original object.\n\n"
501
+ )
502
+
503
+
504
+ _chained_assignment_warning_msg = (
505
+ "ChainedAssignmentError: behaviour will change in pandas 3.0!\n"
506
+ "You are setting values through chained assignment. Currently this works "
507
+ "in certain cases, but when using Copy-on-Write (which will become the "
508
+ "default behaviour in pandas 3.0) this will never work to update the "
509
+ "original DataFrame or Series, because the intermediate object on which "
510
+ "we are setting values will behave as a copy.\n"
511
+ "A typical example is when you are setting values in a column of a "
512
+ "DataFrame, like:\n\n"
513
+ 'df["col"][row_indexer] = value\n\n'
514
+ 'Use `df.loc[row_indexer, "col"] = values` instead, to perform the '
515
+ "assignment in a single step and ensure this keeps updating the original `df`.\n\n"
516
+ "See the caveats in the documentation: "
517
+ "https://pandas.pydata.org/pandas-docs/stable/user_guide/"
518
+ "indexing.html#returning-a-view-versus-a-copy\n"
519
+ )
520
+
521
+
522
+ _chained_assignment_warning_method_msg = (
523
+ "A value is trying to be set on a copy of a DataFrame or Series "
524
+ "through chained assignment using an inplace method.\n"
525
+ "The behavior will change in pandas 3.0. This inplace method will "
526
+ "never work because the intermediate object on which we are setting "
527
+ "values always behaves as a copy.\n\n"
528
+ "For example, when doing 'df[col].method(value, inplace=True)', try "
529
+ "using 'df.method({col: value}, inplace=True)' or "
530
+ "df[col] = df[col].method(value) instead, to perform "
531
+ "the operation inplace on the original object.\n\n"
532
+ )
533
+
534
+
535
def _check_cacher(obj):
    """
    Report whether ``obj`` is the exact object stored in its parent's item cache.

    Returns False when ``obj`` has no ``_cacher``, when the referenced parent
    is gone, when the parent has no ``_item_cache``, or when the cached entry
    under the same key is a different object.
    """
    # This is a mess, selection paths that return a view set the _cacher attribute
    # on the Series; most of them also set _item_cache which adds 1 to our relevant
    # reference count, but iloc does not, so we have to check if we are actually
    # in the item cache
    if not hasattr(obj, "_cacher"):
        return False

    owner = obj._cacher[1]()
    # parent could be dead
    if owner is None:
        return False

    if not hasattr(owner, "_item_cache"):
        return False

    key = obj._cacher[0]
    if key not in owner._item_cache:
        return False

    # Check if we are actually the item from item_cache, iloc creates a
    # new object
    return obj is owner._item_cache[key]
551
+
552
+
553
+ class NumExprClobberingError(NameError):
554
+ """
555
+ Exception raised when trying to use a built-in numexpr name as a variable name.
556
+
557
+ ``eval`` or ``query`` will throw the error if the engine is set
558
+ to 'numexpr'. 'numexpr' is the default engine value for these methods if the
559
+ numexpr package is installed.
560
+
561
+ Examples
562
+ --------
563
+ >>> df = pd.DataFrame({'abs': [1, 1, 1]})
564
+ >>> df.query("abs > 2") # doctest: +SKIP
565
+ ... # NumExprClobberingError: Variables in expression "(abs) > (2)" overlap...
566
+ >>> sin, a = 1, 2
567
+ >>> pd.eval("sin + a", engine='numexpr') # doctest: +SKIP
568
+ ... # NumExprClobberingError: Variables in expression "(sin) + (a)" overlap...
569
+ """
570
+
571
+
572
class UndefinedVariableError(NameError):
    """
    Exception raised by ``query`` or ``eval`` when using an undefined variable name.

    The message states whether the missing variable was looked up as a
    local variable or as a plain name.

    Parameters
    ----------
    name : str
        The variable name that could not be resolved.
    is_local : bool, optional
        When truthy the message starts with "local variable", otherwise
        with "name".

    Examples
    --------
    >>> df = pd.DataFrame({'A': [1, 1, 1]})
    >>> df.query("A > x")  # doctest: +SKIP
    ... # UndefinedVariableError: name 'x' is not defined
    >>> df.query("A > @y")  # doctest: +SKIP
    ... # UndefinedVariableError: local variable 'y' is not defined
    >>> pd.eval('x + 1')  # doctest: +SKIP
    ... # UndefinedVariableError: name 'x' is not defined
    """

    def __init__(self, name: str, is_local: bool | None = None) -> None:
        prefix = "local variable" if is_local else "name"
        super().__init__(f"{prefix} {name!r} is not defined")
596
+
597
+
598
+ class IndexingError(Exception):
599
+ """
600
+ Exception is raised when trying to index and there is a mismatch in dimensions.
601
+
602
+ Examples
603
+ --------
604
+ >>> df = pd.DataFrame({'A': [1, 1, 1]})
605
+ >>> df.loc[..., ..., 'A'] # doctest: +SKIP
606
+ ... # IndexingError: indexer may only contain one '...' entry
607
+ >>> df = pd.DataFrame({'A': [1, 1, 1]})
608
+ >>> df.loc[1, ..., ...] # doctest: +SKIP
609
+ ... # IndexingError: Too many indexers
610
+ >>> df[pd.Series([True], dtype=bool)] # doctest: +SKIP
611
+ ... # IndexingError: Unalignable boolean Series provided as indexer...
612
+ >>> s = pd.Series(range(2),
613
+ ... index = pd.MultiIndex.from_product([["a", "b"], ["c"]]))
614
+ >>> s.loc["a", "c", "d"] # doctest: +SKIP
615
+ ... # IndexingError: Too many indexers
616
+ """
617
+
618
+
619
+ class PyperclipException(RuntimeError):
620
+ """
621
+ Exception raised when clipboard functionality is unsupported.
622
+
623
+ Raised by ``to_clipboard()`` and ``read_clipboard()``.
624
+ """
625
+
626
+
627
class PyperclipWindowsException(PyperclipException):
    """
    Exception raised when clipboard functionality is unsupported by Windows.

    Access to the clipboard handle can be denied because another window
    process is accessing it.

    Parameters
    ----------
    message : str
        Base error text; the value of ``ctypes.WinError()`` is appended to
        it in parentheses.
    """

    def __init__(self, message: str) -> None:
        # attr only exists on Windows, so typing fails on other platforms
        win_error = ctypes.WinError()  # type: ignore[attr-defined]
        super().__init__(f"{message} ({win_error})")
639
+
640
+
641
+ class CSSWarning(UserWarning):
642
+ """
643
+ Warning is raised when converting css styling fails.
644
+
645
+ This can be due to the styling not having an equivalent value or because the
646
+ styling isn't properly formatted.
647
+
648
+ Examples
649
+ --------
650
+ >>> df = pd.DataFrame({'A': [1, 1, 1]})
651
+ >>> df.style.applymap(
652
+ ... lambda x: 'background-color: blueGreenRed;'
653
+ ... ).to_excel('styled.xlsx') # doctest: +SKIP
654
+ CSSWarning: Unhandled color format: 'blueGreenRed'
655
+ >>> df.style.applymap(
656
+ ... lambda x: 'border: 1px solid red red;'
657
+ ... ).to_excel('styled.xlsx') # doctest: +SKIP
658
+ CSSWarning: Too many tokens provided to "border" (expected 1-3)
659
+ """
660
+
661
+
662
+ class PossibleDataLossError(Exception):
663
+ """
664
+ Exception raised when trying to open a HDFStore file when already opened.
665
+
666
+ Examples
667
+ --------
668
+ >>> store = pd.HDFStore('my-store', 'a') # doctest: +SKIP
669
+ >>> store.open("w") # doctest: +SKIP
670
+ ... # PossibleDataLossError: Re-opening the file [my-store] with mode [a]...
671
+ """
672
+
673
+
674
+ class ClosedFileError(Exception):
675
+ """
676
+ Exception is raised when trying to perform an operation on a closed HDFStore file.
677
+
678
+ Examples
679
+ --------
680
+ >>> store = pd.HDFStore('my-store', 'a') # doctest: +SKIP
681
+ >>> store.close() # doctest: +SKIP
682
+ >>> store.keys() # doctest: +SKIP
683
+ ... # ClosedFileError: my-store file is not open!
684
+ """
685
+
686
+
687
+ class IncompatibilityWarning(Warning):
688
+ """
689
+ Warning raised when trying to use where criteria on an incompatible HDF5 file.
690
+ """
691
+
692
+
693
+ class AttributeConflictWarning(Warning):
694
+ """
695
+ Warning raised when index attributes conflict when using HDFStore.
696
+
697
+ Occurs when attempting to append an index with a different
698
+ name than the existing index on an HDFStore or attempting to append an index with a
699
+ different frequency than the existing index on an HDFStore.
700
+
701
+ Examples
702
+ --------
703
+ >>> idx1 = pd.Index(['a', 'b'], name='name1')
704
+ >>> df1 = pd.DataFrame([[1, 2], [3, 4]], index=idx1)
705
+ >>> df1.to_hdf('file', 'data', 'w', append=True) # doctest: +SKIP
706
+ >>> idx2 = pd.Index(['c', 'd'], name='name2')
707
+ >>> df2 = pd.DataFrame([[5, 6], [7, 8]], index=idx2)
708
+ >>> df2.to_hdf('file', 'data', 'a', append=True) # doctest: +SKIP
709
+ AttributeConflictWarning: the [index_name] attribute of the existing index is
710
+ [name1] which conflicts with the new [name2]...
711
+ """
712
+
713
+
714
+ class DatabaseError(OSError):
715
+ """
716
+ Error is raised when executing sql with bad syntax or sql that throws an error.
717
+
718
+ Examples
719
+ --------
720
+ >>> from sqlite3 import connect
721
+ >>> conn = connect(':memory:')
722
+ >>> pd.read_sql('select * test', conn) # doctest: +SKIP
723
+ ... # DatabaseError: Execution failed on sql 'test': near "test": syntax error
724
+ """
725
+
726
+
727
+ class PossiblePrecisionLoss(Warning):
728
+ """
729
+ Warning raised by to_stata on a column with a value outside or equal to int64.
730
+
731
+ When the column value is outside or equal to the int64 value the column is
732
+ converted to a float64 dtype.
733
+
734
+ Examples
735
+ --------
736
+ >>> df = pd.DataFrame({"s": pd.Series([1, 2**53], dtype=np.int64)})
737
+ >>> df.to_stata('test') # doctest: +SKIP
738
+ ... # PossiblePrecisionLoss: Column converted from int64 to float64...
739
+ """
740
+
741
+
742
+ class ValueLabelTypeMismatch(Warning):
743
+ """
744
+ Warning raised by to_stata on a category column that contains non-string values.
745
+
746
+ Examples
747
+ --------
748
+ >>> df = pd.DataFrame({"categories": pd.Series(["a", 2], dtype="category")})
749
+ >>> df.to_stata('test') # doctest: +SKIP
750
+ ... # ValueLabelTypeMismatch: Stata value labels (pandas categories) must be str...
751
+ """
752
+
753
+
754
+ class InvalidColumnName(Warning):
755
+ """
756
+ Warning raised by to_stata when the column contains an invalid Stata name.
757
+
758
+ Because the column name is not a valid Stata variable name, it needs to be
759
+ converted.
760
+
761
+ Examples
762
+ --------
763
+ >>> df = pd.DataFrame({"0categories": pd.Series([2, 2])})
764
+ >>> df.to_stata('test') # doctest: +SKIP
765
+ ... # InvalidColumnName: Not all pandas column names were valid Stata variable...
766
+ """
767
+
768
+
769
+ class CategoricalConversionWarning(Warning):
770
+ """
771
+ Warning is raised when reading a partially labeled Stata file using an iterator.
772
+
773
+ Examples
774
+ --------
775
+ >>> from pandas.io.stata import StataReader
776
+ >>> with StataReader('dta_file', chunksize=2) as reader: # doctest: +SKIP
777
+ ... for i, block in enumerate(reader):
778
+ ... print(i, block)
779
+ ... # CategoricalConversionWarning: One or more series with value labels...
780
+ """
781
+
782
+
783
+ class LossySetitemError(Exception):
784
+ """
785
+ Raised when trying to do a __setitem__ on an np.ndarray that is not lossless.
786
+
787
+ Notes
788
+ -----
789
+ This is an internal error.
790
+ """
791
+
792
+
793
+ class NoBufferPresent(Exception):
794
+ """
795
+ Exception is raised in _get_data_buffer to signal that there is no requested buffer.
796
+ """
797
+
798
+
799
+ class InvalidComparison(Exception):
800
+ """
801
+ Exception is raised by _validate_comparison_value to indicate an invalid comparison.
802
+
803
+ Notes
804
+ -----
805
+ This is an internal error.
806
+ """
807
+
808
+
809
+ __all__ = [
810
+ "AbstractMethodError",
811
+ "AttributeConflictWarning",
812
+ "CategoricalConversionWarning",
813
+ "ClosedFileError",
814
+ "CSSWarning",
815
+ "DatabaseError",
816
+ "DataError",
817
+ "DtypeWarning",
818
+ "DuplicateLabelError",
819
+ "EmptyDataError",
820
+ "IncompatibilityWarning",
821
+ "IntCastingNaNError",
822
+ "InvalidColumnName",
823
+ "InvalidComparison",
824
+ "InvalidIndexError",
825
+ "InvalidVersion",
826
+ "IndexingError",
827
+ "LossySetitemError",
828
+ "MergeError",
829
+ "NoBufferPresent",
830
+ "NullFrequencyError",
831
+ "NumbaUtilError",
832
+ "NumExprClobberingError",
833
+ "OptionError",
834
+ "OutOfBoundsDatetime",
835
+ "OutOfBoundsTimedelta",
836
+ "ParserError",
837
+ "ParserWarning",
838
+ "PerformanceWarning",
839
+ "PossibleDataLossError",
840
+ "PossiblePrecisionLoss",
841
+ "PyperclipException",
842
+ "PyperclipWindowsException",
843
+ "SettingWithCopyError",
844
+ "SettingWithCopyWarning",
845
+ "SpecificationError",
846
+ "UndefinedVariableError",
847
+ "UnsortedIndexError",
848
+ "UnsupportedFunctionCall",
849
+ "ValueLabelTypeMismatch",
850
+ ]
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/feather_format.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ feather-format compat """
2
+ from __future__ import annotations
3
+
4
+ from typing import (
5
+ TYPE_CHECKING,
6
+ Any,
7
+ )
8
+
9
+ from pandas._config import using_pyarrow_string_dtype
10
+
11
+ from pandas._libs import lib
12
+ from pandas.compat._optional import import_optional_dependency
13
+ from pandas.util._decorators import doc
14
+ from pandas.util._validators import check_dtype_backend
15
+
16
+ import pandas as pd
17
+ from pandas.core.api import DataFrame
18
+ from pandas.core.shared_docs import _shared_docs
19
+
20
+ from pandas.io._util import arrow_string_types_mapper
21
+ from pandas.io.common import get_handle
22
+
23
+ if TYPE_CHECKING:
24
+ from collections.abc import (
25
+ Hashable,
26
+ Sequence,
27
+ )
28
+
29
+ from pandas._typing import (
30
+ DtypeBackend,
31
+ FilePath,
32
+ ReadBuffer,
33
+ StorageOptions,
34
+ WriteBuffer,
35
+ )
36
+
37
+
38
@doc(storage_options=_shared_docs["storage_options"])
def to_feather(
    df: DataFrame,
    path: FilePath | WriteBuffer[bytes],
    storage_options: StorageOptions | None = None,
    **kwargs: Any,
) -> None:
    """
    Serialize a DataFrame to the binary Feather format.

    Parameters
    ----------
    df : DataFrame
        The frame to write.
    path : str, path object, or file-like object
        Destination that is opened for binary writing.
    {storage_options}
    **kwargs :
        Additional keywords passed to `pyarrow.feather.write_feather`.

    Raises
    ------
    ValueError
        If `df` is not a DataFrame.
    """
    # pyarrow is optional: validate it is importable before touching it.
    import_optional_dependency("pyarrow")
    from pyarrow import feather as pa_feather

    if not isinstance(df, DataFrame):
        raise ValueError("feather only support IO with DataFrames")

    handles = get_handle(path, "wb", storage_options=storage_options, is_text=False)
    with handles:
        pa_feather.write_feather(df, handles.handle, **kwargs)
67
+
68
+
69
@doc(storage_options=_shared_docs["storage_options"])
def read_feather(
    path: FilePath | ReadBuffer[bytes],
    columns: Sequence[Hashable] | None = None,
    use_threads: bool = True,
    storage_options: StorageOptions | None = None,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
) -> DataFrame:
    """
    Read a feather-format object from a file path or binary buffer.

    Parameters
    ----------
    path : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function. The string could be a URL.
        Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be: ``file://localhost/path/to/table.feather``.
    columns : sequence, default None
        Columns to read. When omitted, all columns are read.
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads.
    {storage_options}

    dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable'
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). Behaviour is as follows:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
          (default).
        * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
          DataFrame.

        .. versionadded:: 2.0

    Returns
    -------
    type of object stored in file

    Examples
    --------
    >>> df = pd.read_feather("path/to/file.feather")  # doctest: +SKIP
    """
    import_optional_dependency("pyarrow")
    from pyarrow import feather as pa_feather

    # import utils to register the pyarrow extension types
    import pandas.core.arrays.arrow.extension_types  # pyright: ignore[reportUnusedImport] # noqa: F401

    check_dtype_backend(dtype_backend)

    # Both pyarrow readers receive the same selection/threading options.
    reader_kwargs = {"columns": columns, "use_threads": bool(use_threads)}

    with get_handle(
        path, "rb", storage_options=storage_options, is_text=False
    ) as handles:
        source = handles.handle

        # No explicit backend and no pyarrow-string option: let pyarrow build
        # the DataFrame directly.
        if dtype_backend is lib.no_default and not using_pyarrow_string_dtype():
            return pa_feather.read_feather(source, **reader_kwargs)

        pa_table = pa_feather.read_table(source, **reader_kwargs)

        # Choose how Arrow types are mapped onto pandas dtypes.
        if dtype_backend == "numpy_nullable":
            from pandas.io._util import _arrow_dtype_mapping

            types_mapper = _arrow_dtype_mapping().get
        elif dtype_backend == "pyarrow":
            types_mapper = pd.ArrowDtype
        elif using_pyarrow_string_dtype():
            types_mapper = arrow_string_types_mapper()
        else:
            raise NotImplementedError

        return pa_table.to_pandas(types_mapper=types_mapper)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/gbq.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Google BigQuery support """
2
+ from __future__ import annotations
3
+
4
+ from typing import (
5
+ TYPE_CHECKING,
6
+ Any,
7
+ )
8
+ import warnings
9
+
10
+ from pandas.compat._optional import import_optional_dependency
11
+ from pandas.util._exceptions import find_stack_level
12
+
13
+ if TYPE_CHECKING:
14
+ from google.auth.credentials import Credentials
15
+
16
+ from pandas import DataFrame
17
+
18
+
19
+ def _try_import():
20
+ # since pandas is a dependency of pandas-gbq
21
+ # we need to import on first use
22
+ msg = (
23
+ "pandas-gbq is required to load data from Google BigQuery. "
24
+ "See the docs: https://pandas-gbq.readthedocs.io."
25
+ )
26
+ pandas_gbq = import_optional_dependency("pandas_gbq", extra=msg)
27
+ return pandas_gbq
28
+
29
+
30
def read_gbq(
    query: str,
    project_id: str | None = None,
    index_col: str | None = None,
    col_order: list[str] | None = None,
    reauth: bool = False,
    auth_local_webserver: bool = True,
    dialect: str | None = None,
    location: str | None = None,
    configuration: dict[str, Any] | None = None,
    credentials: Credentials | None = None,
    use_bqstorage_api: bool | None = None,
    max_results: int | None = None,
    progress_bar_type: str | None = None,
) -> DataFrame:
    """
    Run a query on Google BigQuery and return the result as a DataFrame.

    .. deprecated:: 2.2.0

       Please use ``pandas_gbq.read_gbq`` instead.

    This function is a wrapper around the `pandas-gbq package
    <https://pandas-gbq.readthedocs.io>`__, which must be installed.

    See the `How to authenticate with Google BigQuery
    <https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__
    guide for authentication instructions.

    Parameters
    ----------
    query : str
        SQL-Like Query to return data values.
    project_id : str, optional
        Google BigQuery Account project ID. Optional when available from
        the environment.
    index_col : str, optional
        Name of result column to use for index in results DataFrame.
    col_order : list(str), optional
        List of BigQuery column names in the desired order for results
        DataFrame.
    reauth : bool, default False
        Force Google BigQuery to re-authenticate the user. This is useful
        if multiple accounts are used.
    auth_local_webserver : bool, default True
        Use the `local webserver flow`_ instead of the `console flow`_
        when getting user credentials.

        .. _local webserver flow:
            https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
        .. _console flow:
            https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console

        *New in version 0.2.0 of pandas-gbq*.

        .. versionchanged:: 1.5.0
           Default value is changed to ``True``. Google has deprecated the
           ``auth_local_webserver = False`` `"out of band" (copy-paste)
           flow
           <https://developers.googleblog.com/2022/02/making-oauth-flows-safer.html?m=1#disallowed-oob>`_.
    dialect : str, default 'legacy'
        Note: The default value is changing to 'standard' in a future version.

        SQL syntax dialect to use. Value can be one of:

        ``'legacy'``
            Use BigQuery's legacy SQL dialect. For more information see
            `BigQuery Legacy SQL Reference
            <https://cloud.google.com/bigquery/docs/reference/legacy-sql>`__.
        ``'standard'``
            Use BigQuery's standard SQL, which is
            compliant with the SQL 2011 standard. For more information
            see `BigQuery Standard SQL Reference
            <https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__.
    location : str, optional
        Location where the query job should run. See the `BigQuery locations
        documentation
        <https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a
        list of available locations. The location must match that of any
        datasets used in the query.

        *New in version 0.5.0 of pandas-gbq*.
    configuration : dict, optional
        Query config parameters for job processing.
        For example:

            configuration = {'query': {'useQueryCache': False}}

        For more information see `BigQuery REST API Reference
        <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__.
    credentials : google.auth.credentials.Credentials, optional
        Credentials for accessing Google APIs. Use this parameter to override
        default credentials, such as to use Compute Engine
        :class:`google.auth.compute_engine.Credentials` or Service Account
        :class:`google.oauth2.service_account.Credentials` directly.

        *New in version 0.8.0 of pandas-gbq*.
    use_bqstorage_api : bool, optional
        Use the `BigQuery Storage API
        <https://cloud.google.com/bigquery/docs/reference/storage/>`__ to
        download query results quickly, but at an increased cost. To use this
        API, first `enable it in the Cloud Console
        <https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com>`__.
        You must also have the `bigquery.readsessions.create
        <https://cloud.google.com/bigquery/docs/access-control#roles>`__
        permission on the project you are billing queries to.

        This feature requires version 0.10.0 or later of the ``pandas-gbq``
        package. It also requires the ``google-cloud-bigquery-storage`` and
        ``fastavro`` packages.

        Only forwarded to pandas-gbq when it is not ``None``.

    max_results : int, optional
        If set, limit the maximum number of rows to fetch from the query
        results. Only forwarded to pandas-gbq when it is not ``None``.

    progress_bar_type : Optional, str
        If set, use the `tqdm <https://tqdm.github.io/>`__ library to
        display a progress bar while the data downloads. Install the
        ``tqdm`` package to use this feature. This value is always
        forwarded to pandas-gbq, including when it is ``None``.

        Possible values of ``progress_bar_type`` include:

        ``None``
            No progress bar.
        ``'tqdm'``
            Use the :func:`tqdm.tqdm` function to print a progress bar
            to :data:`sys.stderr`.
        ``'tqdm_notebook'``
            Use the :func:`tqdm.tqdm_notebook` function to display a
            progress bar as a Jupyter notebook widget.
        ``'tqdm_gui'``
            Use the :func:`tqdm.tqdm_gui` function to display a
            progress bar as a graphical dialog box.

    Returns
    -------
    df: DataFrame
        DataFrame representing results of query.

    See Also
    --------
    pandas_gbq.read_gbq : This function in the pandas-gbq library.
    DataFrame.to_gbq : Write a DataFrame to Google BigQuery.

    Examples
    --------
    Example taken from `Google BigQuery documentation
    <https://cloud.google.com/bigquery/docs/pandas-gbq-migration>`_

    >>> sql = "SELECT name FROM table_name WHERE state = 'TX' LIMIT 100;"
    >>> df = pd.read_gbq(sql, dialect="standard")  # doctest: +SKIP
    >>> project_id = "your-project-id"  # doctest: +SKIP
    >>> df = pd.read_gbq(sql,
    ...                  project_id=project_id,
    ...                  dialect="standard"
    ...                  )  # doctest: +SKIP
    """
    deprecation_msg = (
        "read_gbq is deprecated and will be removed in a future version. "
        "Please use pandas_gbq.read_gbq instead: "
        "https://pandas-gbq.readthedocs.io/en/latest/api.html#pandas_gbq.read_gbq"
    )
    warnings.warn(deprecation_msg, FutureWarning, stacklevel=find_stack_level())
    pandas_gbq = _try_import()

    # Newer pandas-gbq keywords: only pass them along when explicitly set ...
    only_if_set = {
        "use_bqstorage_api": use_bqstorage_api,
        "max_results": max_results,
    }
    kwargs: dict[str, str | bool | int | None] = {
        name: value for name, value in only_if_set.items() if value is not None
    }
    # ... except progress_bar_type, which is passed unconditionally.
    kwargs["progress_bar_type"] = progress_bar_type

    return pandas_gbq.read_gbq(
        query,
        project_id=project_id,
        index_col=index_col,
        col_order=col_order,
        reauth=reauth,
        auth_local_webserver=auth_local_webserver,
        dialect=dialect,
        location=location,
        configuration=configuration,
        credentials=credentials,
        **kwargs,
    )
220
+
221
+
222
def to_gbq(
    dataframe: DataFrame,
    destination_table: str,
    project_id: str | None = None,
    chunksize: int | None = None,
    reauth: bool = False,
    if_exists: str = "fail",
    auth_local_webserver: bool = True,
    table_schema: list[dict[str, str]] | None = None,
    location: str | None = None,
    progress_bar: bool = True,
    credentials: Credentials | None = None,
) -> None:
    """
    Deprecated wrapper that forwards to ``pandas_gbq.to_gbq``.

    Emits a ``FutureWarning``, imports ``pandas_gbq`` on demand and passes
    `dataframe` and `destination_table` positionally and every other
    argument by keyword, all unchanged.  Nothing is returned.
    """
    deprecation_msg = (
        "to_gbq is deprecated and will be removed in a future version. "
        "Please use pandas_gbq.to_gbq instead: "
        "https://pandas-gbq.readthedocs.io/en/latest/api.html#pandas_gbq.to_gbq"
    )
    warnings.warn(deprecation_msg, FutureWarning, stacklevel=find_stack_level())
    pandas_gbq = _try_import()

    forwarded = {
        "project_id": project_id,
        "chunksize": chunksize,
        "reauth": reauth,
        "if_exists": if_exists,
        "auth_local_webserver": auth_local_webserver,
        "table_schema": table_schema,
        "location": location,
        "progress_bar": progress_bar,
        "credentials": credentials,
    }
    pandas_gbq.to_gbq(dataframe, destination_table, **forwarded)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/html.py ADDED
@@ -0,0 +1,1259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ :mod:`pandas.io.html` is a module containing functionality for dealing with
3
+ HTML IO.
4
+
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from collections import abc
10
+ import numbers
11
+ import re
12
+ from re import Pattern
13
+ from typing import (
14
+ TYPE_CHECKING,
15
+ Literal,
16
+ cast,
17
+ )
18
+ import warnings
19
+
20
+ from pandas._libs import lib
21
+ from pandas.compat._optional import import_optional_dependency
22
+ from pandas.errors import (
23
+ AbstractMethodError,
24
+ EmptyDataError,
25
+ )
26
+ from pandas.util._decorators import doc
27
+ from pandas.util._exceptions import find_stack_level
28
+ from pandas.util._validators import check_dtype_backend
29
+
30
+ from pandas.core.dtypes.common import is_list_like
31
+
32
+ from pandas import isna
33
+ from pandas.core.indexes.base import Index
34
+ from pandas.core.indexes.multi import MultiIndex
35
+ from pandas.core.series import Series
36
+ from pandas.core.shared_docs import _shared_docs
37
+
38
+ from pandas.io.common import (
39
+ file_exists,
40
+ get_handle,
41
+ is_file_like,
42
+ is_fsspec_url,
43
+ is_url,
44
+ stringify_path,
45
+ validate_header_arg,
46
+ )
47
+ from pandas.io.formats.printing import pprint_thing
48
+ from pandas.io.parsers import TextParser
49
+
50
+ if TYPE_CHECKING:
51
+ from collections.abc import (
52
+ Iterable,
53
+ Sequence,
54
+ )
55
+
56
+ from pandas._typing import (
57
+ BaseBuffer,
58
+ DtypeBackend,
59
+ FilePath,
60
+ HTMLFlavors,
61
+ ReadBuffer,
62
+ StorageOptions,
63
+ )
64
+
65
+ from pandas import DataFrame
66
+
67
+ #############
68
+ # READ HTML #
69
+ #############
70
+ _RE_WHITESPACE = re.compile(r"[\r\n]+|\s{2,}")
71
+
72
+
73
+ def _remove_whitespace(s: str, regex: Pattern = _RE_WHITESPACE) -> str:
74
+ """
75
+ Replace extra whitespace inside of a string with a single space.
76
+
77
+ Parameters
78
+ ----------
79
+ s : str or unicode
80
+ The string from which to remove extra whitespace.
81
+ regex : re.Pattern
82
+ The regular expression to use to remove extra whitespace.
83
+
84
+ Returns
85
+ -------
86
+ subd : str or unicode
87
+ `s` with all extra whitespace replaced with a single space.
88
+ """
89
+ return regex.sub(" ", s.strip())
90
+
91
+
92
+ def _get_skiprows(skiprows: int | Sequence[int] | slice | None) -> int | Sequence[int]:
93
+ """
94
+ Get an iterator given an integer, slice or container.
95
+
96
+ Parameters
97
+ ----------
98
+ skiprows : int, slice, container
99
+ The iterator to use to skip rows; can also be a slice.
100
+
101
+ Raises
102
+ ------
103
+ TypeError
104
+ * If `skiprows` is not a slice, integer, or Container
105
+
106
+ Returns
107
+ -------
108
+ it : iterable
109
+ A proper iterator to use to skip rows of a DataFrame.
110
+ """
111
+ if isinstance(skiprows, slice):
112
+ start, step = skiprows.start or 0, skiprows.step or 1
113
+ return list(range(start, skiprows.stop, step))
114
+ elif isinstance(skiprows, numbers.Integral) or is_list_like(skiprows):
115
+ return cast("int | Sequence[int]", skiprows)
116
+ elif skiprows is None:
117
+ return 0
118
+ raise TypeError(f"{type(skiprows).__name__} is not a valid type for skipping rows")
119
+
120
+
121
+ def _read(
122
+ obj: FilePath | BaseBuffer,
123
+ encoding: str | None,
124
+ storage_options: StorageOptions | None,
125
+ ) -> str | bytes:
126
+ """
127
+ Try to read from a url, file or string.
128
+
129
+ Parameters
130
+ ----------
131
+ obj : str, unicode, path object, or file-like object
132
+
133
+ Returns
134
+ -------
135
+ raw_text : str
136
+ """
137
+ text: str | bytes
138
+ if (
139
+ is_url(obj)
140
+ or hasattr(obj, "read")
141
+ or (isinstance(obj, str) and file_exists(obj))
142
+ ):
143
+ with get_handle(
144
+ obj, "r", encoding=encoding, storage_options=storage_options
145
+ ) as handles:
146
+ text = handles.handle.read()
147
+ elif isinstance(obj, (str, bytes)):
148
+ text = obj
149
+ else:
150
+ raise TypeError(f"Cannot read object of type '{type(obj).__name__}'")
151
+ return text
152
+
153
+
154
+ class _HtmlFrameParser:
155
+ """
156
+ Base class for parsers that parse HTML into DataFrames.
157
+
158
+ Parameters
159
+ ----------
160
+ io : str or file-like
161
+ This can be either a string of raw HTML, a valid URL using the HTTP,
162
+ FTP, or FILE protocols or a file-like object.
163
+
164
+ match : str or regex
165
+ The text to match in the document.
166
+
167
+ attrs : dict
168
+ List of HTML <table> element attributes to match.
169
+
170
+ encoding : str
171
+ Encoding to be used by parser
172
+
173
+ displayed_only : bool
174
+ Whether or not items with "display:none" should be ignored
175
+
176
+ extract_links : {None, "all", "header", "body", "footer"}
177
+ Table elements in the specified section(s) with <a> tags will have their
178
+ href extracted.
179
+
180
+ .. versionadded:: 1.5.0
181
+
182
+ Attributes
183
+ ----------
184
+ io : str or file-like
185
+ raw HTML, URL, or file-like object
186
+
187
+ match : regex
188
+ The text to match in the raw HTML
189
+
190
+ attrs : dict-like
191
+ A dictionary of valid table attributes to use to search for table
192
+ elements.
193
+
194
+ encoding : str
195
+ Encoding to be used by parser
196
+
197
+ displayed_only : bool
198
+ Whether or not items with "display:none" should be ignored
199
+
200
+ extract_links : {None, "all", "header", "body", "footer"}
201
+ Table elements in the specified section(s) with <a> tags will have their
202
+ href extracted.
203
+
204
+ .. versionadded:: 1.5.0
205
+
206
+ Notes
207
+ -----
208
+ To subclass this class effectively you must override the following methods:
209
+ * :func:`_build_doc`
210
+ * :func:`_attr_getter`
211
+ * :func:`_href_getter`
212
+ * :func:`_text_getter`
213
+ * :func:`_parse_td`
214
+ * :func:`_parse_thead_tr`
215
+ * :func:`_parse_tbody_tr`
216
+ * :func:`_parse_tfoot_tr`
217
+ * :func:`_parse_tables`
218
+ * :func:`_equals_tag`
219
+ See each method's respective documentation for details on their
220
+ functionality.
221
+ """
222
+
223
+ def __init__(
224
+ self,
225
+ io: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
226
+ match: str | Pattern,
227
+ attrs: dict[str, str] | None,
228
+ encoding: str,
229
+ displayed_only: bool,
230
+ extract_links: Literal[None, "header", "footer", "body", "all"],
231
+ storage_options: StorageOptions = None,
232
+ ) -> None:
233
+ self.io = io
234
+ self.match = match
235
+ self.attrs = attrs
236
+ self.encoding = encoding
237
+ self.displayed_only = displayed_only
238
+ self.extract_links = extract_links
239
+ self.storage_options = storage_options
240
+
241
+ def parse_tables(self):
242
+ """
243
+ Parse and return all tables from the DOM.
244
+
245
+ Returns
246
+ -------
247
+ list of parsed (header, body, footer) tuples from tables.
248
+ """
249
+ tables = self._parse_tables(self._build_doc(), self.match, self.attrs)
250
+ return (self._parse_thead_tbody_tfoot(table) for table in tables)
251
+
252
+ def _attr_getter(self, obj, attr):
253
+ """
254
+ Return the attribute value of an individual DOM node.
255
+
256
+ Parameters
257
+ ----------
258
+ obj : node-like
259
+ A DOM node.
260
+
261
+ attr : str or unicode
262
+ The attribute, such as "colspan"
263
+
264
+ Returns
265
+ -------
266
+ str or unicode
267
+ The attribute value.
268
+ """
269
+ # Both lxml and BeautifulSoup have the same implementation:
270
+ return obj.get(attr)
271
+
272
+ def _href_getter(self, obj) -> str | None:
273
+ """
274
+ Return a href if the DOM node contains a child <a> or None.
275
+
276
+ Parameters
277
+ ----------
278
+ obj : node-like
279
+ A DOM node.
280
+
281
+ Returns
282
+ -------
283
+ href : str or unicode
284
+ The href from the <a> child of the DOM node.
285
+ """
286
+ raise AbstractMethodError(self)
287
+
288
+ def _text_getter(self, obj):
289
+ """
290
+ Return the text of an individual DOM node.
291
+
292
+ Parameters
293
+ ----------
294
+ obj : node-like
295
+ A DOM node.
296
+
297
+ Returns
298
+ -------
299
+ text : str or unicode
300
+ The text from an individual DOM node.
301
+ """
302
+ raise AbstractMethodError(self)
303
+
304
+ def _parse_td(self, obj):
305
+ """
306
+ Return the td elements from a row element.
307
+
308
+ Parameters
309
+ ----------
310
+ obj : node-like
311
+ A DOM <tr> node.
312
+
313
+ Returns
314
+ -------
315
+ list of node-like
316
+ These are the elements of each row, i.e., the columns.
317
+ """
318
+ raise AbstractMethodError(self)
319
+
320
+ def _parse_thead_tr(self, table):
321
+ """
322
+ Return the list of thead row elements from the parsed table element.
323
+
324
+ Parameters
325
+ ----------
326
+ table : a table element that contains zero or more thead elements.
327
+
328
+ Returns
329
+ -------
330
+ list of node-like
331
+ These are the <tr> row elements of a table.
332
+ """
333
+ raise AbstractMethodError(self)
334
+
335
+ def _parse_tbody_tr(self, table):
336
+ """
337
+ Return the list of tbody row elements from the parsed table element.
338
+
339
+ HTML5 table bodies consist of either 0 or more <tbody> elements (which
340
+ only contain <tr> elements) or 0 or more <tr> elements. This method
341
+ checks for both structures.
342
+
343
+ Parameters
344
+ ----------
345
+ table : a table element that contains row elements.
346
+
347
+ Returns
348
+ -------
349
+ list of node-like
350
+ These are the <tr> row elements of a table.
351
+ """
352
+ raise AbstractMethodError(self)
353
+
354
+ def _parse_tfoot_tr(self, table):
355
+ """
356
+ Return the list of tfoot row elements from the parsed table element.
357
+
358
+ Parameters
359
+ ----------
360
+ table : a table element that contains row elements.
361
+
362
+ Returns
363
+ -------
364
+ list of node-like
365
+ These are the <tr> row elements of a table.
366
+ """
367
+ raise AbstractMethodError(self)
368
+
369
+ def _parse_tables(self, document, match, attrs):
370
+ """
371
+ Return all tables from the parsed DOM.
372
+
373
+ Parameters
374
+ ----------
375
+ document : the DOM from which to parse the table element.
376
+
377
+ match : str or regular expression
378
+ The text to search for in the DOM tree.
379
+
380
+ attrs : dict
381
+ A dictionary of table attributes that can be used to disambiguate
382
+ multiple tables on a page.
383
+
384
+ Raises
385
+ ------
386
+ ValueError : `match` does not match any text in the document.
387
+
388
+ Returns
389
+ -------
390
+ list of node-like
391
+ HTML <table> elements to be parsed into raw data.
392
+ """
393
+ raise AbstractMethodError(self)
394
+
395
+ def _equals_tag(self, obj, tag) -> bool:
396
+ """
397
+ Return whether an individual DOM node matches a tag
398
+
399
+ Parameters
400
+ ----------
401
+ obj : node-like
402
+ A DOM node.
403
+
404
+ tag : str
405
+ Tag name to be checked for equality.
406
+
407
+ Returns
408
+ -------
409
+ boolean
410
+ Whether `obj`'s tag name is `tag`
411
+ """
412
+ raise AbstractMethodError(self)
413
+
414
+ def _build_doc(self):
415
+ """
416
+ Return a tree-like object that can be used to iterate over the DOM.
417
+
418
+ Returns
419
+ -------
420
+ node-like
421
+ The DOM from which to parse the table element.
422
+ """
423
+ raise AbstractMethodError(self)
424
+
425
+ def _parse_thead_tbody_tfoot(self, table_html):
426
+ """
427
+ Given a table, return parsed header, body, and foot.
428
+
429
+ Parameters
430
+ ----------
431
+ table_html : node-like
432
+
433
+ Returns
434
+ -------
435
+ tuple of (header, body, footer), each a list of list-of-text rows.
436
+
437
+ Notes
438
+ -----
439
+ Header and body are lists-of-lists. Top level list is a list of
440
+ rows. Each row is a list of str text.
441
+
442
+ Logic: Use <thead>, <tbody>, <tfoot> elements to identify
443
+ header, body, and footer, otherwise:
444
+ - Put all rows into body
445
+ - Move rows from top of body to header only if
446
+ all elements inside row are <th>
447
+ - Move rows from bottom of body to footer only if
448
+ all elements inside row are <th>
449
+ """
450
+ header_rows = self._parse_thead_tr(table_html)
451
+ body_rows = self._parse_tbody_tr(table_html)
452
+ footer_rows = self._parse_tfoot_tr(table_html)
453
+
454
+ def row_is_all_th(row):
455
+ return all(self._equals_tag(t, "th") for t in self._parse_td(row))
456
+
457
+ if not header_rows:
458
+ # The table has no <thead>. Move the top all-<th> rows from
459
+ # body_rows to header_rows. (This is a common case because many
460
+ # tables in the wild have no <thead> or <tfoot>
461
+ while body_rows and row_is_all_th(body_rows[0]):
462
+ header_rows.append(body_rows.pop(0))
463
+
464
+ header = self._expand_colspan_rowspan(header_rows, section="header")
465
+ body = self._expand_colspan_rowspan(body_rows, section="body")
466
+ footer = self._expand_colspan_rowspan(footer_rows, section="footer")
467
+
468
+ return header, body, footer
469
+
470
def _expand_colspan_rowspan(
    self, rows, section: Literal["header", "footer", "body"]
):
    """
    Flatten a list of <tr>s into rows of cell values, honouring spans.

    Parameters
    ----------
    rows : list of node-like
        List of <tr>s
    section : {"header", "body", "footer"}
        The section that the rows belong to.

    Returns
    -------
    list of list
        One list per output row. Each entry is the whitespace-normalised
        cell text, or a ``(text, href)`` tuple when ``self.extract_links``
        is ``"all"`` or equals ``section``.

    Notes
    -----
    A cell with ``colspan`` is repeated across that many columns, and a
    cell with ``rowspan`` is repeated in the same column of the following
    rows. Extra rows are emitted at the end if spans outlive ``rows``.
    """
    expanded = []  # every output row, in document order
    # Cells still spanning downwards: (column index, cell value, rows left)
    carried: list[tuple[int, str | tuple, int]] = []

    def place(out_row, upcoming, col, cell, rows_left):
        # Write one cell; if it keeps spanning, queue it for the next row.
        out_row.append(cell)
        if rows_left > 1:
            upcoming.append((col, cell, rows_left - 1))

    for tr in rows:
        out_row = []
        upcoming = []
        col = 0
        taken = 0  # number of entries of ``carried`` already placed

        for td in self._parse_td(tr):
            # Cells spanning down from earlier rows that sit at or before
            # the current column are emitted ahead of this <td>.
            while taken < len(carried) and carried[taken][0] <= col:
                place(out_row, upcoming, *carried[taken])
                taken += 1
                col += 1

            cell = _remove_whitespace(self._text_getter(td))
            if self.extract_links in ("all", section):
                cell = (cell, self._href_getter(td))
            rowspan = int(self._attr_getter(td, "rowspan") or 1)
            colspan = int(self._attr_getter(td, "colspan") or 1)

            # Repeat the cell once per spanned column.
            for _ in range(colspan):
                place(out_row, upcoming, col, cell, rowspan)
                col += 1

        # Whatever is still carried belongs after the last <td> of the row.
        for leftover in carried[taken:]:
            place(out_row, upcoming, *leftover)

        expanded.append(out_row)
        carried = upcoming

    # Rows that exist only because an earlier cell's rowspan reaches past
    # the last <tr>.
    while carried:
        filler = []
        upcoming = []
        for leftover in carried:
            place(filler, upcoming, *leftover)
        expanded.append(filler)
        carried = upcoming

    return expanded
551
+
552
def _handle_hidden_tables(self, tbl_list, attr_name: str):
    """
    Drop tables whose inline style hides them, when requested.

    Parameters
    ----------
    tbl_list : list of node-like
        Candidate table nodes; the element type depends on the parser.
    attr_name : str
        Name of the attribute on each node that holds its HTML
        attributes as a mapping (looked up with ``getattr``).

    Returns
    -------
    list of node-like
        ``tbl_list`` itself when ``self.displayed_only`` is falsy,
        otherwise a new list without the nodes whose ``style`` attribute
        contains ``display:none`` (spaces ignored).
    """
    if not self.displayed_only:
        return tbl_list

    visible = []
    for node in tbl_list:
        inline_style = getattr(node, attr_name).get("style", "")
        # Spaces are stripped so "display: none" and "display:none" both match.
        if "display:none" not in inline_style.replace(" ", ""):
            visible.append(node)
    return visible
577
+
578
+
579
class _BeautifulSoupHtml5LibFrameParser(_HtmlFrameParser):
    """
    HTML to DataFrame parser built on BeautifulSoup with html5lib.

    See Also
    --------
    pandas.io.html._HtmlFrameParser
    pandas.io.html._LxmlFrameParser

    Notes
    -----
    The individual hooks are documented on the base class
    :class:`pandas.io.html._HtmlFrameParser`.
    """

    def _parse_tables(self, document, match, attrs):
        candidates = document.find_all("table", attrs=attrs)
        if not candidates:
            raise ValueError("No tables found")

        candidates = self._handle_hidden_tables(candidates, "attrs")
        hidden_style = re.compile(r"display:\s*none")
        matched = []
        seen = set()

        for table in candidates:
            if self.displayed_only:
                # Remove <style> blocks and hidden descendants so their
                # text can neither match nor end up in the data.
                for node in table.find_all("style"):
                    node.decompose()

                for node in table.find_all(style=hidden_style):
                    node.decompose()

            is_new = table not in seen
            if is_new and table.find(string=match) is not None:
                matched.append(table)
            seen.add(table)

        if not matched:
            raise ValueError(f"No tables found matching pattern {match.pattern!r}")
        return matched

    def _href_getter(self, obj) -> str | None:
        anchor = obj.find("a", href=True)
        return anchor["href"] if anchor else None

    def _text_getter(self, obj):
        return obj.text

    def _equals_tag(self, obj, tag) -> bool:
        return obj.name == tag

    def _parse_td(self, row):
        # Direct children only, so cells of nested tables are not picked up.
        return row.find_all(("td", "th"), recursive=False)

    def _parse_thead_tr(self, table):
        return table.select("thead tr")

    def _parse_tbody_tr(self, table):
        # HTML spec: at most one of these two lookups yields rows.
        return table.select("tbody tr") + table.find_all("tr", recursive=False)

    def _parse_tfoot_tr(self, table):
        return table.select("tfoot tr")

    def _setup_build_doc(self):
        raw_text = _read(self.io, self.encoding, self.storage_options)
        if raw_text:
            return raw_text
        raise ValueError(f"No text parsed from document: {self.io}")

    def _build_doc(self):
        from bs4 import BeautifulSoup

        raw = self._setup_build_doc()
        # Bytes plus an explicit encoding are decoded here; otherwise the
        # encoding (possibly None) is passed on to BeautifulSoup.
        decode_here = isinstance(raw, bytes) and self.encoding is not None
        markup = raw.decode(self.encoding) if decode_here else raw
        from_encoding = None if decode_here else self.encoding

        soup = BeautifulSoup(markup, features="html5lib", from_encoding=from_encoding)

        # Turn <br> into a newline so line breaks survive text extraction.
        for br in soup.find_all("br"):
            br.replace_with("\n" + br.text)

        return soup
667
+
668
+
669
def _build_xpath_expr(attrs) -> str:
    """
    Build an XPath predicate that filters on the given HTML attributes.

    This mimics bs4's ability to search by attribute keyword arguments
    when the lxml parser is used.

    Parameters
    ----------
    attrs : dict
        A dict of HTML attributes. These are NOT checked for validity.
        The key ``"class_"`` is accepted as a spelling of ``"class"``
        because ``class`` is a Python keyword. The dict is not modified.

    Returns
    -------
    expr : str
        An XPath predicate of the form ``[@k1='v1' and @k2='v2']``.
    """
    # Work on a shallow copy so the rename below never leaks into the
    # caller's dict.
    attrs = dict(attrs)
    if "class_" in attrs:
        attrs["class"] = attrs.pop("class_")

    predicates = " and ".join(f"@{k}={v!r}" for k, v in attrs.items())
    return f"[{predicates}]"
690
+
691
+
692
# Namespace map that makes the EXSLT ``re:test`` function usable in XPath.
_re_namespace = {"re": "http://exslt.org/regular-expressions"}


class _LxmlFrameParser(_HtmlFrameParser):
    """
    HTML to DataFrame parser built on lxml.

    Warning
    -------
    This parser can only handle HTTP, FTP, and FILE urls.

    See Also
    --------
    _HtmlFrameParser
    _BeautifulSoupHtml5LibFrameParser

    Notes
    -----
    The individual hooks are documented on the base class
    :class:`_HtmlFrameParser`.
    """

    def _href_getter(self, obj) -> str | None:
        links = obj.xpath(".//a/@href")
        return links[0] if links else None

    def _text_getter(self, obj):
        return obj.text_content()

    def _parse_td(self, row):
        # Direct children only: the "row" may actually be a <thead>
        # standing in for a missing <tr> (see _parse_thead_tr).
        return row.xpath("./td|./th")

    def _parse_tables(self, document, match, kwargs):
        pattern = match.pattern

        # Only tables with a descendant text node matching the pattern
        # are selected (GH 49929).
        query = f"//table[.//text()[re:test(., {pattern!r})]]"

        # Narrow further by any table attributes that were supplied.
        if kwargs:
            query += _build_xpath_expr(kwargs)

        found = document.xpath(query, namespaces=_re_namespace)
        found = self._handle_hidden_tables(found, "attrib")

        if self.displayed_only:
            for table in found:
                # lxml utilizes XPATH 1.0 which does not have regex
                # support, so every element carrying a style attribute is
                # inspected by hand for display:none.
                for node in table.xpath(".//style"):
                    node.drop_tree()
                for node in table.xpath(".//*[@style]"):
                    inline_style = node.attrib.get("style", "")
                    if "display:none" in inline_style.replace(" ", ""):
                        node.drop_tree()

        if not found:
            raise ValueError(f"No tables found matching regex {pattern!r}")
        return found

    def _equals_tag(self, obj, tag) -> bool:
        return obj.tag == tag

    def _build_doc(self):
        """
        Raises
        ------
        ValueError
            * If a URL that lxml cannot parse is passed.

        Exception
            * Any other ``Exception`` thrown. For example, trying to parse a
              URL that is syntactically correct on a machine with no internet
              connection will fail.

        See Also
        --------
        pandas.io.html._HtmlFrameParser._build_doc
        """
        from lxml.etree import XMLSyntaxError
        from lxml.html import (
            HTMLParser,
            fromstring,
            parse,
        )

        def root_of(doc):
            # An element tree exposes getroot(); a bare element does not.
            try:
                return doc.getroot()
            except AttributeError:
                return doc

        parser = HTMLParser(recover=True, encoding=self.encoding)

        try:
            if is_url(self.io):
                with get_handle(
                    self.io, "r", storage_options=self.storage_options
                ) as f:
                    r = parse(f.handle, parser=parser)
            else:
                # try to parse the input in the simplest way
                r = parse(self.io, parser=parser)
            r = root_of(r)
        except (UnicodeDecodeError, OSError) as e:
            if is_url(self.io):
                raise e
            # Not a URL: treat the input as a literal blob of HTML.
            r = root_of(fromstring(self.io, parser=parser))
        else:
            if not hasattr(r, "text_content"):
                raise XMLSyntaxError("no text parsed from document", 0, 0, 0)

        # Turn <br> into a newline so line breaks survive text extraction.
        for br in r.xpath("*//br"):
            br.tail = "\n" + (br.tail or "")

        return r

    def _parse_thead_tr(self, table):
        header_rows = []

        for thead in table.xpath(".//thead"):
            header_rows.extend(thead.xpath("./tr"))

            # HACK: lxml does not clean up the clearly-erroneous
            # <thead><th>foo</th><th>bar</th></thead> (missing <tr>).
            # Add the <thead> itself and _pretend_ it's a <tr>;
            # _parse_td() will find its children as though it's a <tr>.
            #
            # Better solution would be to use html5lib.
            if thead.xpath("./td|./th"):
                header_rows.append(thead)

        return header_rows

    def _parse_tbody_tr(self, table):
        # HTML spec: at most one of these two lookups yields rows.
        return table.xpath(".//tbody//tr") + table.xpath("./tr")

    def _parse_tfoot_tr(self, table):
        return table.xpath(".//tfoot//tr")
842
+
843
+
844
def _expand_elements(body) -> None:
    """
    Pad ragged rows with empty strings, in place.

    Parameters
    ----------
    body : list of list
        Table rows. Every row shorter than the longest one is extended
        with ``""`` entries until all rows have the same length. An empty
        ``body`` is left untouched.
    """
    # Plain builtins are enough here; ``default=0`` makes the empty case
    # explicit instead of depending on a NaN result.
    widest = max(map(len, body), default=0)

    for pos, row in enumerate(body):
        missing = widest - len(row)
        if missing:
            # ``+=`` extends list rows in place, as before.
            body[pos] += [""] * missing
853
+
854
+
855
def _data_to_frame(**kwargs):
    """
    Turn one parsed table into a DataFrame via ``TextParser``.

    Parameters
    ----------
    **kwargs
        Must contain ``data`` (a ``(head, body, foot)`` triple of row
        lists), ``header`` and ``skiprows``. ``data`` and ``header`` are
        consumed here; everything else is forwarded to ``TextParser``.

    Returns
    -------
    The result of ``TextParser(...).read()``.
    """
    thead_rows, rows, tfoot_rows = kwargs.pop("data")
    header = kwargs.pop("header")
    kwargs["skiprows"] = _get_skiprows(kwargs["skiprows"])

    if thead_rows:
        rows = thead_rows + rows

        # Infer header when there is a <thead> or top <th>-only rows
        if header is None:
            if len(thead_rows) == 1:
                header = 0
            else:
                # ignore all-empty-text rows
                header = [
                    pos for pos, cells in enumerate(thead_rows) if any(cells)
                ]

    if tfoot_rows:
        rows += tfoot_rows

    # fill out elements of body that are "ragged"
    _expand_elements(rows)
    with TextParser(rows, header=header, **kwargs) as reader:
        return reader.read()
877
+
878
+
879
# Flavor name -> parser class. ``None`` means lxml; "bs4" and "html5lib"
# share one parser. Key order is visible in error messages.
_valid_parsers = {
    "lxml": _LxmlFrameParser,
    None: _LxmlFrameParser,
    "html5lib": _BeautifulSoupHtml5LibFrameParser,
    "bs4": _BeautifulSoupHtml5LibFrameParser,
}


def _parser_dispatch(flavor: HTMLFlavors | None) -> type[_HtmlFrameParser]:
    """
    Pick the parser class for a flavor and check its dependencies.

    Parameters
    ----------
    flavor : {"lxml", "html5lib", "bs4"} or None
        The type of parser to use. This must be a valid backend.

    Returns
    -------
    cls : _HtmlFrameParser subclass
        The parser class based on the requested input flavor.

    Raises
    ------
    ValueError
        * If `flavor` is not a valid backend.
    ImportError
        * If you do not have the requested `flavor`
    """
    known = list(_valid_parsers)
    if flavor not in known:
        raise ValueError(
            f"{flavor!r} is not a valid flavor, valid flavors are {known}"
        )

    # Check that the optional packages for the chosen backend import.
    if flavor in ("bs4", "html5lib"):
        required = ("html5lib", "bs4")
    else:
        required = ("lxml.etree",)
    for module_name in required:
        import_optional_dependency(module_name)

    return _valid_parsers[flavor]
920
+
921
+
922
def _print_as_set(s) -> str:
    """Render the items of ``s`` as ``{a, b, ...}`` using ``pprint_thing``."""
    rendered = ", ".join(map(pprint_thing, s))
    return "{" + rendered + "}"
925
+
926
+
927
def _validate_flavor(flavor):
    """
    Normalise the ``flavor`` argument to a tuple of flavor names.

    Parameters
    ----------
    flavor : None, str or iterable of str
        ``None`` selects ``("lxml", "bs4")``; a single string is wrapped
        in a one-element tuple; any other iterable is converted to a tuple.

    Returns
    -------
    tuple of str
        The requested flavors, in the order given.

    Raises
    ------
    TypeError
        If an iterable is given that contains a non-string.
    ValueError
        If ``flavor`` is not None, a string or an iterable, or if none of
        the requested names is a key of ``_valid_parsers``.
    """
    if flavor is None:
        flavor = ("lxml", "bs4")
    elif isinstance(flavor, str):
        flavor = (flavor,)
    elif isinstance(flavor, abc.Iterable):
        container_name = type(flavor).__name__
        # Materialise before inspecting: a one-shot iterable (e.g. a
        # generator) would otherwise be consumed by the check below.
        flavor = tuple(flavor)
        if not all(isinstance(flav, str) for flav in flavor):
            raise TypeError(
                f"Object of type {repr(container_name)} "
                f"is not an iterable of strings"
            )
    else:
        # Strings were handled above, so str() is always the right rendering.
        raise ValueError(str(flavor) + " is not a valid flavor")

    valid_flavors = set(_valid_parsers)
    flavor_set = set(flavor)

    # At least one requested flavor must be known.
    if not flavor_set & valid_flavors:
        raise ValueError(
            f"{_print_as_set(flavor_set)} is not a valid set of flavors, valid "
            f"flavors are {_print_as_set(valid_flavors)}"
        )
    return flavor
953
+
954
+
955
def _parse(
    flavor,
    io,
    match,
    attrs,
    encoding,
    displayed_only,
    extract_links,
    storage_options,
    **kwargs,
):
    """
    Parse ``io`` with the first flavor that succeeds and build DataFrames.

    Each flavor is tried in order. A ``ValueError`` from one parser is
    kept and the next flavor is tried, rewinding ``io`` first when it is
    seekable. If all flavors fail, the last error is raised. Tables that
    raise ``EmptyDataError`` are skipped.

    Returns
    -------
    list of DataFrame
    """
    flavor = _validate_flavor(flavor)
    compiled_match = re.compile(match)  # you can pass a compiled regex here

    last_error = None
    for flav in flavor:
        parser_cls = _parser_dispatch(flav)
        parser = parser_cls(
            io,
            compiled_match,
            attrs,
            encoding,
            displayed_only,
            extract_links,
            storage_options,
        )

        try:
            tables = parser.parse_tables()
        except ValueError as caught:
            # if `io` is an io-like object, check if it's seekable
            # and try to rewind it before trying the next parser
            if hasattr(io, "seekable"):
                if io.seekable():
                    io.seek(0)
                else:
                    # if we couldn't rewind it, let the user know
                    raise ValueError(
                        f"The flavor {flav} failed to parse your input. "
                        "Since you passed a non-rewindable file "
                        "object, we can't rewind it to try "
                        "another parser. Try read_html() with a different flavor."
                    ) from caught

            last_error = caught
        else:
            break
    else:
        assert last_error is not None  # for mypy
        raise last_error

    links_in_header = extract_links in ("all", "header")
    frames = []
    for table in tables:
        try:
            df = _data_to_frame(data=table, **kwargs)
            # Cast MultiIndex header to an Index of tuples when extracting header
            # links and replace nan with None (therefore can't use mi.to_flat_index()).
            # This maintains consistency of selection (e.g. df.columns.str[1])
            if links_in_header and isinstance(df.columns, MultiIndex):
                df.columns = Index(
                    ((col[0], None if isna(col[1]) else col[1]) for col in df.columns),
                    tupleize_cols=False,
                )

            frames.append(df)
        except EmptyDataError:  # empty table
            continue
    return frames
1024
+
1025
+
1026
@doc(storage_options=_shared_docs["storage_options"])
def read_html(
    io: FilePath | ReadBuffer[str],
    *,
    match: str | Pattern = ".+",
    flavor: HTMLFlavors | Sequence[HTMLFlavors] | None = None,
    header: int | Sequence[int] | None = None,
    index_col: int | Sequence[int] | None = None,
    skiprows: int | Sequence[int] | slice | None = None,
    attrs: dict[str, str] | None = None,
    parse_dates: bool = False,
    thousands: str | None = ",",
    encoding: str | None = None,
    decimal: str = ".",
    converters: dict | None = None,
    na_values: Iterable[object] | None = None,
    keep_default_na: bool = True,
    displayed_only: bool = True,
    extract_links: Literal[None, "header", "footer", "body", "all"] = None,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
    storage_options: StorageOptions = None,
) -> list[DataFrame]:
    r"""
    Read HTML tables into a ``list`` of ``DataFrame`` objects.

    Parameters
    ----------
    io : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a string ``read()`` function.
        The string can represent a URL or the HTML itself. Note that
        lxml only accepts the http, ftp and file url protocols. If you have a
        URL that starts with ``'https'`` you might try removing the ``'s'``.

        .. deprecated:: 2.1.0
            Passing html literal strings is deprecated.
            Wrap literal string/bytes input in ``io.StringIO``/``io.BytesIO`` instead.

    match : str or compiled regular expression, optional
        The set of tables containing text matching this regex or string will be
        returned. Unless the HTML is extremely simple you will probably need to
        pass a non-empty string here. Defaults to '.+' (match any non-empty
        string). The default value will return all tables contained on a page.
        This value is converted to a regular expression so that there is
        consistent behavior between Beautiful Soup and lxml.

    flavor : {{"lxml", "html5lib", "bs4"}} or list-like, optional
        The parsing engine (or list of parsing engines) to use. 'bs4' and
        'html5lib' are synonymous with each other, they are both there for
        backwards compatibility. The default of ``None`` tries to use ``lxml``
        to parse and if that fails it falls back on ``bs4`` + ``html5lib``.

    header : int or list-like, optional
        The row (or list of rows for a :class:`~pandas.MultiIndex`) to use to
        make the columns headers.

    index_col : int or list-like, optional
        The column (or list of columns) to use to create the index.

    skiprows : int, list-like or slice, optional
        Number of rows to skip after parsing the column integer. 0-based. If a
        sequence of integers or a slice is given, will skip the rows indexed by
        that sequence.  Note that a single element sequence means 'skip the nth
        row' whereas an integer means 'skip n rows'.

    attrs : dict, optional
        This is a dictionary of attributes that you can pass to use to identify
        the table in the HTML. These are not checked for validity before being
        passed to lxml or Beautiful Soup. However, these attributes must be
        valid HTML table attributes to work correctly. For example, ::

            attrs = {{'id': 'table'}}

        is a valid attribute dictionary because the 'id' HTML tag attribute is
        a valid HTML attribute for *any* HTML tag as per `this document
        <https://html.spec.whatwg.org/multipage/dom.html#global-attributes>`__. ::

            attrs = {{'asdf': 'table'}}

        is *not* a valid attribute dictionary because 'asdf' is not a valid
        HTML attribute even if it is a valid XML attribute.  Valid HTML 4.01
        table attributes can be found `here
        <http://www.w3.org/TR/REC-html40/struct/tables.html#h-11.2>`__. A
        working draft of the HTML 5 spec can be found `here
        <https://html.spec.whatwg.org/multipage/tables.html>`__. It contains the
        latest information on table attributes for the modern web.

    parse_dates : bool, optional
        See :func:`~read_csv` for more details.

    thousands : str, optional
        Separator to use to parse thousands. Defaults to ``','``.

    encoding : str, optional
        The encoding used to decode the web page. Defaults to ``None``.
        ``None`` preserves the previous encoding behavior, which depends on
        the underlying parser library (e.g., the parser library will try to
        use the encoding provided by the document).

    decimal : str, default '.'
        Character to recognize as decimal point (e.g. use ',' for European
        data).

    converters : dict, default None
        Dict of functions for converting values in certain columns. Keys can
        either be integers or column labels, values are functions that take one
        input argument, the cell (not column) content, and return the
        transformed content.

    na_values : iterable, default None
        Custom NA values.

    keep_default_na : bool, default True
        If na_values are specified and keep_default_na is False the default NaN
        values are overridden, otherwise they're appended to.

    displayed_only : bool, default True
        Whether elements with "display: none" should be parsed.

    extract_links : {{None, "all", "header", "body", "footer"}}
        Table elements in the specified section(s) with <a> tags will have their
        href extracted.

        .. versionadded:: 1.5.0

    dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable'
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). Behaviour is as follows:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
          (default).
        * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
          DataFrame.

        .. versionadded:: 2.0

    {storage_options}

        .. versionadded:: 2.1.0

    Returns
    -------
    dfs
        A list of DataFrames.

    See Also
    --------
    read_csv : Read a comma-separated values (csv) file into DataFrame.

    Notes
    -----
    Before using this function you should read the :ref:`gotchas about the
    HTML parsing libraries <io.html.gotchas>`.

    Expect to do some cleanup after you call this function. For example, you
    might need to manually assign column names if the column names are
    converted to NaN when you pass the `header=0` argument. We try to assume as
    little as possible about the structure of the table and push the
    idiosyncrasies of the HTML contained in the table to the user.

    This function searches for ``<table>`` elements and only for ``<tr>``
    and ``<th>`` rows and ``<td>`` elements within each ``<tr>`` or ``<th>``
    element in the table. ``<td>`` stands for "table data". This function
    attempts to properly handle ``colspan`` and ``rowspan`` attributes.
    If the table has a ``<thead>`` element, it is used to construct
    the header, otherwise the function attempts to find the header within
    the body (by putting rows with only ``<th>`` elements into the header).

    Similar to :func:`~read_csv` the `header` argument is applied
    **after** `skiprows` is applied.

    This function will *always* return a list of :class:`DataFrame` *or*
    it will fail, e.g., it will *not* return an empty list.

    Examples
    --------
    See the :ref:`read_html documentation in the IO section of the docs
    <io.read_html>` for some examples of reading in HTML tables.
    """
    # Reject bad options before any parsing work is done, e.g. an
    # invalid integer skiprows.
    negative_skip = isinstance(skiprows, numbers.Integral) and skiprows < 0
    if negative_skip:
        raise ValueError(
            "cannot skip rows starting from the end of the "
            "data (you passed a negative value)"
        )

    allowed_sections = [None, "header", "footer", "body", "all"]
    if extract_links not in allowed_sections:
        raise ValueError(
            "`extract_links` must be one of "
            '{None, "header", "footer", "body", "all"}, got '
            f'"{extract_links}"'
        )

    validate_header_arg(header)
    check_dtype_backend(dtype_backend)

    io = stringify_path(io)

    # A plain string that is neither file-like, an existing file, a URL
    # nor an fsspec URL is treated as literal HTML, which is deprecated.
    if isinstance(io, str):
        source_checks = (is_file_like, file_exists, is_url, is_fsspec_url)
        if not any(check(io) for check in source_checks):
            warnings.warn(
                "Passing literal html to 'read_html' is deprecated and "
                "will be removed in a future version. To read from a "
                "literal string, wrap it in a 'StringIO' object.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

    return _parse(
        flavor=flavor,
        io=io,
        match=match,
        header=header,
        index_col=index_col,
        skiprows=skiprows,
        parse_dates=parse_dates,
        thousands=thousands,
        attrs=attrs,
        encoding=encoding,
        decimal=decimal,
        converters=converters,
        na_values=na_values,
        keep_default_na=keep_default_na,
        displayed_only=displayed_only,
        extract_links=extract_links,
        dtype_backend=dtype_backend,
        storage_options=storage_options,
    )
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/pickle.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ pickle compat """
2
+ from __future__ import annotations
3
+
4
+ import pickle
5
+ from typing import (
6
+ TYPE_CHECKING,
7
+ Any,
8
+ )
9
+ import warnings
10
+
11
+ from pandas.compat import pickle_compat as pc
12
+ from pandas.util._decorators import doc
13
+
14
+ from pandas.core.shared_docs import _shared_docs
15
+
16
+ from pandas.io.common import get_handle
17
+
18
+ if TYPE_CHECKING:
19
+ from pandas._typing import (
20
+ CompressionOptions,
21
+ FilePath,
22
+ ReadPickleBuffer,
23
+ StorageOptions,
24
+ WriteBuffer,
25
+ )
26
+
27
+ from pandas import (
28
+ DataFrame,
29
+ Series,
30
+ )
31
+
32
+
33
@doc(
    storage_options=_shared_docs["storage_options"],
    compression_options=_shared_docs["compression_options"] % "filepath_or_buffer",
)
def to_pickle(
    obj: Any,
    filepath_or_buffer: FilePath | WriteBuffer[bytes],
    compression: CompressionOptions = "infer",
    protocol: int = pickle.HIGHEST_PROTOCOL,
    storage_options: StorageOptions | None = None,
) -> None:
    """
    Pickle (serialize) object to file.

    Parameters
    ----------
    obj : any object
        Any python object.
    filepath_or_buffer : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``write()`` function.
        Also accepts URL. URL has to be of S3 or GCS.
    {compression_options}

        .. versionchanged:: 1.4.0 Zstandard support.

    protocol : int
        Int which indicates which protocol should be used by the pickler,
        default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
        values for this parameter depend on the version of Python. For Python
        2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value.
        For Python >= 3.4, 4 is a valid value. A negative value for the
        protocol parameter is equivalent to setting its value to
        HIGHEST_PROTOCOL.

    {storage_options}

        .. [1] https://docs.python.org/3/library/pickle.html

    See Also
    --------
    read_pickle : Load pickled pandas object (or any object) from file.
    DataFrame.to_hdf : Write DataFrame to an HDF5 file.
    DataFrame.to_sql : Write DataFrame to a SQL database.
    DataFrame.to_parquet : Write a DataFrame to the binary parquet format.

    Examples
    --------
    >>> original_df = pd.DataFrame(
    ...     {{"foo": range(5), "bar": range(5, 10)}}
    ... )  # doctest: +SKIP
    >>> original_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> pd.to_pickle(original_df, "./dummy.pkl")  # doctest: +SKIP

    >>> unpickled_df = pd.read_pickle("./dummy.pkl")  # doctest: +SKIP
    >>> unpickled_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    """
    # Any negative protocol means "use the highest available".
    chosen_protocol = pickle.HIGHEST_PROTOCOL if protocol < 0 else protocol

    with get_handle(
        filepath_or_buffer,
        "wb",
        compression=compression,
        is_text=False,
        storage_options=storage_options,
    ) as handles:
        # letting pickle write directly to the buffer is more memory-efficient
        pickle.dump(obj, handles.handle, protocol=chosen_protocol)
112
+
113
+
114
@doc(
    storage_options=_shared_docs["storage_options"],
    decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer",
)
def read_pickle(
    filepath_or_buffer: FilePath | ReadPickleBuffer,
    compression: CompressionOptions = "infer",
    storage_options: StorageOptions | None = None,
) -> DataFrame | Series:
    """
    Load pickled pandas object (or any object) from file.

    .. warning::

       Loading pickled data received from untrusted sources can be
       unsafe. See `here <https://docs.python.org/3/library/pickle.html>`__.

    Parameters
    ----------
    filepath_or_buffer : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``readlines()`` function.
        Also accepts URL. URL is not limited to S3 and GCS.

    {decompression_options}

        .. versionchanged:: 1.4.0 Zstandard support.

    {storage_options}

    Returns
    -------
    same type as object stored in file

    See Also
    --------
    DataFrame.to_pickle : Pickle (serialize) DataFrame object to file.
    Series.to_pickle : Pickle (serialize) Series object to file.
    read_hdf : Read HDF5 file into a DataFrame.
    read_sql : Read SQL query or database table into a DataFrame.
    read_parquet : Load a parquet object, returning a DataFrame.

    Notes
    -----
    read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3
    provided the object was serialized with to_pickle.

    Examples
    --------
    >>> original_df = pd.DataFrame(
    ...     {{"foo": range(5), "bar": range(5, 10)}}
    ...    )  # doctest: +SKIP
    >>> original_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> pd.to_pickle(original_df, "./dummy.pkl")  # doctest: +SKIP

    >>> unpickled_df = pd.read_pickle("./dummy.pkl")  # doctest: +SKIP
    >>> unpickled_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    """
    # Errors from the plain unpickler that trigger the compat loader.
    # TypeError covers Cython complaints about object.__new__ vs Tick.__new__.
    compat_triggers = (AttributeError, ImportError, ModuleNotFoundError, TypeError)

    with get_handle(
        filepath_or_buffer,
        "rb",
        compression=compression,
        is_text=False,
        storage_options=storage_options,
    ) as handles:
        stream = handles.handle

        # 1) try standard library Pickle
        # 2) try pickle_compat (older pandas version) to handle subclass changes
        # 3) try pickle_compat with latin-1 encoding upon a UnicodeDecodeError
        try:
            try:
                with warnings.catch_warnings(record=True):
                    # We want to silence any warnings about, e.g. moved modules.
                    warnings.simplefilter("ignore", Warning)
                    return pickle.load(stream)
            except compat_triggers:
                # e.g.
                #  "No module named 'pandas.core.sparse.series'"
                #  "Can't get attribute '__nat_unpickle' on <module 'pandas._libs.tslib"
                return pc.load(stream, encoding=None)
        except UnicodeDecodeError:
            # e.g. can occur for files written in py27; see GH#28645 and GH#31988
            return pc.load(stream, encoding="latin-1")
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/pytables.py ADDED
The diff for this file is too large to render. See raw diff
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/io/sql.py ADDED
The diff for this file is too large to render. See raw diff
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (218 Bytes). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_aggregation.cpython-312.pyc ADDED
Binary file (4.43 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_common.cpython-312.pyc ADDED
Binary file (14.8 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_downstream.cpython-312.pyc ADDED
Binary file (16 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_errors.cpython-312.pyc ADDED
Binary file (4.26 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_expressions.cpython-312.pyc ADDED
Binary file (23.5 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_flags.cpython-312.pyc ADDED
Binary file (3.19 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_multilevel.cpython-312.pyc ADDED
Binary file (19.3 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_nanops.cpython-312.pyc ADDED
Binary file (66.6 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_optional_dependency.cpython-312.pyc ADDED
Binary file (4.64 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_register_accessor.cpython-312.pyc ADDED
Binary file (6.88 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_sorting.cpython-312.pyc ADDED
Binary file (27.7 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__pycache__/test_take.cpython-312.pyc ADDED
Binary file (19.9 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/__init__.py ADDED
File without changes
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/masked_shared.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tests shared by MaskedArray subclasses.
3
+ """
4
+ import numpy as np
5
+ import pytest
6
+
7
+ import pandas as pd
8
+ import pandas._testing as tm
9
+ from pandas.tests.extension.base import BaseOpsUtil
10
+
11
+
12
class ComparisonOps(BaseOpsUtil):
    """Comparison-operator checks shared by the masked-array test suites."""

    def _compare_other(self, data, op, other):
        """
        Check ``op(data, other)`` against a result built from the raw values.

        The expectation is computed from ``data._data`` and every position
        flagged in ``data._mask`` is then overwritten with ``pd.NA``.  The
        check runs twice: once on the bare array and once on a Series
        wrapping it.
        """
        # array
        result = pd.Series(op(data, other))
        expected = pd.Series(op(data._data, other), dtype="boolean")

        # fill the nan locations
        expected[data._mask] = pd.NA

        tm.assert_series_equal(result, expected)

        # series
        ser = pd.Series(data)
        result = op(ser, other)

        # Set nullable dtype here to avoid upcasting when setting to pd.NA below
        expected = op(pd.Series(data._data), other).astype("boolean")

        # fill the nan locations
        expected[data._mask] = pd.NA

        tm.assert_series_equal(result, expected)

    # subclass will override to parametrize 'other'
    def test_scalar(self, other, comparison_op, dtype):
        """
        Compare a three-element masked array with the scalar ``other``.

        When ``other`` is ``pd.NA`` every result element must be missing;
        otherwise the result must equal the comparison of the raw values
        combined with the input's own mask.
        """
        op = comparison_op
        left = pd.array([1, 0, None], dtype=dtype)

        result = op(left, other)

        if other is pd.NA:
            expected = pd.array([None, None, None], dtype="boolean")
        else:
            values = op(left._data, other)
            expected = pd.arrays.BooleanArray(values, left._mask, copy=True)
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        result[0] = pd.NA
        tm.assert_extension_array_equal(left, pd.array([1, 0, None], dtype=dtype))
52
+
53
+
54
class NumericOps:
    """Tests shared by IntegerArray and FloatingArray, not BooleanArray."""

    def test_searchsorted_nan(self, dtype):
        """``searchsorted(np.nan)`` lands past the last element on either side."""
        # The base class casts to object dtype, for which searchsorted returns
        # 0 from the left and 10 from the right.  The arrays under test are
        # expected to give 10 for both sides instead.
        arr = pd.array(range(10), dtype=dtype)

        assert arr.searchsorted(np.nan, side="left") == 10
        assert arr.searchsorted(np.nan, side="right") == 10

    def test_no_shared_mask(self, data):
        """The result of an arithmetic op must not share memory with its input."""
        result = data + 1
        assert not tm.shares_memory(result, data)

    def test_array(self, comparison_op, dtype):
        """Array-vs-array comparison: masks are OR-ed and inputs are untouched."""
        op = comparison_op

        left = pd.array([0, 1, 2, None, None, None], dtype=dtype)
        right = pd.array([0, 1, None, 0, 1, None], dtype=dtype)

        result = op(left, right)
        values = op(left._data, right._data)
        mask = left._mask | right._mask

        expected = pd.arrays.BooleanArray(values, mask)
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        result[0] = pd.NA
        tm.assert_extension_array_equal(
            left, pd.array([0, 1, 2, None, None, None], dtype=dtype)
        )
        tm.assert_extension_array_equal(
            right, pd.array([0, 1, None, 0, 1, None], dtype=dtype)
        )

    def test_compare_with_booleanarray(self, comparison_op, dtype):
        """Comparing with a 0/1 numeric array matches comparing with booleans."""
        op = comparison_op

        left = pd.array([True, False, None] * 3, dtype="boolean")
        right = pd.array([0] * 3 + [1] * 3 + [None] * 3, dtype=dtype)
        other = pd.array([False] * 3 + [True] * 3 + [None] * 3, dtype="boolean")

        expected = op(left, other)
        result = op(left, right)
        tm.assert_extension_array_equal(result, expected)

        # reversed op
        expected = op(other, left)
        result = op(right, left)
        tm.assert_extension_array_equal(result, expected)

    def test_compare_to_string(self, dtype):
        """Equality with a string gives False for values and NA for missing."""
        # GH#28930
        ser = pd.Series([1, None], dtype=dtype)
        result = ser == "a"
        expected = pd.Series([False, pd.NA], dtype="boolean")

        tm.assert_series_equal(result, expected)

    def test_ufunc_with_out(self, dtype):
        """Check in-place ``|=`` into an ndarray and ``np.add`` with ``out=``."""
        arr = pd.array([1, 2, 3], dtype=dtype)
        arr2 = pd.array([1, 2, pd.NA], dtype=dtype)

        mask = arr == arr
        mask2 = arr2 == arr2

        result = np.zeros(3, dtype=bool)
        result |= mask
        # If MaskedArray.__array_ufunc__ handled "out" appropriately,
        # `result` should still be an ndarray.
        assert isinstance(result, np.ndarray)
        assert result.all()

        # result |= mask worked because mask could be cast losslessly to
        # boolean ndarray. mask2 can't, so this raises
        result = np.zeros(3, dtype=bool)
        msg = "Specify an appropriate 'na_value' for this dtype"
        with pytest.raises(ValueError, match=msg):
            result |= mask2

        # addition
        res = np.add(arr, arr2)
        expected = pd.array([2, 4, pd.NA], dtype=dtype)
        tm.assert_extension_array_equal(res, expected)

        # when passing out=arr, we will modify 'arr' inplace.
        res = np.add(arr, arr2, out=arr)
        assert res is arr
        tm.assert_extension_array_equal(res, expected)
        tm.assert_extension_array_equal(arr, expected)

    def test_mul_td64_array(self, dtype):
        """Multiplying by a timedelta64 ndarray propagates NA as NaT."""
        # GH#45622
        arr = pd.array([1, 2, pd.NA], dtype=dtype)
        other = np.arange(3, dtype=np.int64).view("m8[ns]")

        result = arr * other
        expected = pd.array([pd.Timedelta(0), pd.Timedelta(2), pd.NaT])
        tm.assert_extension_array_equal(result, expected)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_array.py ADDED
@@ -0,0 +1,478 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ import decimal
3
+ import re
4
+
5
+ import numpy as np
6
+ import pytest
7
+ import pytz
8
+
9
+ import pandas as pd
10
+ import pandas._testing as tm
11
+ from pandas.api.extensions import register_extension_dtype
12
+ from pandas.arrays import (
13
+ BooleanArray,
14
+ DatetimeArray,
15
+ FloatingArray,
16
+ IntegerArray,
17
+ IntervalArray,
18
+ SparseArray,
19
+ TimedeltaArray,
20
+ )
21
+ from pandas.core.arrays import (
22
+ NumpyExtensionArray,
23
+ period_array,
24
+ )
25
+ from pandas.tests.extension.decimal import (
26
+ DecimalArray,
27
+ DecimalDtype,
28
+ to_decimal,
29
+ )
30
+
31
+
32
# The last entry was a second "M8[m]"; "m8[m]" gives the timedelta minute
# unit the same coverage the datetime one already had.
@pytest.mark.parametrize("dtype_unit", ["M8[h]", "M8[m]", "m8[h]", "m8[m]"])
def test_dt64_array(dtype_unit):
    """Unsupported datetime64/timedelta64 resolutions emit a FutureWarning."""
    # PR 53817
    dtype_var = np.dtype(dtype_unit)
    msg = (
        r"datetime64 and timedelta64 dtype resolutions other than "
        r"'s', 'ms', 'us', and 'ns' are deprecated. "
        r"In future releases passing unsupported resolutions will "
        r"raise an exception."
    )
    with tm.assert_produces_warning(FutureWarning, match=re.escape(msg)):
        pd.array([], dtype=dtype_var)
44
+
45
+
46
@pytest.mark.parametrize(
    "data, dtype, expected",
    [
        # Basic NumPy defaults.
        ([], None, FloatingArray._from_sequence([], dtype="Float64")),
        ([1, 2], None, IntegerArray._from_sequence([1, 2], dtype="Int64")),
        ([1, 2], object, NumpyExtensionArray(np.array([1, 2], dtype=object))),
        (
            [1, 2],
            np.dtype("float32"),
            NumpyExtensionArray(np.array([1.0, 2.0], dtype=np.dtype("float32"))),
        ),
        (
            np.array([], dtype=object),
            None,
            NumpyExtensionArray(np.array([], dtype=object)),
        ),
        (
            np.array([1, 2], dtype="int64"),
            None,
            IntegerArray._from_sequence([1, 2], dtype="Int64"),
        ),
        (
            np.array([1.0, 2.0], dtype="float64"),
            None,
            FloatingArray._from_sequence([1.0, 2.0], dtype="Float64"),
        ),
        # String alias passes through to NumPy
        ([1, 2], "float32", NumpyExtensionArray(np.array([1, 2], dtype="float32"))),
        ([1, 2], "int64", NumpyExtensionArray(np.array([1, 2], dtype=np.int64))),
        # GH#44715 FloatingArray does not support float16, so fall
        #  back to NumpyExtensionArray
        (
            np.array([1, 2], dtype=np.float16),
            None,
            NumpyExtensionArray(np.array([1, 2], dtype=np.float16)),
        ),
        # idempotency with e.g. pd.array(pd.array([1, 2], dtype="int64"))
        (
            NumpyExtensionArray(np.array([1, 2], dtype=np.int32)),
            None,
            NumpyExtensionArray(np.array([1, 2], dtype=np.int32)),
        ),
        # Period alias
        (
            [pd.Period("2000", "D"), pd.Period("2001", "D")],
            "Period[D]",
            period_array(["2000", "2001"], freq="D"),
        ),
        # Period dtype
        (
            [pd.Period("2000", "D")],
            pd.PeriodDtype("D"),
            period_array(["2000"], freq="D"),
        ),
        # Datetime (naive)
        (
            [1, 2],
            np.dtype("datetime64[ns]"),
            DatetimeArray._from_sequence(
                np.array([1, 2], dtype="M8[ns]"), dtype="M8[ns]"
            ),
        ),
        (
            [1, 2],
            np.dtype("datetime64[s]"),
            DatetimeArray._from_sequence(
                np.array([1, 2], dtype="M8[s]"), dtype="M8[s]"
            ),
        ),
        (
            np.array([1, 2], dtype="datetime64[ns]"),
            None,
            DatetimeArray._from_sequence(
                np.array([1, 2], dtype="M8[ns]"), dtype="M8[ns]"
            ),
        ),
        (
            pd.DatetimeIndex(["2000", "2001"]),
            np.dtype("datetime64[ns]"),
            DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
        ),
        (
            pd.DatetimeIndex(["2000", "2001"]),
            None,
            DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
        ),
        (
            ["2000", "2001"],
            np.dtype("datetime64[ns]"),
            DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
        ),
        # Datetime (tz-aware)
        (
            ["2000", "2001"],
            pd.DatetimeTZDtype(tz="CET"),
            DatetimeArray._from_sequence(
                ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET")
            ),
        ),
        # Timedelta
        (
            ["1h", "2h"],
            np.dtype("timedelta64[ns]"),
            TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"),
        ),
        (
            pd.TimedeltaIndex(["1h", "2h"]),
            np.dtype("timedelta64[ns]"),
            TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"),
        ),
        (
            np.array([1, 2], dtype="m8[s]"),
            np.dtype("timedelta64[s]"),
            TimedeltaArray._from_sequence(
                np.array([1, 2], dtype="m8[s]"), dtype="m8[s]"
            ),
        ),
        (
            pd.TimedeltaIndex(["1h", "2h"]),
            None,
            TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"),
        ),
        (
            # preserve non-nano, i.e. don't cast to NumpyExtensionArray
            TimedeltaArray._simple_new(
                np.arange(5, dtype=np.int64).view("m8[s]"), dtype=np.dtype("m8[s]")
            ),
            None,
            TimedeltaArray._simple_new(
                np.arange(5, dtype=np.int64).view("m8[s]"), dtype=np.dtype("m8[s]")
            ),
        ),
        (
            # preserve non-nano, i.e. don't cast to NumpyExtensionArray
            TimedeltaArray._simple_new(
                np.arange(5, dtype=np.int64).view("m8[s]"), dtype=np.dtype("m8[s]")
            ),
            np.dtype("m8[s]"),
            TimedeltaArray._simple_new(
                np.arange(5, dtype=np.int64).view("m8[s]"), dtype=np.dtype("m8[s]")
            ),
        ),
        # Category
        (["a", "b"], "category", pd.Categorical(["a", "b"])),
        (
            ["a", "b"],
            pd.CategoricalDtype(None, ordered=True),
            pd.Categorical(["a", "b"], ordered=True),
        ),
        # Interval
        (
            [pd.Interval(1, 2), pd.Interval(3, 4)],
            "interval",
            IntervalArray.from_tuples([(1, 2), (3, 4)]),
        ),
        # Sparse
        ([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")),
        # IntegerNA
        ([1, None], "Int16", pd.array([1, None], dtype="Int16")),
        (
            pd.Series([1, 2]),
            None,
            NumpyExtensionArray(np.array([1, 2], dtype=np.int64)),
        ),
        # String
        (
            ["a", None],
            "string",
            pd.StringDtype()
            .construct_array_type()
            ._from_sequence(["a", None], dtype=pd.StringDtype()),
        ),
        (
            ["a", None],
            pd.StringDtype(),
            pd.StringDtype()
            .construct_array_type()
            ._from_sequence(["a", None], dtype=pd.StringDtype()),
        ),
        # Boolean
        (
            [True, None],
            "boolean",
            BooleanArray._from_sequence([True, None], dtype="boolean"),
        ),
        (
            [True, None],
            pd.BooleanDtype(),
            BooleanArray._from_sequence([True, None], dtype="boolean"),
        ),
        # Index
        (pd.Index([1, 2]), None, NumpyExtensionArray(np.array([1, 2], dtype=np.int64))),
        # Series[EA] returns the EA
        (
            pd.Series(pd.Categorical(["a", "b"], categories=["a", "b", "c"])),
            None,
            pd.Categorical(["a", "b"], categories=["a", "b", "c"]),
        ),
        # "3rd party" EAs work
        ([decimal.Decimal(0), decimal.Decimal(1)], "decimal", to_decimal([0, 1])),
        # pass an ExtensionArray, but a different dtype
        (
            period_array(["2000", "2001"], freq="D"),
            "category",
            pd.Categorical([pd.Period("2000", "D"), pd.Period("2001", "D")]),
        ),
    ],
)
def test_array(data, dtype, expected):
    """``pd.array(data, dtype=dtype)`` must equal the hand-built ``expected``."""
    result = pd.array(data, dtype=dtype)
    tm.assert_equal(result, expected)
258
+
259
+
260
def test_array_copy():
    """``pd.array`` copies an ndarray input unless ``copy=False`` is passed."""
    a = np.array([1, 2])
    # default is to copy
    b = pd.array(a, dtype=a.dtype)
    assert not tm.shares_memory(a, b)

    # copy=True
    b = pd.array(a, dtype=a.dtype, copy=True)
    assert not tm.shares_memory(a, b)

    # copy=False
    b = pd.array(a, dtype=a.dtype, copy=False)
    assert tm.shares_memory(a, b)
273
+
274
+
275
# tzinfo object used by the stdlib-datetime tz-aware case below.
cet = pytz.timezone("CET")


@pytest.mark.parametrize(
    "data, expected",
    [
        # period
        (
            [pd.Period("2000", "D"), pd.Period("2001", "D")],
            period_array(["2000", "2001"], freq="D"),
        ),
        # interval
        ([pd.Interval(0, 1), pd.Interval(1, 2)], IntervalArray.from_breaks([0, 1, 2])),
        # datetime
        (
            [pd.Timestamp("2000"), pd.Timestamp("2001")],
            DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
        ),
        (
            [datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)],
            DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
        ),
        (
            np.array([1, 2], dtype="M8[ns]"),
            DatetimeArray._from_sequence(np.array([1, 2], dtype="M8[ns]")),
        ),
        (
            np.array([1, 2], dtype="M8[us]"),
            DatetimeArray._simple_new(
                np.array([1, 2], dtype="M8[us]"), dtype=np.dtype("M8[us]")
            ),
        ),
        # datetimetz
        (
            [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")],
            DatetimeArray._from_sequence(
                ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET", unit="ns")
            ),
        ),
        (
            [
                datetime.datetime(2000, 1, 1, tzinfo=cet),
                datetime.datetime(2001, 1, 1, tzinfo=cet),
            ],
            DatetimeArray._from_sequence(
                ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz=cet, unit="ns")
            ),
        ),
        # timedelta
        (
            [pd.Timedelta("1h"), pd.Timedelta("2h")],
            TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"),
        ),
        (
            np.array([1, 2], dtype="m8[ns]"),
            TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[ns]")),
        ),
        (
            np.array([1, 2], dtype="m8[us]"),
            TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[us]")),
        ),
        # integer
        ([1, 2], IntegerArray._from_sequence([1, 2], dtype="Int64")),
        ([1, None], IntegerArray._from_sequence([1, None], dtype="Int64")),
        ([1, pd.NA], IntegerArray._from_sequence([1, pd.NA], dtype="Int64")),
        ([1, np.nan], IntegerArray._from_sequence([1, np.nan], dtype="Int64")),
        # float
        ([0.1, 0.2], FloatingArray._from_sequence([0.1, 0.2], dtype="Float64")),
        ([0.1, None], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
        ([0.1, np.nan], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
        ([0.1, pd.NA], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
        # integer-like float
        ([1.0, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")),
        ([1.0, None], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
        ([1.0, np.nan], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
        ([1.0, pd.NA], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
        # mixed-integer-float
        ([1, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")),
        (
            [1, np.nan, 2.0],
            FloatingArray._from_sequence([1.0, None, 2.0], dtype="Float64"),
        ),
        # string
        (
            ["a", "b"],
            pd.StringDtype()
            .construct_array_type()
            ._from_sequence(["a", "b"], dtype=pd.StringDtype()),
        ),
        (
            ["a", None],
            pd.StringDtype()
            .construct_array_type()
            ._from_sequence(["a", None], dtype=pd.StringDtype()),
        ),
        # Boolean
        ([True, False], BooleanArray._from_sequence([True, False], dtype="boolean")),
        ([True, None], BooleanArray._from_sequence([True, None], dtype="boolean")),
    ],
)
def test_array_inference(data, expected):
    """``pd.array(data)`` with no dtype must infer the ``expected`` array."""
    result = pd.array(data)
    tm.assert_equal(result, expected)
378
+
379
+
380
@pytest.mark.parametrize(
    "data",
    [
        # mix of frequencies
        [pd.Period("2000", "D"), pd.Period("2001", "Y")],
        # mix of closed
        [pd.Interval(0, 1, closed="left"), pd.Interval(1, 2, closed="right")],
        # Mix of timezones
        [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000", tz="UTC")],
        # Mix of tz-aware and tz-naive
        [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000")],
        np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")]),
    ],
)
def test_array_inference_fails(data):
    """Inconsistent scalars fall back to an object-dtype NumpyExtensionArray."""
    result = pd.array(data)
    expected = NumpyExtensionArray(np.array(data, dtype=object))
    tm.assert_extension_array_equal(result, expected)
398
+
399
+
400
@pytest.mark.parametrize("data", [np.array(0)])
def test_nd_raises(data):
    """A zero-dimensional ndarray is rejected as not 1-dimensional."""
    with pytest.raises(ValueError, match="NumpyExtensionArray must be 1-dimensional"):
        pd.array(data, dtype="int64")
404
+
405
+
406
def test_scalar_raises():
    """Passing a bare scalar to ``pd.array`` raises ValueError."""
    with pytest.raises(ValueError, match="Cannot pass scalar '1'"):
        pd.array(1)
409
+
410
+
411
def test_dataframe_raises():
    """Passing a DataFrame to ``pd.array`` raises TypeError."""
    # GH#51167 don't accidentally cast to StringArray by doing inference on columns
    df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
    msg = "Cannot pass DataFrame to 'pandas.array'"
    with pytest.raises(TypeError, match=msg):
        pd.array(df)
417
+
418
+
419
def test_bounds_check():
    """A negative value cannot be stored in an unsigned ``UInt16`` array."""
    # GH21796
    with pytest.raises(
        TypeError, match=r"cannot safely cast non-equivalent int(32|64) to uint16"
    ):
        pd.array([-1, 2, 3], dtype="UInt16")
425
+
426
+
427
+ # ---------------------------------------------------------------------------
428
+ # A couple dummy classes to ensure that Series and Indexes are unboxed before
429
+ # getting to the EA classes.
430
+
431
+
432
@register_extension_dtype
class DecimalDtype2(DecimalDtype):
    """Dummy dtype registered as "decimal2" and backed by ``DecimalArray2``."""

    name = "decimal2"

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return DecimalArray2
446
+
447
+
448
class DecimalArray2(DecimalArray):
    """Dummy array that refuses to be built directly from a Series or Index."""

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
        """
        Build the array, raising TypeError if ``scalars`` is still boxed.

        Any input other than a ``pd.Series`` or ``pd.Index`` is forwarded
        unchanged to the parent implementation.
        """
        if isinstance(scalars, (pd.Series, pd.Index)):
            raise TypeError("scalars should not be of type pd.Series or pd.Index")

        return super()._from_sequence(scalars, dtype=dtype, copy=copy)
455
+
456
+
457
def test_array_unboxes(index_or_series):
    """``pd.array`` unwraps a Series/Index before calling ``_from_sequence``."""
    box = index_or_series

    data = box([decimal.Decimal("1"), decimal.Decimal("2")])
    dtype = DecimalDtype2()
    # sanity check: the dummy array really does reject boxed input, so the
    # pd.array call below can only succeed if the data was unboxed first
    with pytest.raises(
        TypeError, match="scalars should not be of type pd.Series or pd.Index"
    ):
        DecimalArray2._from_sequence(data, dtype=dtype)

    result = pd.array(data, dtype="decimal2")
    expected = DecimalArray2._from_sequence(data.values, dtype=dtype)
    tm.assert_equal(result, expected)
471
+
472
+
473
def test_array_to_numpy_na():
    """``to_numpy`` replaces missing entries with ``na_value`` before casting."""
    # GH#40638
    arr = pd.array([pd.NA, 1], dtype="string[python]")
    result = arr.to_numpy(na_value=True, dtype=bool)
    expected = np.array([True, True])
    tm.assert_numpy_array_equal(result, expected)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_datetimelike.py ADDED
@@ -0,0 +1,1344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ import warnings
5
+
6
+ import numpy as np
7
+ import pytest
8
+
9
+ from pandas._libs import (
10
+ NaT,
11
+ OutOfBoundsDatetime,
12
+ Timestamp,
13
+ )
14
+ from pandas._libs.tslibs.dtypes import freq_to_period_freqstr
15
+ from pandas.compat.numpy import np_version_gt2
16
+
17
+ import pandas as pd
18
+ from pandas import (
19
+ DatetimeIndex,
20
+ Period,
21
+ PeriodIndex,
22
+ TimedeltaIndex,
23
+ )
24
+ import pandas._testing as tm
25
+ from pandas.core.arrays import (
26
+ DatetimeArray,
27
+ NumpyExtensionArray,
28
+ PeriodArray,
29
+ TimedeltaArray,
30
+ )
31
+
32
+
33
# TODO: more freq variants
@pytest.fixture(params=["D", "B", "W", "ME", "QE", "YE"])
def freqstr(request):
    """Fixture returning parametrized frequency in string format."""
    return request.param
38
+
39
+
40
@pytest.fixture
def period_index(freqstr):
    """
    A fixture to provide PeriodIndex objects with different frequencies.

    Most PeriodArray behavior is already tested in PeriodIndex tests,
    so here we just test that the PeriodArray behavior matches
    the PeriodIndex behavior.
    """
    # TODO: non-monotone indexes; NaTs, different start dates
    with warnings.catch_warnings():
        # suppress deprecation of Period[B]
        warnings.filterwarnings(
            "ignore", message="Period with BDay freq", category=FutureWarning
        )
        # translate the offset alias into its period-frequency spelling
        freqstr = freq_to_period_freqstr(1, freqstr)
        pi = pd.period_range(start=Timestamp("2000-01-01"), periods=100, freq=freqstr)
    return pi
58
+
59
+
60
@pytest.fixture
def datetime_index(freqstr):
    """
    A fixture to provide DatetimeIndex objects with different frequencies.

    Most DatetimeArray behavior is already tested in DatetimeIndex tests,
    so here we just test that the DatetimeArray behavior matches
    the DatetimeIndex behavior.
    """
    # TODO: non-monotone indexes; NaTs, different start dates, timezones
    dti = pd.date_range(start=Timestamp("2000-01-01"), periods=100, freq=freqstr)
    return dti
72
+
73
+
74
@pytest.fixture
def timedelta_index():
    """
    A fixture to provide a small TimedeltaIndex that includes a NaT entry.

    Most TimedeltaArray behavior is already tested in TimedeltaIndex tests,
    so here we just test that the TimedeltaArray behavior matches
    the TimedeltaIndex behavior.
    """
    # TODO: flesh this out
    return TimedeltaIndex(["1 Day", "3 Hours", "NaT"])
84
+
85
+
86
+ class SharedTests:
87
+ index_cls: type[DatetimeIndex | PeriodIndex | TimedeltaIndex]
88
+
89
+ @pytest.fixture
90
+ def arr1d(self):
91
+ """Fixture returning DatetimeArray with daily frequency."""
92
+ data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
93
+ if self.array_cls is PeriodArray:
94
+ arr = self.array_cls(data, freq="D")
95
+ else:
96
+ arr = self.index_cls(data, freq="D")._data
97
+ return arr
98
+
99
+ def test_compare_len1_raises(self, arr1d):
100
+ # make sure we raise when comparing with different lengths, specific
101
+ # to the case where one has length-1, which numpy would broadcast
102
+ arr = arr1d
103
+ idx = self.index_cls(arr)
104
+
105
+ with pytest.raises(ValueError, match="Lengths must match"):
106
+ arr == arr[:1]
107
+
108
+ # test the index classes while we're at it, GH#23078
109
+ with pytest.raises(ValueError, match="Lengths must match"):
110
+ idx <= idx[[0]]
111
+
112
+ @pytest.mark.parametrize(
113
+ "result",
114
+ [
115
+ pd.date_range("2020", periods=3),
116
+ pd.date_range("2020", periods=3, tz="UTC"),
117
+ pd.timedelta_range("0 days", periods=3),
118
+ pd.period_range("2020Q1", periods=3, freq="Q"),
119
+ ],
120
+ )
121
+ def test_compare_with_Categorical(self, result):
122
+ expected = pd.Categorical(result)
123
+ assert all(result == expected)
124
+ assert not any(result != expected)
125
+
126
+ @pytest.mark.parametrize("reverse", [True, False])
127
+ @pytest.mark.parametrize("as_index", [True, False])
128
+ def test_compare_categorical_dtype(self, arr1d, as_index, reverse, ordered):
129
+ other = pd.Categorical(arr1d, ordered=ordered)
130
+ if as_index:
131
+ other = pd.CategoricalIndex(other)
132
+
133
+ left, right = arr1d, other
134
+ if reverse:
135
+ left, right = right, left
136
+
137
+ ones = np.ones(arr1d.shape, dtype=bool)
138
+ zeros = ~ones
139
+
140
+ result = left == right
141
+ tm.assert_numpy_array_equal(result, ones)
142
+
143
+ result = left != right
144
+ tm.assert_numpy_array_equal(result, zeros)
145
+
146
+ if not reverse and not as_index:
147
+ # Otherwise Categorical raises TypeError bc it is not ordered
148
+ # TODO: we should probably get the same behavior regardless?
149
+ result = left < right
150
+ tm.assert_numpy_array_equal(result, zeros)
151
+
152
+ result = left <= right
153
+ tm.assert_numpy_array_equal(result, ones)
154
+
155
+ result = left > right
156
+ tm.assert_numpy_array_equal(result, zeros)
157
+
158
+ result = left >= right
159
+ tm.assert_numpy_array_equal(result, ones)
160
+
161
+ def test_take(self):
162
+ data = np.arange(100, dtype="i8") * 24 * 3600 * 10**9
163
+ np.random.default_rng(2).shuffle(data)
164
+
165
+ if self.array_cls is PeriodArray:
166
+ arr = PeriodArray(data, dtype="period[D]")
167
+ else:
168
+ arr = self.index_cls(data)._data
169
+ idx = self.index_cls._simple_new(arr)
170
+
171
+ takers = [1, 4, 94]
172
+ result = arr.take(takers)
173
+ expected = idx.take(takers)
174
+
175
+ tm.assert_index_equal(self.index_cls(result), expected)
176
+
177
+ takers = np.array([1, 4, 94])
178
+ result = arr.take(takers)
179
+ expected = idx.take(takers)
180
+
181
+ tm.assert_index_equal(self.index_cls(result), expected)
182
+
183
+ @pytest.mark.parametrize("fill_value", [2, 2.0, Timestamp(2021, 1, 1, 12).time])
184
+ def test_take_fill_raises(self, fill_value, arr1d):
185
+ msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
186
+ with pytest.raises(TypeError, match=msg):
187
+ arr1d.take([0, 1], allow_fill=True, fill_value=fill_value)
188
+
189
+ def test_take_fill(self, arr1d):
190
+ arr = arr1d
191
+
192
+ result = arr.take([-1, 1], allow_fill=True, fill_value=None)
193
+ assert result[0] is NaT
194
+
195
+ result = arr.take([-1, 1], allow_fill=True, fill_value=np.nan)
196
+ assert result[0] is NaT
197
+
198
+ result = arr.take([-1, 1], allow_fill=True, fill_value=NaT)
199
+ assert result[0] is NaT
200
+
201
+ @pytest.mark.filterwarnings(
202
+ "ignore:Period with BDay freq is deprecated:FutureWarning"
203
+ )
204
+ def test_take_fill_str(self, arr1d):
205
+ # Cast str fill_value matching other fill_value-taking methods
206
+ result = arr1d.take([-1, 1], allow_fill=True, fill_value=str(arr1d[-1]))
207
+ expected = arr1d[[-1, 1]]
208
+ tm.assert_equal(result, expected)
209
+
210
+ msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
211
+ with pytest.raises(TypeError, match=msg):
212
+ arr1d.take([-1, 1], allow_fill=True, fill_value="foo")
213
+
214
+ def test_concat_same_type(self, arr1d):
215
+ arr = arr1d
216
+ idx = self.index_cls(arr)
217
+ idx = idx.insert(0, NaT)
218
+ arr = arr1d
219
+
220
+ result = arr._concat_same_type([arr[:-1], arr[1:], arr])
221
+ arr2 = arr.astype(object)
222
+ expected = self.index_cls(np.concatenate([arr2[:-1], arr2[1:], arr2]))
223
+
224
+ tm.assert_index_equal(self.index_cls(result), expected)
225
+
226
+ def test_unbox_scalar(self, arr1d):
227
+ result = arr1d._unbox_scalar(arr1d[0])
228
+ expected = arr1d._ndarray.dtype.type
229
+ assert isinstance(result, expected)
230
+
231
+ result = arr1d._unbox_scalar(NaT)
232
+ assert isinstance(result, expected)
233
+
234
+ msg = f"'value' should be a {self.scalar_type.__name__}."
235
+ with pytest.raises(ValueError, match=msg):
236
+ arr1d._unbox_scalar("foo")
237
+
238
def test_check_compatible_with(self, arr1d):
    """A scalar, a length-one slice and ``NaT`` are all expected not to raise."""
    for other in (arr1d[0], arr1d[:1], NaT):
        arr1d._check_compatible_with(other)
242
+
243
def test_scalar_from_string(self, arr1d):
    """Round-trip the first element through ``str`` and ``_scalar_from_string``."""
    first = arr1d[0]
    parsed = arr1d._scalar_from_string(str(first))
    assert parsed == first
246
+
247
def test_reduce_invalid(self, arr1d):
    """An unknown reduction name passed to ``_reduce`` raises TypeError."""
    expected_msg = "does not support reduction 'not a method'"
    with pytest.raises(TypeError, match=expected_msg):
        arr1d._reduce("not a method")
251
+
252
@pytest.mark.parametrize("method", ["pad", "backfill"])
def test_fillna_method_doesnt_change_orig(self, method):
    """``_pad_or_backfill`` fills the ``NaT`` slot without touching its input."""
    # 0..9 scaled by the number of nanoseconds in a day
    i8values = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
    if self.array_cls is not PeriodArray:
        arr = self.array_cls._from_sequence(i8values)
    else:
        arr = self.array_cls(i8values, dtype="period[D]")
    arr[4] = NaT

    # the expected fill is the entry before the gap for "pad", after it otherwise
    neighbour = 3 if method == "pad" else 5
    fill_value = arr[neighbour]

    result = arr._pad_or_backfill(method=method)
    assert result[4] == fill_value

    # check that the original was not changed
    assert arr[4] is NaT
268
+
269
def test_searchsorted(self):
    """Check ``searchsorted`` with scalar, own-type array and ``NaT`` needles."""
    i8values = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
    if self.array_cls is not PeriodArray:
        arr = self.array_cls._from_sequence(i8values)
    else:
        arr = self.array_cls(i8values, dtype="period[D]")

    # scalar
    assert arr.searchsorted(arr[1]) == 1
    assert arr.searchsorted(arr[2], side="right") == 3

    # own-type
    left = arr.searchsorted(arr[1:3])
    tm.assert_numpy_array_equal(left, np.array([1, 2], dtype=np.intp))

    right = arr.searchsorted(arr[1:3], side="right")
    tm.assert_numpy_array_equal(right, np.array([2, 3], dtype=np.intp))

    # GH#29884 match numpy convention on whether NaT goes
    #  at the end or the beginning
    assert arr.searchsorted(NaT) == 10
296
+
297
@pytest.mark.parametrize("box", [None, "index", "series"])
def test_searchsorted_castable_strings(self, arr1d, box, string_storage):
    """String needles that can be cast are searched for; other strings raise.

    ``box`` selects whether the raw array, an ``index_cls`` wrapper or a
    ``pd.Series`` wrapper is searched.
    """
    arr = arr1d
    if box == "index":
        # Test the equivalent Index.searchsorted method while we're here
        arr = self.index_cls(arr)
    elif box is not None:
        # Test the equivalent Series.searchsorted method while we're here
        arr = pd.Series(arr)

    # scalar
    assert arr.searchsorted(str(arr[1])) == 1
    assert arr.searchsorted(str(arr[2]), side="right") == 3

    result = arr.searchsorted([str(x) for x in arr[1:3]])
    tm.assert_numpy_array_equal(result, np.array([1, 2], dtype=np.intp))

    # both error messages share this leading part
    prefix = f"value should be a '{arr1d._scalar_type.__name__}', 'NaT', "

    scalar_msg = re.escape(prefix + "or array of those. Got 'str' instead.")
    with pytest.raises(TypeError, match=scalar_msg):
        arr.searchsorted("foo")

    listlike_msg = re.escape(prefix + "or array of those. Got string array instead.")
    with pd.option_context("string_storage", string_storage):
        with pytest.raises(TypeError, match=listlike_msg):
            arr.searchsorted([str(arr[1]), "baz"])
338
+
339
def test_getitem_near_implementation_bounds(self):
    """Scalar lookups on values just above ``NaT._value`` must not raise."""
    # We only check tz-naive for DTA bc the bounds are slightly different
    #  for other tzs
    i8vals = np.asarray([NaT._value + n for n in range(1, 5)], dtype="i8")
    if self.array_cls is not PeriodArray:
        arr = self.index_cls(i8vals, freq="ns")._data
    else:
        arr = self.array_cls(i8vals, dtype="period[ns]")

    # look up the first element on the array itself, then via Index and Series
    for box in (None, pd.Index, pd.Series):
        container = arr if box is None else box(arr)
        container[0]  # should not raise OutOfBoundsDatetime
354
+
355
def test_getitem_2d(self, arr1d):
    """Check 2D indexing: adding an axis, slicing a column, scalar lookup."""
    # 2d slicing on a 1D array
    arr2d = type(arr1d)._simple_new(
        arr1d._ndarray[:, np.newaxis], dtype=arr1d.dtype
    )
    tm.assert_equal(arr1d[:, np.newaxis], arr2d)

    # Lookup on a 2D array
    column_head = type(arr2d)._simple_new(arr2d._ndarray[:3, 0], dtype=arr2d.dtype)
    tm.assert_equal(arr2d[:3, 0], column_head)

    # Scalar lookup
    assert arr2d[-1, 0] == arr1d[-1]
373
+
374
def test_iter_2d(self, arr1d):
    """Iterating a 2D array yields 1D arrays of the same type and dtype."""
    column = arr1d._ndarray[:3, np.newaxis]
    arr2d = type(arr1d)._simple_new(column, dtype=arr1d.dtype)

    rows = list(arr2d)
    assert len(rows) == 3
    for row in rows:
        assert isinstance(row, type(arr1d))
        assert row.ndim == 1
        assert row.dtype == arr1d.dtype
383
+
384
def test_repr_2d(self, arr1d):
    """The repr of a (3, 1) array shows one bracketed, quoted element per row."""
    data2d = arr1d._ndarray[:3, np.newaxis]
    arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype)

    result = repr(arr2d)

    if isinstance(arr2d, TimedeltaArray):
        # timedelta elements are expected in their ``_repr_base`` form
        elements = [arr1d[i]._repr_base() for i in range(3)]
    else:
        elements = [f"{arr1d[i]}" for i in range(3)]

    rows = ",\n".join(f"['{elem}']" for elem in elements)
    expected = (
        f"<{type(arr2d).__name__}>\n"
        "[\n"
        f"{rows}\n"
        "]\n"
        f"Shape: (3, 1), dtype: {arr1d.dtype}"
    )

    assert result == expected
412
+
413
def test_setitem(self):
    """Scalar and slice assignment are reflected in the array's ``asi8``."""
    data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
    if self.array_cls is not PeriodArray:
        arr = self.index_cls(data, freq="D")._data
    else:
        arr = self.array_cls(data, dtype="period[D]")

    # separately built int64 array on which every assignment is mirrored
    mirror = np.arange(10, dtype="i8") * 24 * 3600 * 10**9

    arr[0] = arr[1]
    mirror[0] = mirror[1]
    tm.assert_numpy_array_equal(arr.asi8, mirror)

    arr[:2] = arr[-2:]
    mirror[:2] = mirror[-2:]
    tm.assert_numpy_array_equal(arr.asi8, mirror)
429
+
430
@pytest.mark.parametrize(
    "box", [pd.Index, pd.Series, np.array, list, NumpyExtensionArray]
)
def test_setitem_object_dtype(self, box, arr1d):
    """Assigning the reversed values, boxed as object dtype, reverses the array."""
    expected = arr1d.copy()[::-1]
    if expected.dtype.kind in ["m", "M"]:
        expected = expected._with_freq(None)

    if box is list:
        vals = list(expected)
    elif box is np.array:
        # if we do np.array(x).astype(object) then dt64 and td64 cast to ints
        vals = np.array(expected.astype(object))
    elif box is NumpyExtensionArray:
        vals = box(np.asarray(expected, dtype=object))
    else:
        vals = box(expected).astype(object)

    arr1d[:] = vals

    tm.assert_equal(arr1d, expected)
459
+
460
def test_setitem_strs(self, arr1d):
    """Strings assigned through ``__setitem__`` are parsed, listlike or scalar."""
    # Check that we parse strs in both scalar and listlike

    # Setting list-like of strs
    tail = arr1d[-2:]
    expected = arr1d.copy()
    expected[[0, 1]] = tail

    result = arr1d.copy()
    result[:2] = [str(x) for x in tail]
    tm.assert_equal(result, expected)

    # Same thing but now for just a scalar str
    last = arr1d[-1]
    expected = arr1d.copy()
    expected[0] = last

    result = arr1d.copy()
    result[0] = str(last)
    tm.assert_equal(result, expected)
478
+
479
@pytest.mark.parametrize("as_index", [True, False])
def test_setitem_categorical(self, arr1d, as_index):
    """Assigning a reversed Categorical (or CategoricalIndex) reverses the array."""
    expected = arr1d.copy()[::-1]
    if not isinstance(expected, PeriodArray):
        expected = expected._with_freq(None)

    cat = pd.Categorical(arr1d)
    values = pd.CategoricalIndex(cat) if as_index else cat

    arr1d[:] = values[::-1]

    tm.assert_equal(arr1d, expected)
492
+
493
def test_setitem_raises(self, arr1d):
    """Bad positions, bad value types and length mismatches raise on setitem."""
    head = arr1d[:10]
    first = head[0]

    with pytest.raises(IndexError, match="index 12 is out of bounds"):
        head[12] = first

    with pytest.raises(TypeError, match="value should be a.* 'object'"):
        head[0] = object()

    # GH#36339: empty list-like indexer, one-element value
    listlike_msg = "cannot set using a list-like indexer with a different length"
    with pytest.raises(ValueError, match=listlike_msg):
        head[[]] = [head[1]]

    # GH#36339: empty slice indexer, three-element value
    slice_msg = "cannot set using a slice indexer with a different length than"
    with pytest.raises(ValueError, match=slice_msg):
        head[1:1] = head[:3]
512
+
513
@pytest.mark.parametrize("box", [list, np.array, pd.Index, pd.Series])
def test_setitem_numeric_raises(self, arr1d, box):
    """Integer and float listlikes are rejected by ``__setitem__``."""
    # We don't cast e.g. int64 to our own dtype for setitem
    msg = (
        f"value should be a '{arr1d._scalar_type.__name__}', "
        "'NaT', or array of those. Got"
    )
    for numbers in ([0, 1], [0.0, 1.0]):
        with pytest.raises(TypeError, match=msg):
            arr1d[:2] = box(numbers)
526
+
527
def test_inplace_arithmetic(self):
    """``+=`` and ``-=`` with a Timedelta give the same values as ``+``/``-``."""
    # GH#24115 check that iadd and isub are actually in-place
    data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
    if self.array_cls is not PeriodArray:
        arr = self.index_cls(data, freq="D")._data
    else:
        arr = self.array_cls(data, dtype="period[D]")

    one_day = pd.Timedelta(days=1)

    expected = arr + one_day
    arr += one_day
    tm.assert_equal(arr, expected)

    expected = arr - one_day
    arr -= one_day
    tm.assert_equal(arr, expected)
542
+
543
def test_shift_fill_int_deprecated(self, arr1d):
    """An integer ``fill_value`` passed to ``shift`` raises TypeError."""
    # GH#31971, enforced in 2.0
    expected_msg = "value should be a"
    with pytest.raises(TypeError, match=expected_msg):
        arr1d.shift(1, fill_value=1)
547
+
548
def test_median(self, arr1d):
    """Exercise ``median`` on 1D and 2D data, before and after inserting ``NaT``."""
    # make it easier to define `expected`: drop one element if the length is even
    arr = arr1d[:-1] if len(arr1d) % 2 == 0 else arr1d
    mid = len(arr) // 2

    expected = arr[mid]

    result = arr.median()
    assert type(result) is type(expected)
    assert result == expected

    arr[mid] = NaT
    if not isinstance(expected, Period):
        # non-Period case: expect the mean of the window around the NaT entry
        expected = arr[mid - 1 : mid + 2].mean()

    assert arr.median(skipna=False) is NaT

    result = arr.median()
    assert type(result) is type(expected)
    assert result == expected

    assert arr[:0].median() is NaT
    assert arr[:0].median(skipna=False) is NaT

    # 2d Case
    arr2 = arr.reshape(-1, 1)

    result = arr2.median(axis=None)
    assert type(result) is type(expected)
    assert result == expected

    assert arr2.median(axis=None, skipna=False) is NaT

    result = arr2.median(axis=0)
    tm.assert_equal(result, type(arr)._from_sequence([expected], dtype=arr.dtype))

    result = arr2.median(axis=0, skipna=False)
    tm.assert_equal(result, type(arr)._from_sequence([NaT], dtype=arr.dtype))

    result = arr2.median(axis=1)
    tm.assert_equal(result, arr)

    result = arr2.median(axis=1, skipna=False)
    tm.assert_equal(result, arr)
595
+
596
def test_from_integer_array(self):
    """Building from an ``Int64`` array matches building from the int64 ndarray."""
    ints = np.array([1, 2, 3], dtype=np.int64)
    int64_ea = pd.array(ints, dtype="Int64")

    # PeriodArray is built through its constructor, the others via _from_sequence
    if self.array_cls is PeriodArray:
        construct = self.array_cls
    else:
        construct = self.array_cls._from_sequence

    expected = construct(ints, dtype=self.example_dtype)
    result = construct(int64_ea, dtype=self.example_dtype)

    tm.assert_extension_array_equal(result, expected)
607
+
608
+
609
class TestDatetimeArray(SharedTests):
    """Shared datetime-like array tests run for DatetimeArray, plus extras."""

    index_cls = DatetimeIndex
    array_cls = DatetimeArray
    scalar_type = Timestamp
    example_dtype = "M8[ns]"

    @pytest.fixture
    def arr1d(self, tz_naive_fixture, freqstr):
        """
        Fixture returning DatetimeArray with parametrized frequency and
        timezones
        """
        index = pd.date_range(
            "2016-01-01 01:01:00", periods=5, freq=freqstr, tz=tz_naive_fixture
        )
        return index._data

    def test_round(self, arr1d):
        """Round to "2min" on both the index and the array and compare."""
        # GH#24064
        dti = self.index_cls(arr1d)

        result = dti.round(freq="2min")
        expected = (dti - pd.Timedelta(minutes=1))._with_freq(None)
        tm.assert_index_equal(result, expected)

        result = dti._data.round(freq="2min")
        tm.assert_datetime_array_equal(result, expected._data._with_freq(None))

    def test_array_interface(self, datetime_index):
        """Check which NumPy conversions return the backing ndarray itself."""
        arr = datetime_index._data
        copy_false = None if np_version_gt2 else False
        backing = arr._ndarray

        # default asarray gives the same underlying data (for tz naive)
        result = np.asarray(arr)
        assert result is backing
        tm.assert_numpy_array_equal(result, backing)
        result = np.array(arr, copy=copy_false)
        assert result is backing
        tm.assert_numpy_array_equal(result, backing)

        # specifying M8[ns] gives the same result as default
        result = np.asarray(arr, dtype="datetime64[ns]")
        assert result is backing
        tm.assert_numpy_array_equal(result, backing)
        result = np.array(arr, dtype="datetime64[ns]", copy=copy_false)
        assert result is backing
        tm.assert_numpy_array_equal(result, backing)
        result = np.array(arr, dtype="datetime64[ns]")
        if not np_version_gt2:
            # TODO: GH 57739
            assert result is not backing
        tm.assert_numpy_array_equal(result, backing)

        # to object dtype
        result = np.asarray(arr, dtype=object)
        tm.assert_numpy_array_equal(result, np.array(list(arr), dtype=object))

        # to other dtype always copies
        result = np.asarray(arr, dtype="int64")
        assert result is not arr.asi8
        assert not np.may_share_memory(arr, result)
        tm.assert_numpy_array_equal(result, arr.asi8.copy())

        # other dtypes handled by numpy
        for dtype in ("float64", str):
            result = np.asarray(arr, dtype=dtype)
            tm.assert_numpy_array_equal(result, np.asarray(arr).astype(dtype))

    def test_array_object_dtype(self, arr1d):
        """Object-dtype conversion of array and index yields the index's items."""
        # GH#23524
        dti = self.index_cls(arr1d)
        expected = np.array(list(dti))

        # also test the DatetimeIndex method while we're at it
        for values in (arr1d, dti):
            tm.assert_numpy_array_equal(np.array(values, dtype=object), expected)

    def test_array_tz(self, arr1d):
        """Converting to M8[ns] gives the i8 values viewed as datetime64."""
        # GH#23524
        dti = self.index_cls(arr1d)
        copy_false = None if np_version_gt2 else False
        spellings = ("M8[ns]", "datetime64[ns]")

        expected = dti.asi8.view("M8[ns]")
        for dtype in spellings:
            tm.assert_numpy_array_equal(np.array(arr1d, dtype=dtype), expected)

        # check that we are not making copies when setting copy=copy_false
        for dtype in spellings:
            result = np.array(arr1d, dtype=dtype, copy=copy_false)
            assert result.base is expected.base
            assert result.base is not None

    def test_array_i8_dtype(self, arr1d):
        """Converting to int64 matches ``asi8`` and does not reuse its base."""
        dti = self.index_cls(arr1d)
        copy_false = None if np_version_gt2 else False

        expected = dti.asi8
        for dtype in ("i8", np.int64):
            tm.assert_numpy_array_equal(np.array(arr1d, dtype=dtype), expected)

        # check that we are still making copies when setting copy=copy_false
        result = np.array(arr1d, dtype="i8", copy=copy_false)
        assert result.base is not expected.base
        assert result.base is None

    def test_from_array_keeps_base(self):
        """``_from_sequence`` on an M8[ns] ndarray keeps a link to that ndarray."""
        # Ensure that DatetimeArray._ndarray.base isn't lost.
        values = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")

        whole = DatetimeArray._from_sequence(values)
        assert whole._ndarray is values

        empty = DatetimeArray._from_sequence(values[:0])
        assert empty._ndarray.base is values

    def test_from_dti(self, arr1d):
        """Both ``index_cls`` and ``pd.Index`` wrap the array element-for-element."""
        dti = self.index_cls(arr1d)
        assert list(dti) == list(arr1d)

        # Check that Index.__new__ knows what to do with DatetimeArray
        dti2 = pd.Index(arr1d)
        assert isinstance(dti2, DatetimeIndex)
        assert list(dti2) == list(arr1d)

    def test_astype_object(self, arr1d):
        """``astype("O")`` returns an object ndarray holding the index's items."""
        dti = self.index_cls(arr1d)

        as_object = arr1d.astype("O")
        assert isinstance(as_object, np.ndarray)
        assert as_object.dtype == "O"
        assert list(as_object) == list(dti)

    @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
    def test_to_period(self, datetime_index, freqstr):
        """``to_period`` on the array matches ``to_period`` on the index."""
        dti = datetime_index
        arr = dti._data

        period_freq = freq_to_period_freqstr(1, freqstr)
        expected = dti.to_period(freq=period_freq)
        result = arr.to_period(freq=period_freq)
        assert isinstance(result, PeriodArray)

        tm.assert_equal(result, expected._data)

    def test_to_period_2d(self, arr1d):
        """``to_period`` on a (1, N) array equals the reshaped 1D conversion."""
        arr2d = arr1d.reshape(1, -1)

        # a UserWarning is expected only when the array carries a timezone
        warn = UserWarning if arr1d.tz is not None else None
        with tm.assert_produces_warning(warn):
            result = arr2d.to_period("D")
            expected = arr1d.to_period("D").reshape(1, -1)
        tm.assert_period_array_equal(result, expected)

    @pytest.mark.parametrize("propname", DatetimeArray._bool_ops)
    def test_bool_properties(self, arr1d, propname):
        """Each boolean property agrees between the array and its index."""
        dti = self.index_cls(arr1d)
        assert dti.freq == arr1d.freq

        result = getattr(arr1d, propname)
        expected = np.array(getattr(dti, propname), dtype=result.dtype)

        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("propname", DatetimeArray._field_ops)
    def test_int_properties(self, arr1d, propname):
        """Each field property agrees between the array and its index."""
        dti = self.index_cls(arr1d)

        result = getattr(arr1d, propname)
        expected = np.array(getattr(dti, propname), dtype=result.dtype)

        tm.assert_numpy_array_equal(result, expected)

    def test_take_fill_valid(self, arr1d, fixed_now_ts):
        """``take`` accepts a matching Timestamp fill and rejects other kinds."""
        arr = arr1d
        dti = self.index_cls(arr1d)

        now = fixed_now_ts.tz_localize(dti.tz)
        result = arr.take([-1, 1], allow_fill=True, fill_value=now)
        assert result[0] == now

        type_msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
        with pytest.raises(TypeError, match=type_msg):
            # fill_value Timedelta invalid
            arr.take([-1, 1], allow_fill=True, fill_value=now - now)

        with pytest.raises(TypeError, match=type_msg):
            # fill_value Period invalid
            arr.take([-1, 1], allow_fill=True, fill_value=Period("2014Q1"))

        # localize to the opposite tz-awareness from the array's
        other_tz = "US/Eastern" if dti.tz is None else None
        mismatched = fixed_now_ts.tz_localize(other_tz)
        tz_msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
        with pytest.raises(TypeError, match=tz_msg):
            # Timestamp with mismatched tz-awareness
            arr.take([-1, 1], allow_fill=True, fill_value=mismatched)

        value = NaT._value
        with pytest.raises(TypeError, match=type_msg):
            # require NaT, not iNaT, as it could be confused with an integer
            arr.take([-1, 1], allow_fill=True, fill_value=value)

        value = np.timedelta64("NaT", "ns")
        with pytest.raises(TypeError, match=type_msg):
            # require appropriate-dtype if we have a NA value
            arr.take([-1, 1], allow_fill=True, fill_value=value)

        if arr.tz is not None:
            # GH#37356
            # Assuming here that arr1d fixture does not include Australia/Melbourne
            value = fixed_now_ts.tz_localize("Australia/Melbourne")
            result = arr.take([-1, 1], allow_fill=True, fill_value=value)

            expected = arr.take(
                [-1, 1],
                allow_fill=True,
                fill_value=value.tz_convert(arr.dtype.tz),
            )
            tm.assert_equal(result, expected)

    def test_concat_same_type_invalid(self, arr1d):
        """Concatenating tz-aware with tz-naive data raises ValueError."""
        # different timezones
        arr = arr1d
        target_tz = "UTC" if arr.tz is None else None
        other = arr.tz_localize(target_tz)

        with pytest.raises(ValueError, match="to_concat must have the same"):
            arr._concat_same_type([arr, other])

    def test_concat_same_type_different_freq(self, unit):
        """Arrays with "D" and "h" freq in the same tz can be concatenated."""
        # we *can* concatenate DTI with different freqs.
        common = {"periods": 2, "tz": "US/Central", "unit": unit}
        daily = pd.date_range("2000", freq="D", **common)._data
        hourly = pd.date_range("2000", freq="h", **common)._data
        result = DatetimeArray._concat_same_type([daily, hourly])

        stamps = [
            "2000-01-01 00:00:00",
            "2000-01-02 00:00:00",
            "2000-01-01 00:00:00",
            "2000-01-01 01:00:00",
        ]
        expected = pd.to_datetime(stamps).tz_localize("US/Central").as_unit(unit)._data

        tm.assert_datetime_array_equal(result, expected)

    def test_strftime(self, arr1d):
        """Array ``strftime`` equals formatting every element individually."""
        fmt = "%Y %b"
        result = arr1d.strftime(fmt)
        expected = np.array([ts.strftime(fmt) for ts in arr1d], dtype=object)
        tm.assert_numpy_array_equal(result, expected)

    def test_strftime_nat(self):
        """``strftime`` renders a ``NaT`` entry as ``np.nan``."""
        # GH 29578
        arr = DatetimeIndex(["2019-01-01", NaT])._data

        result = arr.strftime("%Y-%m-%d")
        expected = np.array(["2019-01-01", np.nan], dtype=object)
        tm.assert_numpy_array_equal(result, expected)
903
+
904
+
905
class TestTimedeltaArray(SharedTests):
    """Shared datetime-like array tests run for TimedeltaArray, plus extras."""

    index_cls = TimedeltaIndex
    array_cls = TimedeltaArray
    scalar_type = pd.Timedelta
    example_dtype = "m8[ns]"

    def test_from_tdi(self):
        """``pd.Index`` wraps a TimedeltaArray as a TimedeltaIndex."""
        tdi = TimedeltaIndex(["1 Day", "3 Hours"])
        arr = tdi._data
        assert list(arr) == list(tdi)

        # Check that Index.__new__ knows what to do with TimedeltaArray
        rewrapped = pd.Index(arr)
        assert isinstance(rewrapped, TimedeltaIndex)
        assert list(rewrapped) == list(arr)

    def test_astype_object(self):
        """``astype("O")`` returns an object ndarray holding the index's items."""
        tdi = TimedeltaIndex(["1 Day", "3 Hours"])
        as_object = tdi._data.astype("O")
        assert isinstance(as_object, np.ndarray)
        assert as_object.dtype == "O"
        assert list(as_object) == list(tdi)

    def test_to_pytimedelta(self, timedelta_index):
        """``to_pytimedelta`` agrees between the index and its array."""
        expected = timedelta_index.to_pytimedelta()
        result = timedelta_index._data.to_pytimedelta()

        tm.assert_numpy_array_equal(result, expected)

    def test_total_seconds(self, timedelta_index):
        """``total_seconds`` on the array equals the index result's values."""
        expected = timedelta_index.total_seconds()
        result = timedelta_index._data.total_seconds()

        tm.assert_numpy_array_equal(result, expected.values)

    @pytest.mark.parametrize("propname", TimedeltaArray._field_ops)
    def test_int_properties(self, timedelta_index, propname):
        """Each field property agrees between the array and its index."""
        tdi = timedelta_index

        result = getattr(tdi._data, propname)
        expected = np.array(getattr(tdi, propname), dtype=result.dtype)

        tm.assert_numpy_array_equal(result, expected)

    def test_array_interface(self, timedelta_index):
        """Check which NumPy conversions return the backing ndarray itself."""
        arr = timedelta_index._data
        copy_false = None if np_version_gt2 else False
        backing = arr._ndarray

        # default asarray gives the same underlying data
        result = np.asarray(arr)
        assert result is backing
        tm.assert_numpy_array_equal(result, backing)
        result = np.array(arr, copy=copy_false)
        assert result is backing
        tm.assert_numpy_array_equal(result, backing)

        # specifying m8[ns] gives the same result as default
        result = np.asarray(arr, dtype="timedelta64[ns]")
        assert result is backing
        tm.assert_numpy_array_equal(result, backing)
        result = np.array(arr, dtype="timedelta64[ns]", copy=copy_false)
        assert result is backing
        tm.assert_numpy_array_equal(result, backing)
        result = np.array(arr, dtype="timedelta64[ns]")
        if not np_version_gt2:
            # TODO: GH 57739
            assert result is not backing
        tm.assert_numpy_array_equal(result, backing)

        # to object dtype
        result = np.asarray(arr, dtype=object)
        tm.assert_numpy_array_equal(result, np.array(list(arr), dtype=object))

        # to other dtype always copies
        result = np.asarray(arr, dtype="int64")
        assert result is not arr.asi8
        assert not np.may_share_memory(arr, result)
        tm.assert_numpy_array_equal(result, arr.asi8.copy())

        # other dtypes handled by numpy
        for dtype in ("float64", str):
            result = np.asarray(arr, dtype=dtype)
            tm.assert_numpy_array_equal(result, np.asarray(arr).astype(dtype))

    def test_take_fill_valid(self, timedelta_index, fixed_now_ts):
        """``take`` accepts a Timedelta fill and rejects other scalar kinds."""
        arr = timedelta_index._data

        one_day = pd.Timedelta(days=1)
        result = arr.take([-1, 1], allow_fill=True, fill_value=one_day)
        assert result[0] == one_day

        msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. Got"

        bad_fill = fixed_now_ts
        with pytest.raises(TypeError, match=msg):
            # fill_value Timestamp invalid
            arr.take([0, 1], allow_fill=True, fill_value=bad_fill)

        bad_fill = fixed_now_ts.to_period("D")
        with pytest.raises(TypeError, match=msg):
            # fill_value Period invalid
            arr.take([0, 1], allow_fill=True, fill_value=bad_fill)

        bad_fill = np.datetime64("NaT", "ns")
        with pytest.raises(TypeError, match=msg):
            # require appropriate-dtype if we have a NA value
            arr.take([-1, 1], allow_fill=True, fill_value=bad_fill)
1025
+
1026
+
1027
@pytest.mark.filterwarnings(r"ignore:Period with BDay freq is deprecated:FutureWarning")
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
class TestPeriodArray(SharedTests):
    """Shared datetime-like array tests run for PeriodArray, plus extras."""

    index_cls = PeriodIndex
    array_cls = PeriodArray
    scalar_type = Period
    example_dtype = PeriodIndex([], freq="W").dtype

    @pytest.fixture
    def arr1d(self, period_index):
        """
        Fixture returning PeriodArray from parametrized PeriodIndex objects
        """
        return period_index._data

    def test_from_pi(self, arr1d):
        """Both ``index_cls`` and ``pd.Index`` wrap the array element-for-element."""
        pi = self.index_cls(arr1d)
        arr = arr1d
        assert list(arr) == list(pi)

        # Check that Index.__new__ knows what to do with PeriodArray
        pi2 = pd.Index(arr)
        assert isinstance(pi2, PeriodIndex)
        assert list(pi2) == list(arr)

    def test_astype_object(self, arr1d):
        """``astype("O")`` returns an object ndarray holding the index's items."""
        pi = self.index_cls(arr1d)
        arr = arr1d
        asobj = arr.astype("O")
        assert isinstance(asobj, np.ndarray)
        assert asobj.dtype == "O"
        assert list(asobj) == list(pi)

    def test_take_fill_valid(self, arr1d):
        """``take`` rejects the integer NaT value and a timedelta64 NaT as fill."""
        arr = arr1d

        value = NaT._value
        msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
        with pytest.raises(TypeError, match=msg):
            # require NaT, not iNaT, as it could be confused with an integer
            arr.take([-1, 1], allow_fill=True, fill_value=value)

        value = np.timedelta64("NaT", "ns")
        with pytest.raises(TypeError, match=msg):
            # require appropriate-dtype if we have a NA value
            arr.take([-1, 1], allow_fill=True, fill_value=value)

    @pytest.mark.parametrize("how", ["S", "E"])
    def test_to_timestamp(self, how, arr1d):
        """``to_timestamp`` on the array matches ``to_timestamp`` on the index."""
        pi = self.index_cls(arr1d)
        arr = arr1d

        expected = DatetimeIndex(pi.to_timestamp(how=how))._data
        result = arr.to_timestamp(how=how)
        assert isinstance(result, DatetimeArray)

        tm.assert_equal(result, expected)

    def test_to_timestamp_roundtrip_bday(self):
        """Business-day data survives ``to_period`` followed by ``to_timestamp``."""
        # Case where infer_freq inside would choose "D" instead of "B"
        dta = pd.date_range("2021-10-18", periods=3, freq="B")._data
        parr = dta.to_period()
        result = parr.to_timestamp()
        assert result.freq == "B"
        tm.assert_extension_array_equal(result, dta)

        # every other business day
        dta2 = dta[::2]
        parr2 = dta2.to_period()
        result2 = parr2.to_timestamp()
        assert result2.freq == "2B"
        tm.assert_extension_array_equal(result2, dta2)

        # converting with an explicit "2B" period freq
        parr3 = dta.to_period("2B")
        result3 = parr3.to_timestamp()
        assert result3.freq == "B"
        tm.assert_extension_array_equal(result3, dta)

    def test_to_timestamp_out_of_bounds(self):
        """Periods starting in year 1500 raise instead of overflowing."""
        # GH#19643 previously overflowed silently
        pi = pd.period_range("1500", freq="Y", periods=3)
        msg = "Out of bounds nanosecond timestamp: 1500-01-01 00:00:00"
        with pytest.raises(OutOfBoundsDatetime, match=msg):
            pi.to_timestamp()

        with pytest.raises(OutOfBoundsDatetime, match=msg):
            pi._data.to_timestamp()

    @pytest.mark.parametrize("propname", PeriodArray._bool_ops)
    def test_bool_properties(self, arr1d, propname):
        """Each boolean property agrees between the array and its index."""
        # in this case _bool_ops is just `is_leap_year`
        pi = self.index_cls(arr1d)
        arr = arr1d

        result = getattr(arr, propname)
        expected = np.array(getattr(pi, propname))

        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("propname", PeriodArray._field_ops)
    def test_int_properties(self, arr1d, propname):
        """Each field property agrees between the array and its index."""
        pi = self.index_cls(arr1d)
        arr = arr1d

        result = getattr(arr, propname)
        expected = np.array(getattr(pi, propname))

        tm.assert_numpy_array_equal(result, expected)

    def test_array_interface(self, arr1d):
        """Check ``np.asarray`` with the default, object, int64, float and bytes."""
        arr = arr1d

        # default asarray gives objects
        result = np.asarray(arr)
        expected = np.array(list(arr), dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        # to object dtype (same as default)
        result = np.asarray(arr, dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        result = np.asarray(arr, dtype="int64")
        tm.assert_numpy_array_equal(result, arr.asi8)

        # to other dtypes
        msg = r"float\(\) argument must be a string or a( real)? number, not 'Period'"
        with pytest.raises(TypeError, match=msg):
            np.asarray(arr, dtype="float64")

        result = np.asarray(arr, dtype="S20")
        expected = np.asarray(arr).astype("S20")
        tm.assert_numpy_array_equal(result, expected)

    def test_strftime(self, arr1d):
        """Array ``strftime`` equals formatting every element individually."""
        arr = arr1d

        result = arr.strftime("%Y")
        expected = np.array([per.strftime("%Y") for per in arr], dtype=object)
        tm.assert_numpy_array_equal(result, expected)

    def test_strftime_nat(self):
        """``strftime`` renders a ``NaT`` entry as ``np.nan``."""
        # GH 29578
        arr = PeriodArray(PeriodIndex(["2019-01-01", NaT], dtype="period[D]"))

        result = arr.strftime("%Y-%m-%d")
        expected = np.array(["2019-01-01", np.nan], dtype=object)
        tm.assert_numpy_array_equal(result, expected)
1173
+
1174
+
1175
@pytest.mark.parametrize(
    "arr,casting_nats",
    [
        (
            TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data,
            (NaT, np.timedelta64("NaT", "ns")),
        ),
        (
            pd.date_range("2000-01-01", periods=3, freq="D")._data,
            (NaT, np.datetime64("NaT", "ns")),
        ),
        (pd.period_range("2000-01-01", periods=3, freq="D")._data, (NaT,)),
    ],
    ids=lambda param: type(param).__name__,
)
def test_casting_nat_setitem_array(arr, casting_nats):
    """Every NaT-like in *casting_nats* can be assigned and is stored as ``NaT``."""
    cls = type(arr)
    expected = cls._from_sequence([NaT, arr[1], arr[2]], dtype=arr.dtype)

    for nat_like in casting_nats:
        # work on a copy so the parametrized array itself is never modified
        arr = arr.copy()
        arr[0] = nat_like
        tm.assert_equal(arr, expected)
1197
+
1198
+
1199
+ @pytest.mark.parametrize(
1200
+ "arr,non_casting_nats",
1201
+ [
1202
+ (
1203
+ TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data,
1204
+ (np.datetime64("NaT", "ns"), NaT._value),
1205
+ ),
1206
+ (
1207
+ pd.date_range("2000-01-01", periods=3, freq="D")._data,
1208
+ (np.timedelta64("NaT", "ns"), NaT._value),
1209
+ ),
1210
+ (
1211
+ pd.period_range("2000-01-01", periods=3, freq="D")._data,
1212
+ (np.datetime64("NaT", "ns"), np.timedelta64("NaT", "ns"), NaT._value),
1213
+ ),
1214
+ ],
1215
+ ids=lambda x: type(x).__name__,
1216
+ )
1217
+ def test_invalid_nat_setitem_array(arr, non_casting_nats):
1218
+ msg = (
1219
+ "value should be a '(Timestamp|Timedelta|Period)', 'NaT', or array of those. "
1220
+ "Got '(timedelta64|datetime64|int)' instead."
1221
+ )
1222
+
1223
+ for nat in non_casting_nats:
1224
+ with pytest.raises(TypeError, match=msg):
1225
+ arr[0] = nat
1226
+
1227
+
1228
+ @pytest.mark.parametrize(
1229
+ "arr",
1230
+ [
1231
+ pd.date_range("2000", periods=4).array,
1232
+ pd.timedelta_range("2000", periods=4).array,
1233
+ ],
1234
+ )
1235
+ def test_to_numpy_extra(arr):
1236
+ arr[0] = NaT
1237
+ original = arr.copy()
1238
+
1239
+ result = arr.to_numpy()
1240
+ assert np.isnan(result[0])
1241
+
1242
+ result = arr.to_numpy(dtype="int64")
1243
+ assert result[0] == -9223372036854775808
1244
+
1245
+ result = arr.to_numpy(dtype="int64", na_value=0)
1246
+ assert result[0] == 0
1247
+
1248
+ result = arr.to_numpy(na_value=arr[1].to_numpy())
1249
+ assert result[0] == result[1]
1250
+
1251
+ result = arr.to_numpy(na_value=arr[1].to_numpy(copy=False))
1252
+ assert result[0] == result[1]
1253
+
1254
+ tm.assert_equal(arr, original)
1255
+
1256
+
1257
+ @pytest.mark.parametrize("as_index", [True, False])
1258
+ @pytest.mark.parametrize(
1259
+ "values",
1260
+ [
1261
+ pd.to_datetime(["2020-01-01", "2020-02-01"]),
1262
+ pd.to_timedelta([1, 2], unit="D"),
1263
+ PeriodIndex(["2020-01-01", "2020-02-01"], freq="D"),
1264
+ ],
1265
+ )
1266
+ @pytest.mark.parametrize(
1267
+ "klass",
1268
+ [
1269
+ list,
1270
+ np.array,
1271
+ pd.array,
1272
+ pd.Series,
1273
+ pd.Index,
1274
+ pd.Categorical,
1275
+ pd.CategoricalIndex,
1276
+ ],
1277
+ )
1278
+ def test_searchsorted_datetimelike_with_listlike(values, klass, as_index):
1279
+ # https://github.com/pandas-dev/pandas/issues/32762
1280
+ if not as_index:
1281
+ values = values._data
1282
+
1283
+ result = values.searchsorted(klass(values))
1284
+ expected = np.array([0, 1], dtype=result.dtype)
1285
+
1286
+ tm.assert_numpy_array_equal(result, expected)
1287
+
1288
+
1289
+ @pytest.mark.parametrize(
1290
+ "values",
1291
+ [
1292
+ pd.to_datetime(["2020-01-01", "2020-02-01"]),
1293
+ pd.to_timedelta([1, 2], unit="D"),
1294
+ PeriodIndex(["2020-01-01", "2020-02-01"], freq="D"),
1295
+ ],
1296
+ )
1297
+ @pytest.mark.parametrize(
1298
+ "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2]
1299
+ )
1300
+ def test_searchsorted_datetimelike_with_listlike_invalid_dtype(values, arg):
1301
+ # https://github.com/pandas-dev/pandas/issues/32762
1302
+ msg = "[Unexpected type|Cannot compare]"
1303
+ with pytest.raises(TypeError, match=msg):
1304
+ values.searchsorted(arg)
1305
+
1306
+
1307
+ @pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series])
1308
+ def test_period_index_construction_from_strings(klass):
1309
+ # https://github.com/pandas-dev/pandas/issues/26109
1310
+ strings = ["2020Q1", "2020Q2"] * 2
1311
+ data = klass(strings)
1312
+ result = PeriodIndex(data, freq="Q")
1313
+ expected = PeriodIndex([Period(s) for s in strings])
1314
+ tm.assert_index_equal(result, expected)
1315
+
1316
+
1317
+ @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
1318
+ def test_from_pandas_array(dtype):
1319
+ # GH#24615
1320
+ data = np.array([1, 2, 3], dtype=dtype)
1321
+ arr = NumpyExtensionArray(data)
1322
+
1323
+ cls = {"M8[ns]": DatetimeArray, "m8[ns]": TimedeltaArray}[dtype]
1324
+
1325
+ depr_msg = f"{cls.__name__}.__init__ is deprecated"
1326
+ with tm.assert_produces_warning(FutureWarning, match=depr_msg):
1327
+ result = cls(arr)
1328
+ expected = cls(data)
1329
+ tm.assert_extension_array_equal(result, expected)
1330
+
1331
+ result = cls._from_sequence(arr, dtype=dtype)
1332
+ expected = cls._from_sequence(data, dtype=dtype)
1333
+ tm.assert_extension_array_equal(result, expected)
1334
+
1335
+ func = {"M8[ns]": pd.to_datetime, "m8[ns]": pd.to_timedelta}[dtype]
1336
+ result = func(arr).array
1337
+ expected = func(data).array
1338
+ tm.assert_equal(result, expected)
1339
+
1340
+ # Let's check the Indexes while we're here
1341
+ idx_cls = {"M8[ns]": DatetimeIndex, "m8[ns]": TimedeltaIndex}[dtype]
1342
+ result = idx_cls(arr)
1343
+ expected = idx_cls(data)
1344
+ tm.assert_index_equal(result, expected)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_datetimes.py ADDED
@@ -0,0 +1,840 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tests for DatetimeArray
3
+ """
4
+ from __future__ import annotations
5
+
6
+ from datetime import timedelta
7
+ import operator
8
+
9
+ try:
10
+ from zoneinfo import ZoneInfo
11
+ except ImportError:
12
+ # Cannot assign to a type
13
+ ZoneInfo = None # type: ignore[misc, assignment]
14
+
15
+ import numpy as np
16
+ import pytest
17
+
18
+ from pandas._libs.tslibs import tz_compare
19
+
20
+ from pandas.core.dtypes.dtypes import DatetimeTZDtype
21
+
22
+ import pandas as pd
23
+ import pandas._testing as tm
24
+ from pandas.core.arrays import (
25
+ DatetimeArray,
26
+ TimedeltaArray,
27
+ )
28
+
29
+
30
+ class TestNonNano:
31
+ @pytest.fixture(params=["s", "ms", "us"])
32
+ def unit(self, request):
33
+ """Fixture returning parametrized time units"""
34
+ return request.param
35
+
36
+ @pytest.fixture
37
+ def dtype(self, unit, tz_naive_fixture):
38
+ tz = tz_naive_fixture
39
+ if tz is None:
40
+ return np.dtype(f"datetime64[{unit}]")
41
+ else:
42
+ return DatetimeTZDtype(unit=unit, tz=tz)
43
+
44
+ @pytest.fixture
45
+ def dta_dti(self, unit, dtype):
46
+ tz = getattr(dtype, "tz", None)
47
+
48
+ dti = pd.date_range("2016-01-01", periods=55, freq="D", tz=tz)
49
+ if tz is None:
50
+ arr = np.asarray(dti).astype(f"M8[{unit}]")
51
+ else:
52
+ arr = np.asarray(dti.tz_convert("UTC").tz_localize(None)).astype(
53
+ f"M8[{unit}]"
54
+ )
55
+
56
+ dta = DatetimeArray._simple_new(arr, dtype=dtype)
57
+ return dta, dti
58
+
59
+ @pytest.fixture
60
+ def dta(self, dta_dti):
61
+ dta, dti = dta_dti
62
+ return dta
63
+
64
+ def test_non_nano(self, unit, dtype):
65
+ arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]")
66
+ dta = DatetimeArray._simple_new(arr, dtype=dtype)
67
+
68
+ assert dta.dtype == dtype
69
+ assert dta[0].unit == unit
70
+ assert tz_compare(dta.tz, dta[0].tz)
71
+ assert (dta[0] == dta[:1]).all()
72
+
73
+ @pytest.mark.parametrize(
74
+ "field", DatetimeArray._field_ops + DatetimeArray._bool_ops
75
+ )
76
+ def test_fields(self, unit, field, dtype, dta_dti):
77
+ dta, dti = dta_dti
78
+
79
+ assert (dti == dta).all()
80
+
81
+ res = getattr(dta, field)
82
+ expected = getattr(dti._data, field)
83
+ tm.assert_numpy_array_equal(res, expected)
84
+
85
+ def test_normalize(self, unit):
86
+ dti = pd.date_range("2016-01-01 06:00:00", periods=55, freq="D")
87
+ arr = np.asarray(dti).astype(f"M8[{unit}]")
88
+
89
+ dta = DatetimeArray._simple_new(arr, dtype=arr.dtype)
90
+
91
+ assert not dta.is_normalized
92
+
93
+ # TODO: simplify once we can just .astype to other unit
94
+ exp = np.asarray(dti.normalize()).astype(f"M8[{unit}]")
95
+ expected = DatetimeArray._simple_new(exp, dtype=exp.dtype)
96
+
97
+ res = dta.normalize()
98
+ tm.assert_extension_array_equal(res, expected)
99
+
100
+ def test_simple_new_requires_match(self, unit):
101
+ arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]")
102
+ dtype = DatetimeTZDtype(unit, "UTC")
103
+
104
+ dta = DatetimeArray._simple_new(arr, dtype=dtype)
105
+ assert dta.dtype == dtype
106
+
107
+ wrong = DatetimeTZDtype("ns", "UTC")
108
+ with pytest.raises(AssertionError, match=""):
109
+ DatetimeArray._simple_new(arr, dtype=wrong)
110
+
111
+ def test_std_non_nano(self, unit):
112
+ dti = pd.date_range("2016-01-01", periods=55, freq="D")
113
+ arr = np.asarray(dti).astype(f"M8[{unit}]")
114
+
115
+ dta = DatetimeArray._simple_new(arr, dtype=arr.dtype)
116
+
117
+ # we should match the nano-reso std, but floored to our reso.
118
+ res = dta.std()
119
+ assert res._creso == dta._creso
120
+ assert res == dti.std().floor(unit)
121
+
122
+ @pytest.mark.filterwarnings("ignore:Converting to PeriodArray.*:UserWarning")
123
+ def test_to_period(self, dta_dti):
124
+ dta, dti = dta_dti
125
+ result = dta.to_period("D")
126
+ expected = dti._data.to_period("D")
127
+
128
+ tm.assert_extension_array_equal(result, expected)
129
+
130
+ def test_iter(self, dta):
131
+ res = next(iter(dta))
132
+ expected = dta[0]
133
+
134
+ assert type(res) is pd.Timestamp
135
+ assert res._value == expected._value
136
+ assert res._creso == expected._creso
137
+ assert res == expected
138
+
139
+ def test_astype_object(self, dta):
140
+ result = dta.astype(object)
141
+ assert all(x._creso == dta._creso for x in result)
142
+ assert all(x == y for x, y in zip(result, dta))
143
+
144
+ def test_to_pydatetime(self, dta_dti):
145
+ dta, dti = dta_dti
146
+
147
+ result = dta.to_pydatetime()
148
+ expected = dti.to_pydatetime()
149
+ tm.assert_numpy_array_equal(result, expected)
150
+
151
+ @pytest.mark.parametrize("meth", ["time", "timetz", "date"])
152
+ def test_time_date(self, dta_dti, meth):
153
+ dta, dti = dta_dti
154
+
155
+ result = getattr(dta, meth)
156
+ expected = getattr(dti, meth)
157
+ tm.assert_numpy_array_equal(result, expected)
158
+
159
+ def test_format_native_types(self, unit, dtype, dta_dti):
160
+ # In this case we should get the same formatted values with our nano
161
+ # version dti._data as we do with the non-nano dta
162
+ dta, dti = dta_dti
163
+
164
+ res = dta._format_native_types()
165
+ exp = dti._data._format_native_types()
166
+ tm.assert_numpy_array_equal(res, exp)
167
+
168
+ def test_repr(self, dta_dti, unit):
169
+ dta, dti = dta_dti
170
+
171
+ assert repr(dta) == repr(dti._data).replace("[ns", f"[{unit}")
172
+
173
+ # TODO: tests with td64
174
+ def test_compare_mismatched_resolutions(self, comparison_op):
175
+ # comparison that numpy gets wrong bc of silent overflows
176
+ op = comparison_op
177
+
178
+ iinfo = np.iinfo(np.int64)
179
+ vals = np.array([iinfo.min, iinfo.min + 1, iinfo.max], dtype=np.int64)
180
+
181
+ # Construct so that arr2[1] < arr[1] < arr[2] < arr2[2]
182
+ arr = np.array(vals).view("M8[ns]")
183
+ arr2 = arr.view("M8[s]")
184
+
185
+ left = DatetimeArray._simple_new(arr, dtype=arr.dtype)
186
+ right = DatetimeArray._simple_new(arr2, dtype=arr2.dtype)
187
+
188
+ if comparison_op is operator.eq:
189
+ expected = np.array([False, False, False])
190
+ elif comparison_op is operator.ne:
191
+ expected = np.array([True, True, True])
192
+ elif comparison_op in [operator.lt, operator.le]:
193
+ expected = np.array([False, False, True])
194
+ else:
195
+ expected = np.array([False, True, False])
196
+
197
+ result = op(left, right)
198
+ tm.assert_numpy_array_equal(result, expected)
199
+
200
+ result = op(left[1], right)
201
+ tm.assert_numpy_array_equal(result, expected)
202
+
203
+ if op not in [operator.eq, operator.ne]:
204
+ # check that numpy still gets this wrong; if it is fixed we may be
205
+ # able to remove compare_mismatched_resolutions
206
+ np_res = op(left._ndarray, right._ndarray)
207
+ tm.assert_numpy_array_equal(np_res[1:], ~expected[1:])
208
+
209
+ def test_add_mismatched_reso_doesnt_downcast(self):
210
+ # https://github.com/pandas-dev/pandas/pull/48748#issuecomment-1260181008
211
+ td = pd.Timedelta(microseconds=1)
212
+ dti = pd.date_range("2016-01-01", periods=3) - td
213
+ dta = dti._data.as_unit("us")
214
+
215
+ res = dta + td.as_unit("us")
216
+ # even though the result is an even number of days
217
+ # (so we _could_ downcast to unit="s"), we do not.
218
+ assert res.unit == "us"
219
+
220
+ @pytest.mark.parametrize(
221
+ "scalar",
222
+ [
223
+ timedelta(hours=2),
224
+ pd.Timedelta(hours=2),
225
+ np.timedelta64(2, "h"),
226
+ np.timedelta64(2 * 3600 * 1000, "ms"),
227
+ pd.offsets.Minute(120),
228
+ pd.offsets.Hour(2),
229
+ ],
230
+ )
231
+ def test_add_timedeltalike_scalar_mismatched_reso(self, dta_dti, scalar):
232
+ dta, dti = dta_dti
233
+
234
+ td = pd.Timedelta(scalar)
235
+ exp_unit = tm.get_finest_unit(dta.unit, td.unit)
236
+
237
+ expected = (dti + td)._data.as_unit(exp_unit)
238
+ result = dta + scalar
239
+ tm.assert_extension_array_equal(result, expected)
240
+
241
+ result = scalar + dta
242
+ tm.assert_extension_array_equal(result, expected)
243
+
244
+ expected = (dti - td)._data.as_unit(exp_unit)
245
+ result = dta - scalar
246
+ tm.assert_extension_array_equal(result, expected)
247
+
248
+ def test_sub_datetimelike_scalar_mismatch(self):
249
+ dti = pd.date_range("2016-01-01", periods=3)
250
+ dta = dti._data.as_unit("us")
251
+
252
+ ts = dta[0].as_unit("s")
253
+
254
+ result = dta - ts
255
+ expected = (dti - dti[0])._data.as_unit("us")
256
+ assert result.dtype == "m8[us]"
257
+ tm.assert_extension_array_equal(result, expected)
258
+
259
+ def test_sub_datetime64_reso_mismatch(self):
260
+ dti = pd.date_range("2016-01-01", periods=3)
261
+ left = dti._data.as_unit("s")
262
+ right = left.as_unit("ms")
263
+
264
+ result = left - right
265
+ exp_values = np.array([0, 0, 0], dtype="m8[ms]")
266
+ expected = TimedeltaArray._simple_new(
267
+ exp_values,
268
+ dtype=exp_values.dtype,
269
+ )
270
+ tm.assert_extension_array_equal(result, expected)
271
+ result2 = right - left
272
+ tm.assert_extension_array_equal(result2, expected)
273
+
274
+
275
+ class TestDatetimeArrayComparisons:
276
+ # TODO: merge this into tests/arithmetic/test_datetime64 once it is
277
+ # sufficiently robust
278
+
279
+ def test_cmp_dt64_arraylike_tznaive(self, comparison_op):
280
+ # arbitrary tz-naive DatetimeIndex
281
+ op = comparison_op
282
+
283
+ dti = pd.date_range("2016-01-1", freq="MS", periods=9, tz=None)
284
+ arr = dti._data
285
+ assert arr.freq == dti.freq
286
+ assert arr.tz == dti.tz
287
+
288
+ right = dti
289
+
290
+ expected = np.ones(len(arr), dtype=bool)
291
+ if comparison_op.__name__ in ["ne", "gt", "lt"]:
292
+ # for these the comparisons should be all-False
293
+ expected = ~expected
294
+
295
+ result = op(arr, arr)
296
+ tm.assert_numpy_array_equal(result, expected)
297
+ for other in [
298
+ right,
299
+ np.array(right),
300
+ list(right),
301
+ tuple(right),
302
+ right.astype(object),
303
+ ]:
304
+ result = op(arr, other)
305
+ tm.assert_numpy_array_equal(result, expected)
306
+
307
+ result = op(other, arr)
308
+ tm.assert_numpy_array_equal(result, expected)
309
+
310
+
311
+ class TestDatetimeArray:
312
+ def test_astype_ns_to_ms_near_bounds(self):
313
+ # GH#55979
314
+ ts = pd.Timestamp("1677-09-21 00:12:43.145225")
315
+ target = ts.as_unit("ms")
316
+
317
+ dta = DatetimeArray._from_sequence([ts], dtype="M8[ns]")
318
+ assert (dta.view("i8") == ts.as_unit("ns").value).all()
319
+
320
+ result = dta.astype("M8[ms]")
321
+ assert result[0] == target
322
+
323
+ expected = DatetimeArray._from_sequence([ts], dtype="M8[ms]")
324
+ assert (expected.view("i8") == target._value).all()
325
+
326
+ tm.assert_datetime_array_equal(result, expected)
327
+
328
+ def test_astype_non_nano_tznaive(self):
329
+ dti = pd.date_range("2016-01-01", periods=3)
330
+
331
+ res = dti.astype("M8[s]")
332
+ assert res.dtype == "M8[s]"
333
+
334
+ dta = dti._data
335
+ res = dta.astype("M8[s]")
336
+ assert res.dtype == "M8[s]"
337
+ assert isinstance(res, pd.core.arrays.DatetimeArray) # used to be ndarray
338
+
339
+ def test_astype_non_nano_tzaware(self):
340
+ dti = pd.date_range("2016-01-01", periods=3, tz="UTC")
341
+
342
+ res = dti.astype("M8[s, US/Pacific]")
343
+ assert res.dtype == "M8[s, US/Pacific]"
344
+
345
+ dta = dti._data
346
+ res = dta.astype("M8[s, US/Pacific]")
347
+ assert res.dtype == "M8[s, US/Pacific]"
348
+
349
+ # from non-nano to non-nano, preserving reso
350
+ res2 = res.astype("M8[s, UTC]")
351
+ assert res2.dtype == "M8[s, UTC]"
352
+ assert not tm.shares_memory(res2, res)
353
+
354
+ res3 = res.astype("M8[s, UTC]", copy=False)
355
+ assert res2.dtype == "M8[s, UTC]"
356
+ assert tm.shares_memory(res3, res)
357
+
358
+ def test_astype_to_same(self):
359
+ arr = DatetimeArray._from_sequence(
360
+ ["2000"], dtype=DatetimeTZDtype(tz="US/Central")
361
+ )
362
+ result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False)
363
+ assert result is arr
364
+
365
+ @pytest.mark.parametrize("dtype", ["datetime64[ns]", "datetime64[ns, UTC]"])
366
+ @pytest.mark.parametrize(
367
+ "other", ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, CET]"]
368
+ )
369
+ def test_astype_copies(self, dtype, other):
370
+ # https://github.com/pandas-dev/pandas/pull/32490
371
+ ser = pd.Series([1, 2], dtype=dtype)
372
+ orig = ser.copy()
373
+
374
+ err = False
375
+ if (dtype == "datetime64[ns]") ^ (other == "datetime64[ns]"):
376
+ # deprecated in favor of tz_localize
377
+ err = True
378
+
379
+ if err:
380
+ if dtype == "datetime64[ns]":
381
+ msg = "Use obj.tz_localize instead or series.dt.tz_localize instead"
382
+ else:
383
+ msg = "from timezone-aware dtype to timezone-naive dtype"
384
+ with pytest.raises(TypeError, match=msg):
385
+ ser.astype(other)
386
+ else:
387
+ t = ser.astype(other)
388
+ t[:] = pd.NaT
389
+ tm.assert_series_equal(ser, orig)
390
+
391
+ @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
392
+ def test_astype_int(self, dtype):
393
+ arr = DatetimeArray._from_sequence(
394
+ [pd.Timestamp("2000"), pd.Timestamp("2001")], dtype="M8[ns]"
395
+ )
396
+
397
+ if np.dtype(dtype) != np.int64:
398
+ with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"):
399
+ arr.astype(dtype)
400
+ return
401
+
402
+ result = arr.astype(dtype)
403
+ expected = arr._ndarray.view("i8")
404
+ tm.assert_numpy_array_equal(result, expected)
405
+
406
+ def test_astype_to_sparse_dt64(self):
407
+ # GH#50082
408
+ dti = pd.date_range("2016-01-01", periods=4)
409
+ dta = dti._data
410
+ result = dta.astype("Sparse[datetime64[ns]]")
411
+
412
+ assert result.dtype == "Sparse[datetime64[ns]]"
413
+ assert (result == dta).all()
414
+
415
+ def test_tz_setter_raises(self):
416
+ arr = DatetimeArray._from_sequence(
417
+ ["2000"], dtype=DatetimeTZDtype(tz="US/Central")
418
+ )
419
+ with pytest.raises(AttributeError, match="tz_localize"):
420
+ arr.tz = "UTC"
421
+
422
+ def test_setitem_str_impute_tz(self, tz_naive_fixture):
423
+ # Like for getitem, if we are passed a naive-like string, we impute
424
+ # our own timezone.
425
+ tz = tz_naive_fixture
426
+
427
+ data = np.array([1, 2, 3], dtype="M8[ns]")
428
+ dtype = data.dtype if tz is None else DatetimeTZDtype(tz=tz)
429
+ arr = DatetimeArray._from_sequence(data, dtype=dtype)
430
+ expected = arr.copy()
431
+
432
+ ts = pd.Timestamp("2020-09-08 16:50").tz_localize(tz)
433
+ setter = str(ts.tz_localize(None))
434
+
435
+ # Setting a scalar tznaive string
436
+ expected[0] = ts
437
+ arr[0] = setter
438
+ tm.assert_equal(arr, expected)
439
+
440
+ # Setting a listlike of tznaive strings
441
+ expected[1] = ts
442
+ arr[:2] = [setter, setter]
443
+ tm.assert_equal(arr, expected)
444
+
445
+ def test_setitem_different_tz_raises(self):
446
+ # pre-2.0 we required exact tz match, in 2.0 we require only
447
+ # tzawareness-match
448
+ data = np.array([1, 2, 3], dtype="M8[ns]")
449
+ arr = DatetimeArray._from_sequence(
450
+ data, copy=False, dtype=DatetimeTZDtype(tz="US/Central")
451
+ )
452
+ with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"):
453
+ arr[0] = pd.Timestamp("2000")
454
+
455
+ ts = pd.Timestamp("2000", tz="US/Eastern")
456
+ arr[0] = ts
457
+ assert arr[0] == ts.tz_convert("US/Central")
458
+
459
+ def test_setitem_clears_freq(self):
460
+ a = pd.date_range("2000", periods=2, freq="D", tz="US/Central")._data
461
+ a[0] = pd.Timestamp("2000", tz="US/Central")
462
+ assert a.freq is None
463
+
464
+ @pytest.mark.parametrize(
465
+ "obj",
466
+ [
467
+ pd.Timestamp("2021-01-01"),
468
+ pd.Timestamp("2021-01-01").to_datetime64(),
469
+ pd.Timestamp("2021-01-01").to_pydatetime(),
470
+ ],
471
+ )
472
+ def test_setitem_objects(self, obj):
473
+ # make sure we accept datetime64 and datetime in addition to Timestamp
474
+ dti = pd.date_range("2000", periods=2, freq="D")
475
+ arr = dti._data
476
+
477
+ arr[0] = obj
478
+ assert arr[0] == obj
479
+
480
+ def test_repeat_preserves_tz(self):
481
+ dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central")
482
+ arr = dti._data
483
+
484
+ repeated = arr.repeat([1, 1])
485
+
486
+ # preserves tz and values, but not freq
487
+ expected = DatetimeArray._from_sequence(arr.asi8, dtype=arr.dtype)
488
+ tm.assert_equal(repeated, expected)
489
+
490
+ def test_value_counts_preserves_tz(self):
491
+ dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central")
492
+ arr = dti._data.repeat([4, 3])
493
+
494
+ result = arr.value_counts()
495
+
496
+ # Note: not tm.assert_index_equal, since `freq`s do not match
497
+ assert result.index.equals(dti)
498
+
499
+ arr[-2] = pd.NaT
500
+ result = arr.value_counts(dropna=False)
501
+ expected = pd.Series([4, 2, 1], index=[dti[0], dti[1], pd.NaT], name="count")
502
+ tm.assert_series_equal(result, expected)
503
+
504
+ @pytest.mark.parametrize("method", ["pad", "backfill"])
505
+ def test_fillna_preserves_tz(self, method):
506
+ dti = pd.date_range("2000-01-01", periods=5, freq="D", tz="US/Central")
507
+ arr = DatetimeArray._from_sequence(dti, copy=True)
508
+ arr[2] = pd.NaT
509
+
510
+ fill_val = dti[1] if method == "pad" else dti[3]
511
+ expected = DatetimeArray._from_sequence(
512
+ [dti[0], dti[1], fill_val, dti[3], dti[4]],
513
+ dtype=DatetimeTZDtype(tz="US/Central"),
514
+ )
515
+
516
+ result = arr._pad_or_backfill(method=method)
517
+ tm.assert_extension_array_equal(result, expected)
518
+
519
+ # assert that arr and dti were not modified in-place
520
+ assert arr[2] is pd.NaT
521
+ assert dti[2] == pd.Timestamp("2000-01-03", tz="US/Central")
522
+
523
+ def test_fillna_2d(self):
524
+ dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific")
525
+ dta = dti._data.reshape(3, 2).copy()
526
+ dta[0, 1] = pd.NaT
527
+ dta[1, 0] = pd.NaT
528
+
529
+ res1 = dta._pad_or_backfill(method="pad")
530
+ expected1 = dta.copy()
531
+ expected1[1, 0] = dta[0, 0]
532
+ tm.assert_extension_array_equal(res1, expected1)
533
+
534
+ res2 = dta._pad_or_backfill(method="backfill")
535
+ expected2 = dta.copy()
536
+ expected2 = dta.copy()
537
+ expected2[1, 0] = dta[2, 0]
538
+ expected2[0, 1] = dta[1, 1]
539
+ tm.assert_extension_array_equal(res2, expected2)
540
+
541
+ # with different ordering for underlying ndarray; behavior should
542
+ # be unchanged
543
+ dta2 = dta._from_backing_data(dta._ndarray.copy(order="F"))
544
+ assert dta2._ndarray.flags["F_CONTIGUOUS"]
545
+ assert not dta2._ndarray.flags["C_CONTIGUOUS"]
546
+ tm.assert_extension_array_equal(dta, dta2)
547
+
548
+ res3 = dta2._pad_or_backfill(method="pad")
549
+ tm.assert_extension_array_equal(res3, expected1)
550
+
551
+ res4 = dta2._pad_or_backfill(method="backfill")
552
+ tm.assert_extension_array_equal(res4, expected2)
553
+
554
+ # test the DataFrame method while we're here
555
+ df = pd.DataFrame(dta)
556
+ res = df.ffill()
557
+ expected = pd.DataFrame(expected1)
558
+ tm.assert_frame_equal(res, expected)
559
+
560
+ res = df.bfill()
561
+ expected = pd.DataFrame(expected2)
562
+ tm.assert_frame_equal(res, expected)
563
+
564
+ def test_array_interface_tz(self):
565
+ tz = "US/Central"
566
+ data = pd.date_range("2017", periods=2, tz=tz)._data
567
+ result = np.asarray(data)
568
+
569
+ expected = np.array(
570
+ [
571
+ pd.Timestamp("2017-01-01T00:00:00", tz=tz),
572
+ pd.Timestamp("2017-01-02T00:00:00", tz=tz),
573
+ ],
574
+ dtype=object,
575
+ )
576
+ tm.assert_numpy_array_equal(result, expected)
577
+
578
+ result = np.asarray(data, dtype=object)
579
+ tm.assert_numpy_array_equal(result, expected)
580
+
581
+ result = np.asarray(data, dtype="M8[ns]")
582
+
583
+ expected = np.array(
584
+ ["2017-01-01T06:00:00", "2017-01-02T06:00:00"], dtype="M8[ns]"
585
+ )
586
+ tm.assert_numpy_array_equal(result, expected)
587
+
588
+ def test_array_interface(self):
589
+ data = pd.date_range("2017", periods=2)._data
590
+ expected = np.array(
591
+ ["2017-01-01T00:00:00", "2017-01-02T00:00:00"], dtype="datetime64[ns]"
592
+ )
593
+
594
+ result = np.asarray(data)
595
+ tm.assert_numpy_array_equal(result, expected)
596
+
597
+ result = np.asarray(data, dtype=object)
598
+ expected = np.array(
599
+ [pd.Timestamp("2017-01-01T00:00:00"), pd.Timestamp("2017-01-02T00:00:00")],
600
+ dtype=object,
601
+ )
602
+ tm.assert_numpy_array_equal(result, expected)
603
+
604
+ @pytest.mark.parametrize("index", [True, False])
605
+ def test_searchsorted_different_tz(self, index):
606
+ data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
607
+ arr = pd.DatetimeIndex(data, freq="D")._data.tz_localize("Asia/Tokyo")
608
+ if index:
609
+ arr = pd.Index(arr)
610
+
611
+ expected = arr.searchsorted(arr[2])
612
+ result = arr.searchsorted(arr[2].tz_convert("UTC"))
613
+ assert result == expected
614
+
615
+ expected = arr.searchsorted(arr[2:6])
616
+ result = arr.searchsorted(arr[2:6].tz_convert("UTC"))
617
+ tm.assert_equal(result, expected)
618
+
619
+ @pytest.mark.parametrize("index", [True, False])
620
+ def test_searchsorted_tzawareness_compat(self, index):
621
+ data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
622
+ arr = pd.DatetimeIndex(data, freq="D")._data
623
+ if index:
624
+ arr = pd.Index(arr)
625
+
626
+ mismatch = arr.tz_localize("Asia/Tokyo")
627
+
628
+ msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
629
+ with pytest.raises(TypeError, match=msg):
630
+ arr.searchsorted(mismatch[0])
631
+ with pytest.raises(TypeError, match=msg):
632
+ arr.searchsorted(mismatch)
633
+
634
+ with pytest.raises(TypeError, match=msg):
635
+ mismatch.searchsorted(arr[0])
636
+ with pytest.raises(TypeError, match=msg):
637
+ mismatch.searchsorted(arr)
638
+
639
+ @pytest.mark.parametrize(
640
+ "other",
641
+ [
642
+ 1,
643
+ np.int64(1),
644
+ 1.0,
645
+ np.timedelta64("NaT"),
646
+ pd.Timedelta(days=2),
647
+ "invalid",
648
+ np.arange(10, dtype="i8") * 24 * 3600 * 10**9,
649
+ np.arange(10).view("timedelta64[ns]") * 24 * 3600 * 10**9,
650
+ pd.Timestamp("2021-01-01").to_period("D"),
651
+ ],
652
+ )
653
+ @pytest.mark.parametrize("index", [True, False])
654
+ def test_searchsorted_invalid_types(self, other, index):
655
+ data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
656
+ arr = pd.DatetimeIndex(data, freq="D")._data
657
+ if index:
658
+ arr = pd.Index(arr)
659
+
660
+ msg = "|".join(
661
+ [
662
+ "searchsorted requires compatible dtype or scalar",
663
+ "value should be a 'Timestamp', 'NaT', or array of those. Got",
664
+ ]
665
+ )
666
+ with pytest.raises(TypeError, match=msg):
667
+ arr.searchsorted(other)
668
+
669
+ def test_shift_fill_value(self):
670
+ dti = pd.date_range("2016-01-01", periods=3)
671
+
672
+ dta = dti._data
673
+ expected = DatetimeArray._from_sequence(np.roll(dta._ndarray, 1))
674
+
675
+ fv = dta[-1]
676
+ for fill_value in [fv, fv.to_pydatetime(), fv.to_datetime64()]:
677
+ result = dta.shift(1, fill_value=fill_value)
678
+ tm.assert_datetime_array_equal(result, expected)
679
+
680
+ dta = dta.tz_localize("UTC")
681
+ expected = expected.tz_localize("UTC")
682
+ fv = dta[-1]
683
+ for fill_value in [fv, fv.to_pydatetime()]:
684
+ result = dta.shift(1, fill_value=fill_value)
685
+ tm.assert_datetime_array_equal(result, expected)
686
+
687
+ def test_shift_value_tzawareness_mismatch(self):
688
+ dti = pd.date_range("2016-01-01", periods=3)
689
+
690
+ dta = dti._data
691
+
692
+ fv = dta[-1].tz_localize("UTC")
693
+ for invalid in [fv, fv.to_pydatetime()]:
694
+ with pytest.raises(TypeError, match="Cannot compare"):
695
+ dta.shift(1, fill_value=invalid)
696
+
697
+ dta = dta.tz_localize("UTC")
698
+ fv = dta[-1].tz_localize(None)
699
+ for invalid in [fv, fv.to_pydatetime(), fv.to_datetime64()]:
700
+ with pytest.raises(TypeError, match="Cannot compare"):
701
+ dta.shift(1, fill_value=invalid)
702
+
703
+ def test_shift_requires_tzmatch(self):
704
+ # pre-2.0 we required exact tz match, in 2.0 we require just
705
+ # matching tzawareness
706
+ dti = pd.date_range("2016-01-01", periods=3, tz="UTC")
707
+ dta = dti._data
708
+
709
+ fill_value = pd.Timestamp("2020-10-18 18:44", tz="US/Pacific")
710
+
711
+ result = dta.shift(1, fill_value=fill_value)
712
+ expected = dta.shift(1, fill_value=fill_value.tz_convert("UTC"))
713
+ tm.assert_equal(result, expected)
714
+
715
+ def test_tz_localize_t2d(self):
716
+ dti = pd.date_range("1994-05-12", periods=12, tz="US/Pacific")
717
+ dta = dti._data.reshape(3, 4)
718
+ result = dta.tz_localize(None)
719
+
720
+ expected = dta.ravel().tz_localize(None).reshape(dta.shape)
721
+ tm.assert_datetime_array_equal(result, expected)
722
+
723
+ roundtrip = expected.tz_localize("US/Pacific")
724
+ tm.assert_datetime_array_equal(roundtrip, dta)
725
+
726
+ easts = ["US/Eastern", "dateutil/US/Eastern"]
727
+ if ZoneInfo is not None:
728
+ try:
729
+ tz = ZoneInfo("US/Eastern")
730
+ except KeyError:
731
+ # no tzdata
732
+ pass
733
+ else:
734
+ # Argument 1 to "append" of "list" has incompatible type "ZoneInfo";
735
+ # expected "str"
736
+ easts.append(tz) # type: ignore[arg-type]
737
+
738
+ @pytest.mark.parametrize("tz", easts)
739
+ def test_iter_zoneinfo_fold(self, tz):
740
+ # GH#49684
741
+ utc_vals = np.array(
742
+ [1320552000, 1320555600, 1320559200, 1320562800], dtype=np.int64
743
+ )
744
+ utc_vals *= 1_000_000_000
745
+
746
+ dta = DatetimeArray._from_sequence(utc_vals).tz_localize("UTC").tz_convert(tz)
747
+
748
+ left = dta[2]
749
+ right = list(dta)[2]
750
+ assert str(left) == str(right)
751
+ # previously there was a bug where with non-pytz right would be
752
+ # Timestamp('2011-11-06 01:00:00-0400', tz='US/Eastern')
753
+ # while left would be
754
+ # Timestamp('2011-11-06 01:00:00-0500', tz='US/Eastern')
755
+ # The .value's would match (so they would compare as equal),
756
+ # but the folds would not
757
+ assert left.utcoffset() == right.utcoffset()
758
+
759
+ # The same bug in ints_to_pydatetime affected .astype, so we test
760
+ # that here.
761
+ right2 = dta.astype(object)[2]
762
+ assert str(left) == str(right2)
763
+ assert left.utcoffset() == right2.utcoffset()
764
+
765
+ @pytest.mark.parametrize(
766
+ "freq, freq_depr",
767
+ [
768
+ ("2ME", "2M"),
769
+ ("2SME", "2SM"),
770
+ ("2SME", "2sm"),
771
+ ("2QE", "2Q"),
772
+ ("2QE-SEP", "2Q-SEP"),
773
+ ("1YE", "1Y"),
774
+ ("2YE-MAR", "2Y-MAR"),
775
+ ("1YE", "1A"),
776
+ ("2YE-MAR", "2A-MAR"),
777
+ ("2ME", "2m"),
778
+ ("2QE-SEP", "2q-sep"),
779
+ ("2YE-MAR", "2a-mar"),
780
+ ("2YE", "2y"),
781
+ ],
782
+ )
783
+ def test_date_range_frequency_M_Q_Y_A_deprecated(self, freq, freq_depr):
784
+ # GH#9586, GH#54275
785
+ depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed "
786
+ f"in a future version, please use '{freq[1:]}' instead."
787
+
788
+ expected = pd.date_range("1/1/2000", periods=4, freq=freq)
789
+ with tm.assert_produces_warning(FutureWarning, match=depr_msg):
790
+ result = pd.date_range("1/1/2000", periods=4, freq=freq_depr)
791
+ tm.assert_index_equal(result, expected)
792
+
793
+ @pytest.mark.parametrize("freq_depr", ["2H", "2CBH", "2MIN", "2S", "2mS", "2Us"])
794
+ def test_date_range_uppercase_frequency_deprecated(self, freq_depr):
795
+ # GH#9586, GH#54939
796
+ depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed in a "
797
+ f"future version. Please use '{freq_depr.lower()[1:]}' instead."
798
+
799
+ expected = pd.date_range("1/1/2000", periods=4, freq=freq_depr.lower())
800
+ with tm.assert_produces_warning(FutureWarning, match=depr_msg):
801
+ result = pd.date_range("1/1/2000", periods=4, freq=freq_depr)
802
+ tm.assert_index_equal(result, expected)
803
+
804
+ @pytest.mark.parametrize(
805
+ "freq_depr",
806
+ [
807
+ "2ye-mar",
808
+ "2ys",
809
+ "2qe",
810
+ "2qs-feb",
811
+ "2bqs",
812
+ "2sms",
813
+ "2bms",
814
+ "2cbme",
815
+ "2me",
816
+ "2w",
817
+ ],
818
+ )
819
+ def test_date_range_lowercase_frequency_deprecated(self, freq_depr):
820
+ # GH#9586, GH#54939
821
+ depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed in a "
822
+ f"future version, please use '{freq_depr.upper()[1:]}' instead."
823
+
824
+ expected = pd.date_range("1/1/2000", periods=4, freq=freq_depr.upper())
825
+ with tm.assert_produces_warning(FutureWarning, match=depr_msg):
826
+ result = pd.date_range("1/1/2000", periods=4, freq=freq_depr)
827
+ tm.assert_index_equal(result, expected)
828
+
829
+
830
+ def test_factorize_sort_without_freq():
831
+ dta = DatetimeArray._from_sequence([0, 2, 1], dtype="M8[ns]")
832
+
833
+ msg = r"call pd.factorize\(obj, sort=True\) instead"
834
+ with pytest.raises(NotImplementedError, match=msg):
835
+ dta.factorize(sort=True)
836
+
837
+ # Do TimedeltaArray while we're here
838
+ tda = dta - dta[0]
839
+ with pytest.raises(NotImplementedError, match=msg):
840
+ tda.factorize(sort=True)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_ndarray_backed.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tests for subclasses of NDArrayBackedExtensionArray
3
+ """
4
+ import numpy as np
5
+
6
+ from pandas import (
7
+ CategoricalIndex,
8
+ date_range,
9
+ )
10
+ from pandas.core.arrays import (
11
+ Categorical,
12
+ DatetimeArray,
13
+ NumpyExtensionArray,
14
+ TimedeltaArray,
15
+ )
16
+
17
+
18
+ class TestEmpty:
19
+ def test_empty_categorical(self):
20
+ ci = CategoricalIndex(["a", "b", "c"], ordered=True)
21
+ dtype = ci.dtype
22
+
23
+ # case with int8 codes
24
+ shape = (4,)
25
+ result = Categorical._empty(shape, dtype=dtype)
26
+ assert isinstance(result, Categorical)
27
+ assert result.shape == shape
28
+ assert result._ndarray.dtype == np.int8
29
+
30
+ # case where repr would segfault if we didn't override base implementation
31
+ result = Categorical._empty((4096,), dtype=dtype)
32
+ assert isinstance(result, Categorical)
33
+ assert result.shape == (4096,)
34
+ assert result._ndarray.dtype == np.int8
35
+ repr(result)
36
+
37
+ # case with int16 codes
38
+ ci = CategoricalIndex(list(range(512)) * 4, ordered=False)
39
+ dtype = ci.dtype
40
+ result = Categorical._empty(shape, dtype=dtype)
41
+ assert isinstance(result, Categorical)
42
+ assert result.shape == shape
43
+ assert result._ndarray.dtype == np.int16
44
+
45
+ def test_empty_dt64tz(self):
46
+ dti = date_range("2016-01-01", periods=2, tz="Asia/Tokyo")
47
+ dtype = dti.dtype
48
+
49
+ shape = (0,)
50
+ result = DatetimeArray._empty(shape, dtype=dtype)
51
+ assert result.dtype == dtype
52
+ assert isinstance(result, DatetimeArray)
53
+ assert result.shape == shape
54
+
55
+ def test_empty_dt64(self):
56
+ shape = (3, 9)
57
+ result = DatetimeArray._empty(shape, dtype="datetime64[ns]")
58
+ assert isinstance(result, DatetimeArray)
59
+ assert result.shape == shape
60
+
61
+ def test_empty_td64(self):
62
+ shape = (3, 9)
63
+ result = TimedeltaArray._empty(shape, dtype="m8[ns]")
64
+ assert isinstance(result, TimedeltaArray)
65
+ assert result.shape == shape
66
+
67
+ def test_empty_pandas_array(self):
68
+ arr = NumpyExtensionArray(np.array([1, 2]))
69
+ dtype = arr.dtype
70
+
71
+ shape = (3, 9)
72
+ result = NumpyExtensionArray._empty(shape, dtype=dtype)
73
+ assert isinstance(result, NumpyExtensionArray)
74
+ assert result.dtype == dtype
75
+ assert result.shape == shape
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_period.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas._libs.tslibs import iNaT
5
+ from pandas._libs.tslibs.period import IncompatibleFrequency
6
+
7
+ from pandas.core.dtypes.base import _registry as registry
8
+ from pandas.core.dtypes.dtypes import PeriodDtype
9
+
10
+ import pandas as pd
11
+ import pandas._testing as tm
12
+ from pandas.core.arrays import PeriodArray
13
+
14
+ # ----------------------------------------------------------------------------
15
+ # Dtype
16
+
17
+
18
def test_registered():
    """PeriodDtype is in the registry and ``"Period[D]"`` resolves to it."""
    assert PeriodDtype in registry.dtypes
    assert registry.find("Period[D]") == PeriodDtype("D")
23
+
24
+
25
+ # ----------------------------------------------------------------------------
26
+ # period_array
27
+
28
+
29
+ def test_asi8():
30
+ result = PeriodArray._from_sequence(["2000", "2001", None], dtype="period[D]").asi8
31
+ expected = np.array([10957, 11323, iNaT])
32
+ tm.assert_numpy_array_equal(result, expected)
33
+
34
+
35
def test_take_raises():
    """``take`` rejects fill values of the wrong frequency or the wrong type."""
    periods = PeriodArray._from_sequence(["2000", "2001"], dtype="period[D]")

    # a weekly Period cannot fill a daily array
    weekly = pd.Period("2000", freq="W")
    with pytest.raises(IncompatibleFrequency, match="freq"):
        periods.take([0, -1], allow_fill=True, fill_value=weekly)

    # a plain string is not a valid fill value at all
    type_msg = "value should be a 'Period' or 'NaT'. Got 'str' instead"
    with pytest.raises(TypeError, match=type_msg):
        periods.take([0, -1], allow_fill=True, fill_value="foo")
43
+
44
+
45
def test_fillna_raises():
    """Filling a length-3 array with a length-2 slice of itself raises ValueError."""
    periods = PeriodArray._from_sequence(["2000", "2001", "2002"], dtype="period[D]")
    too_short = periods[:2]
    with pytest.raises(ValueError, match="Length"):
        periods.fillna(too_short)
49
+
50
+
51
+ def test_fillna_copies():
52
+ arr = PeriodArray._from_sequence(["2000", "2001", "2002"], dtype="period[D]")
53
+ result = arr.fillna(pd.Period("2000", "D"))
54
+ assert result is not arr
55
+
56
+
57
+ # ----------------------------------------------------------------------------
58
+ # setitem
59
+
60
+
61
@pytest.mark.parametrize(
    "key, value, expected",
    [
        ([0], pd.Period("2000", "D"), [10957, 1, 2]),
        ([0], None, [iNaT, 1, 2]),
        ([0], np.nan, [iNaT, 1, 2]),
        ([0, 1, 2], pd.Period("2000", "D"), [10957] * 3),
        (
            [0, 1, 2],
            [pd.Period("2000", "D"), pd.Period("2001", "D"), pd.Period("2002", "D")],
            [10957, 11323, 11688],
        ),
    ],
)
def test_setitem(key, value, expected):
    """Assigning ``value`` at ``key`` leaves the ordinals listed in ``expected``."""
    # start from ordinals 0, 1, 2 at daily frequency
    target = PeriodArray(np.arange(3), dtype="period[D]")
    wanted = PeriodArray(expected, dtype="period[D]")

    target[key] = value
    tm.assert_period_array_equal(target, wanted)
80
+
81
+
82
def test_setitem_raises_incompatible_freq():
    """Yearly values cannot be assigned into a daily array, scalar or array-like."""
    daily = PeriodArray(np.arange(3), dtype="period[D]")

    # scalar with a different frequency
    with pytest.raises(IncompatibleFrequency, match="freq"):
        daily[0] = pd.Period("2000", freq="Y")

    # array with a different frequency
    yearly = PeriodArray._from_sequence(["2000", "2001"], dtype="period[Y]")
    with pytest.raises(IncompatibleFrequency, match="freq"):
        daily[[0, 1]] = yearly
90
+
91
+
92
def test_setitem_raises_length():
    """Assigning a one-element list to two positions raises ValueError."""
    daily = PeriodArray(np.arange(3), dtype="period[D]")
    single_value = [pd.Period("2000", freq="D")]
    with pytest.raises(ValueError, match="length"):
        daily[[0, 1]] = single_value
96
+
97
+
98
def test_setitem_raises_type():
    """Assigning a bare integer raises TypeError mentioning ``int``."""
    daily = PeriodArray(np.arange(3), dtype="period[D]")
    with pytest.raises(TypeError, match="int"):
        daily[0] = 1
102
+
103
+
104
+ # ----------------------------------------------------------------------------
105
+ # Ops
106
+
107
+
108
def test_sub_period():
    """Subtracting a monthly Period from a daily array raises IncompatibleFrequency."""
    daily = PeriodArray._from_sequence(["2000", "2001"], dtype="period[D]")
    monthly = pd.Period("2000", freq="M")
    with pytest.raises(IncompatibleFrequency, match="freq"):
        daily - monthly
113
+
114
+
115
def test_sub_period_overflow():
    """Subtraction in either order must raise OverflowError, not wrap around."""
    # GH#47538
    overflow_msg = "Overflow in int64 addition"

    pi = pd.date_range("1677-09-22", periods=2, freq="D").to_period("ns")
    # a Period sharing pi's freq, built directly from a large ordinal
    per = pd.Period._from_ordinal(10**14, pi.freq)

    with pytest.raises(OverflowError, match=overflow_msg):
        pi - per

    with pytest.raises(OverflowError, match=overflow_msg):
        per - pi
127
+
128
+
129
+ # ----------------------------------------------------------------------------
130
+ # Methods
131
+
132
+
133
@pytest.mark.parametrize(
    "other",
    [
        pd.Period("2000", freq="h"),
        PeriodArray._from_sequence(["2000", "2001", "2000"], dtype="period[h]"),
    ],
)
def test_where_different_freq_raises(other):
    # GH#45768 The PeriodArray method raises, the Series method coerces
    daily = PeriodArray._from_sequence(["2000", "2001", "2002"], dtype="period[D]")
    ser = pd.Series(daily)
    mask = np.array([True, False, True])

    # array level: mismatching frequency is an error
    with pytest.raises(IncompatibleFrequency, match="freq"):
        ser.array._where(mask, other)

    # Series level: same result as going through object dtype first
    coerced = ser.where(mask, other)
    via_object = ser.astype(object).where(mask, other)
    tm.assert_series_equal(coerced, via_object)
153
+
154
+
155
+ # ----------------------------------------------------------------------------
156
+ # Printing
157
+
158
+
159
+ def test_repr_small():
160
+ arr = PeriodArray._from_sequence(["2000", "2001"], dtype="period[D]")
161
+ result = str(arr)
162
+ expected = (
163
+ "<PeriodArray>\n['2000-01-01', '2001-01-01']\nLength: 2, dtype: period[D]"
164
+ )
165
+ assert result == expected
166
+
167
+
168
+ def test_repr_large():
169
+ arr = PeriodArray._from_sequence(["2000", "2001"] * 500, dtype="period[D]")
170
+ result = str(arr)
171
+ expected = (
172
+ "<PeriodArray>\n"
173
+ "['2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
174
+ "'2000-01-01',\n"
175
+ " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
176
+ "'2001-01-01',\n"
177
+ " ...\n"
178
+ " '2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
179
+ "'2000-01-01',\n"
180
+ " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
181
+ "'2001-01-01']\n"
182
+ "Length: 1000, dtype: period[D]"
183
+ )
184
+ assert result == expected
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/arrays/test_timedeltas.py ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import timedelta
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+ import pandas as pd
7
+ from pandas import Timedelta
8
+ import pandas._testing as tm
9
+ from pandas.core.arrays import (
10
+ DatetimeArray,
11
+ TimedeltaArray,
12
+ )
13
+
14
+
15
class TestNonNano:
    """TimedeltaArray checks run once per unit in ``"s"``, ``"ms"``, ``"us"``."""

    @pytest.fixture(params=["s", "ms", "us"])
    def unit(self, request):
        # unit string substituted into "m8[...]" by the fixtures/tests below
        return request.param

    @pytest.fixture
    def tda(self, unit):
        # integers 0..4 reinterpreted as timedelta64 values of the given unit
        raw = np.arange(5, dtype=np.int64).view(f"m8[{unit}]")
        return TimedeltaArray._simple_new(raw, dtype=raw.dtype)

    @staticmethod
    def _as_nanoseconds(tda):
        # reference array: the same data cast to m8[ns]
        nanos = tda._ndarray.astype("m8[ns]")
        return TimedeltaArray._simple_new(nanos, dtype=nanos.dtype)

    def test_non_nano(self, unit):
        raw = np.arange(5, dtype=np.int64).view(f"m8[{unit}]")
        arr = TimedeltaArray._simple_new(raw, dtype=raw.dtype)

        assert arr.dtype == raw.dtype
        assert arr[0].unit == unit

    def test_as_unit_raises(self, tda):
        # GH#50616
        with pytest.raises(ValueError, match="Supported units"):
            tda.as_unit("D")

        as_index = pd.Index(tda)
        with pytest.raises(ValueError, match="Supported units"):
            as_index.as_unit("D")

    @pytest.mark.parametrize("field", TimedeltaArray._field_ops)
    def test_fields(self, tda, field):
        reference = self._as_nanoseconds(tda)

        observed = getattr(tda, field)
        wanted = getattr(reference, field)
        tm.assert_numpy_array_equal(observed, wanted)

    def test_to_pytimedelta(self, tda):
        reference = self._as_nanoseconds(tda)

        observed = tda.to_pytimedelta()
        wanted = reference.to_pytimedelta()
        tm.assert_numpy_array_equal(observed, wanted)

    def test_total_seconds(self, unit, tda):
        reference = self._as_nanoseconds(tda)

        observed = tda.total_seconds()
        wanted = reference.total_seconds()
        tm.assert_numpy_array_equal(observed, wanted)

    def test_timedelta_array_total_seconds(self):
        # GH34290
        wanted = Timedelta("2 min").total_seconds()

        observed = pd.array([Timedelta("2 min")]).total_seconds()[0]
        assert observed == wanted

    def test_total_seconds_nanoseconds(self):
        # issue #48521
        start_time = pd.Series(["2145-11-02 06:00:00"]).astype("datetime64[ns]")
        end_time = pd.Series(["2145-11-02 07:06:00"]).astype("datetime64[ns]")
        elapsed = end_time - start_time

        expected = elapsed.values / np.timedelta64(1, "s")
        result = elapsed.dt.total_seconds().values
        assert result == expected

    @pytest.mark.parametrize(
        "nat", [np.datetime64("NaT", "ns"), np.datetime64("NaT", "us")]
    )
    def test_add_nat_datetimelike_scalar(self, nat, tda):
        # both operand orders must give an all-NA DatetimeArray at tda's resolution
        for result in (tda + nat, nat + tda):
            assert isinstance(result, DatetimeArray)
            assert result._creso == tda._creso
            assert result.isna().all()

    def test_add_pdnat(self, tda):
        # both operand orders must give an all-NA TimedeltaArray at tda's resolution
        for result in (tda + pd.NaT, pd.NaT + tda):
            assert isinstance(result, TimedeltaArray)
            assert result._creso == tda._creso
            assert result.isna().all()

    # TODO: 2022-07-11 this is the only test that gets to DTA.tz_convert
    #  or tz_localize with non-nano; implement tests specific to that.
    def test_add_datetimelike_scalar(self, tda, tz_naive_fixture):
        ts = pd.Timestamp("2016-01-01", tz=tz_naive_fixture).as_unit("ns")

        expected = tda.as_unit("ns") + ts
        tm.assert_extension_array_equal(tda + ts, expected)
        tm.assert_extension_array_equal(ts + tda, expected)

        ts += Timedelta(1)  # case where we can't cast losslessly

        exp_values = tda._ndarray + ts.asm8
        naive = DatetimeArray._simple_new(exp_values, dtype=exp_values.dtype)
        expected = naive.tz_localize("UTC").tz_convert(ts.tz)

        tm.assert_extension_array_equal(tda + ts, expected)
        tm.assert_extension_array_equal(ts + tda, expected)

    def test_mul_scalar(self, tda):
        factor = 2
        product = tda * factor
        wanted = TimedeltaArray._simple_new(tda._ndarray * factor, dtype=tda.dtype)
        tm.assert_extension_array_equal(product, wanted)
        assert product._creso == tda._creso

    def test_mul_listlike(self, tda):
        factors = np.arange(len(tda))
        product = tda * factors
        wanted = TimedeltaArray._simple_new(tda._ndarray * factors, dtype=tda.dtype)
        tm.assert_extension_array_equal(product, wanted)
        assert product._creso == tda._creso

    def test_mul_listlike_object(self, tda):
        factors = np.arange(len(tda))
        # multiply by the object-dtype view, compare against the integer product
        product = tda * factors.astype(object)
        wanted = TimedeltaArray._simple_new(tda._ndarray * factors, dtype=tda.dtype)
        tm.assert_extension_array_equal(product, wanted)
        assert product._creso == tda._creso

    def test_div_numeric_scalar(self, tda):
        divisor = 2
        quotient = tda / divisor
        wanted = TimedeltaArray._simple_new(tda._ndarray / divisor, dtype=tda.dtype)
        tm.assert_extension_array_equal(quotient, wanted)
        assert quotient._creso == tda._creso

    def test_div_td_scalar(self, tda):
        one_second = timedelta(seconds=1)
        quotient = tda / one_second
        wanted = tda._ndarray / np.timedelta64(1, "s")
        tm.assert_numpy_array_equal(quotient, wanted)

    def test_div_numeric_array(self, tda):
        divisors = np.arange(len(tda))
        quotient = tda / divisors
        wanted = TimedeltaArray._simple_new(tda._ndarray / divisors, dtype=tda.dtype)
        tm.assert_extension_array_equal(quotient, wanted)
        assert quotient._creso == tda._creso

    def test_div_td_array(self, tda):
        divisors = tda._ndarray + tda._ndarray[-1]
        quotient = tda / divisors
        wanted = tda._ndarray / divisors
        tm.assert_numpy_array_equal(quotient, wanted)

    def test_add_timedeltaarraylike(self, tda):
        tda_nano = tda.astype("m8[ns]")

        # addition in both orders equals doubling the nanosecond array
        doubled = tda_nano * 2
        tm.assert_extension_array_equal(tda_nano + tda, doubled)
        tm.assert_extension_array_equal(tda + tda_nano, doubled)

        # subtraction in both orders equals the nanosecond array times zero
        zeros = tda_nano * 0
        tm.assert_extension_array_equal(tda - tda_nano, zeros)
        tm.assert_extension_array_equal(tda_nano - tda, zeros)
194
+
195
+
196
class TestTimedeltaArray:
    """astype, setitem and searchsorted checks for TimedeltaArray."""

    @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
    def test_astype_int(self, dtype):
        hours = [Timedelta("1h"), Timedelta("2h")]
        arr = TimedeltaArray._from_sequence(hours, dtype="m8[ns]")

        if np.dtype(dtype) == np.int64:
            # int64 is accepted and must equal the raw i8 view
            converted = arr.astype(dtype)
            tm.assert_numpy_array_equal(converted, arr._ndarray.view("i8"))
        else:
            # every other integer dtype is rejected
            with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"):
                arr.astype(dtype)

    def test_setitem_clears_freq(self):
        arr = pd.timedelta_range("1h", periods=2, freq="h")._data
        arr[0] = Timedelta("1h")
        assert arr.freq is None

    @pytest.mark.parametrize(
        "obj",
        [
            Timedelta(seconds=1),
            Timedelta(seconds=1).to_timedelta64(),
            Timedelta(seconds=1).to_pytimedelta(),
        ],
    )
    def test_setitem_objects(self, obj):
        # make sure we accept timedelta64 and timedelta in addition to Timedelta
        arr = pd.timedelta_range("2 Days", periods=4, freq="h")._data

        arr[0] = obj
        assert arr[0] == Timedelta(seconds=1)

    @pytest.mark.parametrize(
        "other",
        [
            1,
            np.int64(1),
            1.0,
            np.datetime64("NaT"),
            pd.Timestamp("2021-01-01"),
            "invalid",
            np.arange(10, dtype="i8") * 24 * 3600 * 10**9,
            (np.arange(10) * 24 * 3600 * 10**9).view("datetime64[ns]"),
            pd.Timestamp("2021-01-01").to_period("D"),
        ],
    )
    @pytest.mark.parametrize("index", [True, False])
    def test_searchsorted_invalid_types(self, other, index):
        day_nanos = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
        haystack = pd.TimedeltaIndex(day_nanos, freq="D")._data
        if index:
            # exercise the Index wrapper as well as the bare array
            haystack = pd.Index(haystack)

        # either wording is acceptable
        accepted = (
            "searchsorted requires compatible dtype or scalar",
            "value should be a 'Timedelta', 'NaT', or array of those. Got",
        )
        msg = "|".join(accepted)
        with pytest.raises(TypeError, match=msg):
            haystack.searchsorted(other)
262
+
263
+
264
class TestUnaryOps:
    """abs / unary plus / unary minus, via operators and the numpy ufuncs."""

    def test_abs(self):
        raw = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
        arr = TimedeltaArray._from_sequence(raw)

        raw_abs = np.array([3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
        wanted = TimedeltaArray._from_sequence(raw_abs)

        via_builtin = abs(arr)
        tm.assert_timedelta_array_equal(via_builtin, wanted)

        via_numpy = np.abs(arr)
        tm.assert_timedelta_array_equal(via_numpy, wanted)

    def test_pos(self):
        raw = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
        arr = TimedeltaArray._from_sequence(raw)

        # equal values, but not backed by the same memory
        via_operator = +arr
        tm.assert_timedelta_array_equal(via_operator, arr)
        assert not tm.shares_memory(via_operator, arr)

        via_numpy = np.positive(arr)
        tm.assert_timedelta_array_equal(via_numpy, arr)
        assert not tm.shares_memory(via_numpy, arr)

    def test_neg(self):
        raw = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
        arr = TimedeltaArray._from_sequence(raw)

        raw_neg = np.array([3600 * 10**9, "NaT", -7200 * 10**9], dtype="m8[ns]")
        wanted = TimedeltaArray._from_sequence(raw_neg)

        via_operator = -arr
        tm.assert_timedelta_array_equal(via_operator, wanted)

        via_numpy = np.negative(arr)
        tm.assert_timedelta_array_equal(via_numpy, wanted)

    def test_neg_freq(self):
        tdi = pd.timedelta_range("2 Days", periods=4, freq="h")
        arr = tdi._data

        wanted = -tdi._data

        via_operator = -arr
        tm.assert_timedelta_array_equal(via_operator, wanted)

        via_numpy = np.negative(arr)
        tm.assert_timedelta_array_equal(via_numpy, wanted)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/__init__.py ADDED
File without changes
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/common.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any
2
+
3
+ from pandas import Index
4
+
5
+
6
def allow_na_ops(obj: Any) -> bool:
    """
    Tell whether NA-based test cases apply to ``obj``.

    Returns False for an ``Index`` whose ``inferred_type`` is ``"boolean"``;
    for anything else returns ``obj._can_hold_na``.
    """
    if isinstance(obj, Index) and obj.inferred_type == "boolean":
        return False
    return obj._can_hold_na
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_constructors.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ import sys
3
+
4
+ import numpy as np
5
+ import pytest
6
+
7
+ from pandas.compat import PYPY
8
+
9
+ import pandas as pd
10
+ from pandas import (
11
+ DataFrame,
12
+ Index,
13
+ Series,
14
+ )
15
+ import pandas._testing as tm
16
+ from pandas.core.accessor import PandasDelegate
17
+ from pandas.core.base import (
18
+ NoNewAttributesMixin,
19
+ PandasObject,
20
+ )
21
+
22
+
23
def series_via_frame_from_dict(x, **kwargs):
    """Wrap ``x`` as column ``"a"`` of a DataFrame and return that column."""
    frame = DataFrame({"a": x}, **kwargs)
    return frame["a"]
25
+
26
+
27
def series_via_frame_from_scalar(x, **kwargs):
    """Pass ``x`` straight to DataFrame and return the column labelled ``0``."""
    frame = DataFrame(x, **kwargs)
    return frame[0]
29
+
30
+
31
@pytest.fixture(
    params=[
        pytest.param(Series, id="Series"),
        pytest.param(series_via_frame_from_dict, id="DataFrame-dict"),
        pytest.param(series_via_frame_from_scalar, id="DataFrame-array"),
        pytest.param(Index, id="Index"),
    ],
)
def constructor(request):
    """Parametrized fixture returning each of the four construction callables."""
    return request.param
42
+
43
+
44
class TestPandasDelegate:
    """Checks PandasDelegate with accessors registered but no ``_delegate_*`` overrides."""

    class Delegator:
        # names handed to _add_delegate_accessors by the tests below
        _properties = ["prop"]
        _methods = ["test_method"]

        def _set_prop(self, value):
            self.prop = value

        def _get_prop(self):
            return self.prop

        prop = property(_get_prop, _set_prop, doc="foo property")

        def test_method(self, *args, **kwargs):
            """a test method"""

    class Delegate(PandasDelegate, PandasObject):
        def __init__(self, obj) -> None:
            self.obj = obj

    def test_invalid_delegation(self):
        # these show that in order for the delegation to work
        # the _delegate_* methods need to be overridden to not raise
        # a TypeError
        source = self.Delegator

        self.Delegate._add_delegate_accessors(
            delegate=source,
            accessors=source._properties,
            typ="property",
        )
        self.Delegate._add_delegate_accessors(
            delegate=source,
            accessors=source._methods,
            typ="method",
        )

        delegate = self.Delegate(self.Delegator())

        read_msg = "You cannot access the property prop"
        write_msg = "The property prop cannot be set"

        with pytest.raises(TypeError, match=read_msg):
            delegate.prop

        with pytest.raises(TypeError, match=write_msg):
            delegate.prop = 5

        # reading still fails after the rejected assignment
        with pytest.raises(TypeError, match=read_msg):
            delegate.prop

    @pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
    def test_memory_usage(self):
        # Delegate does not implement memory_usage.
        # Check that we fall back to in-built `__sizeof__`
        # GH 12924
        wrapped = self.Delegate(self.Delegator())
        sys.getsizeof(wrapped)
99
+
100
+
101
class TestNoNewAttributesMixin:
    """Checks that ``_freeze`` blocks creation of new attributes."""

    def test_mixin(self):
        class T(NoNewAttributesMixin):
            pass

        obj = T()
        # NOTE: "__frozen" is always looked up by string; written as attribute
        # syntax inside this class body it would be name-mangled.
        assert not hasattr(obj, "__frozen")

        # before freezing, new attributes can be added freely
        obj.a = "test"
        assert obj.a == "test"

        obj._freeze()
        assert "__frozen" in dir(obj)
        assert getattr(obj, "__frozen")

        # after freezing, adding an attribute fails and leaves nothing behind
        with pytest.raises(AttributeError, match="You cannot add any new attribute"):
            obj.b = "test"

        assert not hasattr(obj, "b")
120
+
121
+
122
class TestConstruction:
    # test certain constructor behaviours on dtype inference across Series,
    # Index and DataFrame

    @pytest.mark.parametrize(
        "a",
        [
            np.array(["2263-01-01"], dtype="datetime64[D]"),
            np.array([datetime(2263, 1, 1)], dtype=object),
            np.array([np.datetime64("2263-01-01", "D")], dtype=object),
            np.array(["2263-01-01"], dtype=object),
        ],
        ids=[
            "datetime64[D]",
            "object-datetime.datetime",
            "object-numpy-scalar",
            "object-string",
        ],
    )
    def test_constructor_datetime_outofbound(
        self, a, constructor, request, using_infer_string
    ):
        # GH-26853 (+ bug GH-26206 out of bound non-ns unit)

        # No dtype specified (dtype inference)
        # datetime64[non-ns] raise error, other cases result in object dtype
        # and preserve original data
        inferred = constructor(a)
        if a.dtype.kind == "M":
            # Can't fit in nanosecond bounds -> get the nearest supported unit
            assert inferred.dtype == "M8[s]"
        else:
            is_string_case = (
                using_infer_string and "object-string" in request.node.callspec.id
            )
            if is_string_case:
                assert inferred.dtype == "string"
            else:
                assert inferred.dtype == "object"
            tm.assert_numpy_array_equal(inferred.to_numpy(), a)

        # Explicit dtype specified
        # Forced conversion fails for all -> all cases raise error
        oob_msg = "Out of bounds|Out of bounds .* present at position 0"
        with pytest.raises(pd.errors.OutOfBoundsDatetime, match=oob_msg):
            constructor(a, dtype="datetime64[ns]")

    def test_constructor_datetime_nonns(self, constructor):
        raw = np.array(["2020-01-01T00:00:00.000000"], dtype="datetime64[us]")
        dta = pd.core.arrays.DatetimeArray._simple_new(raw, dtype=raw.dtype)
        expected = constructor(dta)
        assert expected.dtype == raw.dtype

        # building from the raw ndarray must match building from the array
        tm.assert_equal(constructor(raw), expected)

        # https://github.com/pandas-dev/pandas/issues/34843
        raw.flags.writeable = False
        tm.assert_equal(constructor(raw), expected)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_conversion.py ADDED
@@ -0,0 +1,562 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas.core.dtypes.dtypes import DatetimeTZDtype
5
+
6
+ import pandas as pd
7
+ from pandas import (
8
+ CategoricalIndex,
9
+ Series,
10
+ Timedelta,
11
+ Timestamp,
12
+ date_range,
13
+ )
14
+ import pandas._testing as tm
15
+ from pandas.core.arrays import (
16
+ DatetimeArray,
17
+ IntervalArray,
18
+ NumpyExtensionArray,
19
+ PeriodArray,
20
+ SparseArray,
21
+ TimedeltaArray,
22
+ )
23
+ from pandas.core.arrays.string_arrow import ArrowStringArrayNumpySemantics
24
+
25
+
26
+ class TestToIterable:
27
+ # test that we convert an iterable to python types
28
+
29
+ dtypes = [
30
+ ("int8", int),
31
+ ("int16", int),
32
+ ("int32", int),
33
+ ("int64", int),
34
+ ("uint8", int),
35
+ ("uint16", int),
36
+ ("uint32", int),
37
+ ("uint64", int),
38
+ ("float16", float),
39
+ ("float32", float),
40
+ ("float64", float),
41
+ ("datetime64[ns]", Timestamp),
42
+ ("datetime64[ns, US/Eastern]", Timestamp),
43
+ ("timedelta64[ns]", Timedelta),
44
+ ]
45
+
46
+ @pytest.mark.parametrize("dtype, rdtype", dtypes)
47
+ @pytest.mark.parametrize(
48
+ "method",
49
+ [
50
+ lambda x: x.tolist(),
51
+ lambda x: x.to_list(),
52
+ lambda x: list(x),
53
+ lambda x: list(x.__iter__()),
54
+ ],
55
+ ids=["tolist", "to_list", "list", "iter"],
56
+ )
57
+ def test_iterable(self, index_or_series, method, dtype, rdtype):
58
+ # gh-10904
59
+ # gh-13258
60
+ # coerce iteration to underlying python / pandas types
61
+ typ = index_or_series
62
+ if dtype == "float16" and issubclass(typ, pd.Index):
63
+ with pytest.raises(NotImplementedError, match="float16 indexes are not "):
64
+ typ([1], dtype=dtype)
65
+ return
66
+ s = typ([1], dtype=dtype)
67
+ result = method(s)[0]
68
+ assert isinstance(result, rdtype)
69
+
70
+ @pytest.mark.parametrize(
71
+ "dtype, rdtype, obj",
72
+ [
73
+ ("object", object, "a"),
74
+ ("object", int, 1),
75
+ ("category", object, "a"),
76
+ ("category", int, 1),
77
+ ],
78
+ )
79
+ @pytest.mark.parametrize(
80
+ "method",
81
+ [
82
+ lambda x: x.tolist(),
83
+ lambda x: x.to_list(),
84
+ lambda x: list(x),
85
+ lambda x: list(x.__iter__()),
86
+ ],
87
+ ids=["tolist", "to_list", "list", "iter"],
88
+ )
89
+ def test_iterable_object_and_category(
90
+ self, index_or_series, method, dtype, rdtype, obj
91
+ ):
92
+ # gh-10904
93
+ # gh-13258
94
+ # coerce iteration to underlying python / pandas types
95
+ typ = index_or_series
96
+ s = typ([obj], dtype=dtype)
97
+ result = method(s)[0]
98
+ assert isinstance(result, rdtype)
99
+
100
+ @pytest.mark.parametrize("dtype, rdtype", dtypes)
101
+ def test_iterable_items(self, dtype, rdtype):
102
+ # gh-13258
103
+ # test if items yields the correct boxed scalars
104
+ # this only applies to series
105
+ s = Series([1], dtype=dtype)
106
+ _, result = next(iter(s.items()))
107
+ assert isinstance(result, rdtype)
108
+
109
+ _, result = next(iter(s.items()))
110
+ assert isinstance(result, rdtype)
111
+
112
+ @pytest.mark.parametrize(
113
+ "dtype, rdtype", dtypes + [("object", int), ("category", int)]
114
+ )
115
+ def test_iterable_map(self, index_or_series, dtype, rdtype):
116
+ # gh-13236
117
+ # coerce iteration to underlying python / pandas types
118
+ typ = index_or_series
119
+ if dtype == "float16" and issubclass(typ, pd.Index):
120
+ with pytest.raises(NotImplementedError, match="float16 indexes are not "):
121
+ typ([1], dtype=dtype)
122
+ return
123
+ s = typ([1], dtype=dtype)
124
+ result = s.map(type)[0]
125
+ if not isinstance(rdtype, tuple):
126
+ rdtype = (rdtype,)
127
+ assert result in rdtype
128
+
129
+ @pytest.mark.parametrize(
130
+ "method",
131
+ [
132
+ lambda x: x.tolist(),
133
+ lambda x: x.to_list(),
134
+ lambda x: list(x),
135
+ lambda x: list(x.__iter__()),
136
+ ],
137
+ ids=["tolist", "to_list", "list", "iter"],
138
+ )
139
+ def test_categorial_datetimelike(self, method):
140
+ i = CategoricalIndex([Timestamp("1999-12-31"), Timestamp("2000-12-31")])
141
+
142
+ result = method(i)[0]
143
+ assert isinstance(result, Timestamp)
144
+
145
def test_iter_box_dt64(self, unit):
    """Iterating a tz-naive datetime64 Series yields Timestamps in ``unit``."""
    expected_vals = [Timestamp("2011-01-01"), Timestamp("2011-01-02")]
    ser = Series(expected_vals).dt.as_unit(unit)
    assert ser.dtype == f"datetime64[{unit}]"

    for boxed, expected in zip(ser, expected_vals):
        assert isinstance(boxed, Timestamp)
        assert boxed.tz is None
        assert boxed == expected
        assert boxed.unit == unit
154
+
155
def test_iter_box_dt64tz(self, unit):
    """Iterating a tz-aware datetime64 Series yields Timestamps keeping the tz."""
    expected_vals = [
        Timestamp("2011-01-01", tz="US/Eastern"),
        Timestamp("2011-01-02", tz="US/Eastern"),
    ]
    ser = Series(expected_vals).dt.as_unit(unit)
    assert ser.dtype == f"datetime64[{unit}, US/Eastern]"

    for boxed, expected in zip(ser, expected_vals):
        assert isinstance(boxed, Timestamp)
        assert boxed.tz == expected.tz
        assert boxed == expected
        assert boxed.unit == unit
168
+
169
def test_iter_box_timedelta64(self, unit):
    """Iterating a timedelta64 Series yields Timedelta objects in ``unit``."""
    expected_vals = [Timedelta("1 days"), Timedelta("2 days")]
    ser = Series(expected_vals).dt.as_unit(unit)
    assert ser.dtype == f"timedelta64[{unit}]"

    for boxed, expected in zip(ser, expected_vals):
        assert isinstance(boxed, Timedelta)
        assert boxed == expected
        assert boxed.unit == unit
178
+
179
def test_iter_box_period(self):
    """Iterating a Period[M] Series yields Period objects with a month-end freq."""
    periods = [
        pd.Period("2011-01-01", freq="M"),
        pd.Period("2011-01-02", freq="M"),
    ]
    ser = Series(periods)
    assert ser.dtype == "Period[M]"

    for boxed, expected in zip(ser, periods):
        assert isinstance(boxed, pd.Period)
        # the freq object is compared against the "ME" alias
        assert boxed.freq == "ME"
        assert boxed == expected
188
+
189
+
190
@pytest.mark.parametrize(
    "arr, expected_type, dtype",
    [
        (np.array([0, 1], dtype=np.int64), np.ndarray, "int64"),
        (np.array(["a", "b"]), np.ndarray, "object"),
        (pd.Categorical(["a", "b"]), pd.Categorical, "category"),
        (
            pd.DatetimeIndex(["2017", "2018"], tz="US/Central"),
            DatetimeArray,
            "datetime64[ns, US/Central]",
        ),
        (
            pd.PeriodIndex([2018, 2019], freq="Y"),
            PeriodArray,
            pd.core.dtypes.dtypes.PeriodDtype("Y-DEC"),
        ),
        (pd.IntervalIndex.from_breaks([0, 1, 2]), IntervalArray, "interval"),
        (
            pd.DatetimeIndex(["2017", "2018"]),
            DatetimeArray,
            "datetime64[ns]",
        ),
        (
            pd.TimedeltaIndex([10**10]),
            TimedeltaArray,
            "m8[ns]",
        ),
    ],
)
def test_values_consistent(arr, expected_type, dtype, using_infer_string):
    """Series and Index built from the same data expose equal ``_values``."""
    if dtype == "object" and using_infer_string:
        # with string inference enabled the "object" case is expected to be
        # backed by the arrow string array instead of an ndarray
        expected_type = ArrowStringArrayNumpySemantics

    from_series = Series(arr)._values
    from_index = pd.Index(arr)._values

    assert type(from_series) is expected_type
    assert type(from_series) is type(from_index)

    tm.assert_equal(from_series, from_index)
228
+
229
+
230
@pytest.mark.parametrize("arr", [np.array([1, 2, 3])])
def test_numpy_array(arr):
    """``Series.array`` over an ndarray equals ``NumpyExtensionArray(arr)``."""
    wrapped = Series(arr).array
    tm.assert_extension_array_equal(wrapped, NumpyExtensionArray(arr))
236
+
237
+
238
def test_numpy_array_all_dtypes(any_numpy_dtype):
    """``.array`` of an empty Series has the array class implied by the dtype kind."""
    backing = Series(dtype=any_numpy_dtype).array
    kind = np.dtype(any_numpy_dtype).kind

    # kind "M" -> DatetimeArray, kind "m" -> TimedeltaArray,
    # any other kind -> NumpyExtensionArray
    dedicated = {"M": DatetimeArray, "m": TimedeltaArray}
    expected_cls = dedicated.get(kind, NumpyExtensionArray)
    assert isinstance(backing, expected_cls)
247
+
248
+
249
@pytest.mark.parametrize(
    "arr, attr",
    [
        (pd.Categorical(["a", "b"]), "_codes"),
        (PeriodArray._from_sequence(["2000", "2001"], dtype="period[D]"), "_ndarray"),
        (pd.array([0, np.nan], dtype="Int64"), "_data"),
        (IntervalArray.from_breaks([0, 1]), "_left"),
        (SparseArray([0, 1]), "_sparse_values"),
        (
            DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[ns]")),
            "_ndarray",
        ),
        # tz-aware Datetime
        (
            DatetimeArray._from_sequence(
                np.array(
                    ["2000-01-01T12:00:00", "2000-01-02T12:00:00"], dtype="M8[ns]"
                ),
                dtype=DatetimeTZDtype(tz="US/Central"),
            ),
            "_ndarray",
        ),
    ],
)
def test_array(arr, attr, index_or_series, request):
    """``.array`` of a ``copy=False`` Index/Series is identical to the input.

    When ``attr`` is given, identity is checked on that attribute of both
    the input array and the returned array rather than on the arrays
    themselves.
    """
    container = index_or_series
    extracted = container(arr, copy=False).array

    expected_obj = getattr(arr, attr) if attr else arr
    actual_obj = getattr(extracted, attr) if attr else extracted

    assert actual_obj is expected_obj
283
+
284
+
285
def test_array_multiindex_raises():
    """Accessing ``.array`` on a MultiIndex raises ValueError."""
    multi = pd.MultiIndex.from_product([["A"], ["a", "b"]])
    with pytest.raises(ValueError, match="MultiIndex has no single backing array"):
        multi.array
290
+
291
+
292
@pytest.mark.parametrize(
    "arr, expected",
    [
        (np.array([1, 2], dtype=np.int64), np.array([1, 2], dtype=np.int64)),
        (pd.Categorical(["a", "b"]), np.array(["a", "b"], dtype=object)),
        (
            pd.core.arrays.period_array(["2000", "2001"], freq="D"),
            np.array([pd.Period("2000", freq="D"), pd.Period("2001", freq="D")]),
        ),
        (pd.array([0, np.nan], dtype="Int64"), np.array([0, np.nan])),
        (
            IntervalArray.from_breaks([0, 1, 2]),
            np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object),
        ),
        (SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)),
        # tz-naive datetime
        (
            DatetimeArray._from_sequence(np.array(["2000", "2001"], dtype="M8[ns]")),
            np.array(["2000", "2001"], dtype="M8[ns]"),
        ),
        # tz-aware stays tz-aware
        (
            DatetimeArray._from_sequence(
                np.array(["2000-01-01T06:00:00", "2000-01-02T06:00:00"], dtype="M8[ns]")
            )
            .tz_localize("UTC")
            .tz_convert("US/Central"),
            np.array(
                [
                    Timestamp("2000-01-01", tz="US/Central"),
                    Timestamp("2000-01-02", tz="US/Central"),
                ]
            ),
        ),
        # Timedelta
        (
            TimedeltaArray._from_sequence(
                np.array([0, 3600000000000], dtype="i8").view("m8[ns]")
            ),
            np.array([0, 3600000000000], dtype="m8[ns]"),
        ),
        # GH#26406 tz is preserved in Categorical[dt64tz]
        (
            pd.Categorical(date_range("2016-01-01", periods=2, tz="US/Pacific")),
            np.array(
                [
                    Timestamp("2016-01-01", tz="US/Pacific"),
                    Timestamp("2016-01-02", tz="US/Pacific"),
                ]
            ),
        ),
    ],
)
def test_to_numpy(arr, expected, index_or_series_or_array, request):
    """``to_numpy()`` and ``np.asarray`` both produce ``expected``."""
    container = index_or_series_or_array

    # wrapping the input must not emit any warning
    with tm.assert_produces_warning(None):
        wrapped = container(arr)

    # both conversion routes are checked, in this order
    for convert in (lambda obj: obj.to_numpy(), np.asarray):
        tm.assert_numpy_array_equal(convert(wrapped), expected)
356
+
357
+
358
@pytest.mark.parametrize("as_series", [True, False])
@pytest.mark.parametrize(
    "arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)]
)
def test_to_numpy_copy(arr, as_series, using_infer_string):
    """``to_numpy`` shares memory unless ``copy=True`` is requested.

    With string inference enabled and object-dtype input, no sharing is
    expected even without ``copy=True``.
    """
    obj = pd.Index(arr, copy=False)
    if as_series:
        obj = Series(obj.values, copy=False)

    shares_without_copy = not (using_infer_string and arr.dtype == object)

    # default call first, then an explicit copy=False
    for kwargs in ({}, {"copy": False}):
        result = obj.to_numpy(**kwargs)
        assert np.shares_memory(arr, result) is shares_without_copy

    # copy=True
    copied = obj.to_numpy(copy=True)
    assert np.shares_memory(arr, copied) is False
383
+
384
+
385
@pytest.mark.parametrize("as_series", [True, False])
def test_to_numpy_dtype(as_series, unit):
    """tz-aware data: object output keeps the tz, ``M8[ns]`` output does not."""
    tz = "US/Eastern"
    obj = pd.DatetimeIndex(["2000", "2001"], tz=tz)
    if as_series:
        obj = Series(obj)

    # preserve tz by default, and likewise with an explicit object dtype
    aware = np.array(
        [Timestamp("2000", tz=tz), Timestamp("2001", tz=tz)], dtype=object
    )
    tm.assert_numpy_array_equal(obj.to_numpy(), aware)
    tm.assert_numpy_array_equal(obj.to_numpy(dtype="object"), aware)

    # requesting datetime64 yields naive values shifted by five hours
    naive = np.array(["2000-01-01T05", "2001-01-01T05"], dtype="M8[ns]")
    tm.assert_numpy_array_equal(obj.to_numpy(dtype="M8[ns]"), naive)
405
+
406
+
407
@pytest.mark.parametrize(
    "values, dtype, na_value, expected",
    [
        ([1, 2, None], "float64", 0, [1.0, 2.0, 0.0]),
        (
            [Timestamp("2000"), Timestamp("2000"), pd.NaT],
            None,
            Timestamp("2000"),
            [np.datetime64("2000-01-01T00:00:00.000000000")] * 3,
        ),
    ],
)
def test_to_numpy_na_value_numpy_dtype(
    index_or_series, values, dtype, na_value, expected
):
    """``na_value`` replaces missing entries in ``to_numpy`` output."""
    converted = index_or_series(values).to_numpy(dtype=dtype, na_value=na_value)
    tm.assert_numpy_array_equal(converted, np.array(expected))
426
+
427
+
428
@pytest.mark.parametrize(
    "data, multiindex, dtype, na_value, expected",
    [
        (
            [1, 2, None, 4],
            [(0, "a"), (0, "b"), (1, "b"), (1, "c")],
            float,
            None,
            [1.0, 2.0, np.nan, 4.0],
        ),
        (
            [1, 2, None, 4],
            [(0, "a"), (0, "b"), (1, "b"), (1, "c")],
            float,
            np.nan,
            [1.0, 2.0, np.nan, 4.0],
        ),
        (
            [1.0, 2.0, np.nan, 4.0],
            [("a", 0), ("a", 1), ("a", 2), ("b", 0)],
            int,
            0,
            [1, 2, 0, 4],
        ),
        (
            [Timestamp("2000"), Timestamp("2000"), pd.NaT],
            [(0, Timestamp("2021")), (0, Timestamp("2022")), (1, Timestamp("2000"))],
            None,
            Timestamp("2000"),
            [np.datetime64("2000-01-01T00:00:00.000000000")] * 3,
        ),
    ],
)
def test_to_numpy_multiindex_series_na_value(
    data, multiindex, dtype, na_value, expected
):
    """``Series.to_numpy`` honours ``na_value`` for a MultiIndex-ed Series."""
    labelled = Series(data, index=pd.MultiIndex.from_tuples(multiindex))
    converted = labelled.to_numpy(dtype=dtype, na_value=na_value)
    tm.assert_numpy_array_equal(converted, np.array(expected))
469
+
470
+
471
def test_to_numpy_kwargs_raises():
    """Unknown keyword arguments to ``to_numpy`` raise TypeError."""
    msg = r"to_numpy\(\) got an unexpected keyword argument 'foo'"

    # default (numpy) dtype first, then the "Int64" extension dtype
    for dtype in (None, "Int64"):
        ser = Series([1, 2, 3], dtype=dtype)
        with pytest.raises(TypeError, match=msg):
            ser.to_numpy(foo=True)
482
+
483
+
484
@pytest.mark.parametrize(
    "data",
    [
        {"a": [1, 2, 3], "b": [1, 2, None]},
        {"a": np.array([1, 2, 3]), "b": np.array([1, 2, np.nan])},
        {"a": pd.array([1, 2, 3]), "b": pd.array([1, 2, None])},
    ],
)
@pytest.mark.parametrize("dtype, na_value", [(float, np.nan), (object, None)])
def test_to_numpy_dataframe_na_value(data, dtype, na_value):
    """``DataFrame.to_numpy`` puts ``na_value`` where the data is missing.

    https://github.com/pandas-dev/pandas/issues/33820
    """
    frame = pd.DataFrame(data)
    converted = frame.to_numpy(dtype=dtype, na_value=na_value)

    rows = [[1, 1], [2, 2], [3, na_value]]
    tm.assert_numpy_array_equal(converted, np.array(rows, dtype=dtype))
499
+
500
+
501
@pytest.mark.parametrize(
    "data, expected",
    [
        (
            {"a": pd.array([1, 2, None])},
            np.array([[1.0], [2.0], [np.nan]], dtype=float),
        ),
        (
            {"a": [1, 2, 3], "b": [1, 2, 3]},
            np.array([[1, 1], [2, 2], [3, 3]], dtype=float),
        ),
    ],
)
def test_to_numpy_dataframe_single_block(data, expected):
    """Float conversion with ``na_value=np.nan`` gives the expected 2-D array.

    https://github.com/pandas-dev/pandas/issues/33820
    """
    converted = pd.DataFrame(data).to_numpy(dtype=float, na_value=np.nan)
    tm.assert_numpy_array_equal(converted, expected)
519
+
520
+
521
def test_to_numpy_dataframe_single_block_no_mutate():
    """``to_numpy(na_value=...)`` must leave the source frame unchanged.

    https://github.com/pandas-dev/pandas/issues/33820
    """

    def fresh_frame():
        # each call builds its own ndarray so the two frames never share data
        return pd.DataFrame(np.array([1.0, 2.0, np.nan]))

    frame = fresh_frame()
    untouched = fresh_frame()

    frame.to_numpy(na_value=0.0)
    tm.assert_frame_equal(frame, untouched)
527
+
528
+
529
class TestAsArray:
    """``np.asarray`` conversions of datetime64 Series."""

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    def test_asarray_object_dt64(self, tz):
        """Object-dtype conversion yields Timestamps and emits no warning."""
        ser = Series(date_range("2000", periods=2, tz=tz))

        # Future behavior (for tzaware case) with no warning
        with tm.assert_produces_warning(None):
            as_objects = np.asarray(ser, dtype=object)

        stamps = [Timestamp(day, tz=tz) for day in ("2000-01-01", "2000-01-02")]
        tm.assert_numpy_array_equal(as_objects, np.array(stamps))

    def test_asarray_tz_naive(self):
        """tz-naive data converts to a plain ``M8[ns]`` array."""
        # This shouldn't produce a warning.
        ser = Series(date_range("2000", periods=2))
        wanted = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")

        tm.assert_numpy_array_equal(np.asarray(ser), wanted)

    def test_asarray_tz_aware(self):
        """tz-aware data converted to datetime64 gives the shifted naive values."""
        ser = Series(date_range("2000", periods=2, tz="US/Central"))
        wanted = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]")

        # Two spellings of the same dtype; the second one was labelled
        # "Old behavior with no warning".
        for dtype in ("datetime64[ns]", "M8[ns]"):
            tm.assert_numpy_array_equal(np.asarray(ser, dtype=dtype), wanted)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_fillna.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Although Index.fillna and Series.fillna have separate implementations,
3
+ test here to confirm that they behave the same way.
4
+ """
5
+
6
+ import numpy as np
7
+ import pytest
8
+
9
+ from pandas import MultiIndex
10
+ import pandas._testing as tm
11
+ from pandas.tests.base.common import allow_na_ops
12
+
13
+
14
def test_fillna(index_or_series_obj):
    """GH 11343: filling with an existing value returns an equal, new object."""
    obj = index_or_series_obj

    if isinstance(obj, MultiIndex):
        # fillna is not supported here; only the error is checked
        with pytest.raises(
            NotImplementedError, match="isna is not defined for MultiIndex"
        ):
            obj.fillna(0)
        return

    # values will not be changed: fill with the first element (0 when empty)
    fill_value = 0 if len(obj) == 0 else obj.values[0]
    filled = obj.fillna(fill_value)

    tm.assert_equal(obj, filled)

    # check shallow_copied
    assert filled is not obj
32
+
33
+
34
@pytest.mark.parametrize("null_obj", [np.nan, None])
def test_fillna_null(null_obj, index_or_series_obj):
    """GH 11343: nulls written into the first two slots are filled back in."""
    obj = index_or_series_obj
    klass = type(obj)

    # pytest.skip raises, so these guards behave like an if/elif chain
    if not allow_na_ops(obj):
        pytest.skip(f"{klass} doesn't allow for NA operations")
    if len(obj) < 1:
        pytest.skip("Test doesn't make sense on empty data")
    if isinstance(obj, MultiIndex):
        pytest.skip(f"MultiIndex can't hold '{null_obj}'")

    raw = obj._values
    first = raw[0]
    wanted = raw.copy()
    # null out the first two entries in place; the copy gets the fill value
    raw[0:2] = null_obj
    wanted[0:2] = first

    wanted = klass(wanted)
    obj = klass(raw)

    filled = obj.fillna(first)
    tm.assert_equal(filled, wanted)

    # check shallow_copied
    assert filled is not obj
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_misc.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+ from pandas._config import using_pyarrow_string_dtype
7
+
8
+ from pandas.compat import PYPY
9
+
10
+ from pandas.core.dtypes.common import (
11
+ is_dtype_equal,
12
+ is_object_dtype,
13
+ )
14
+
15
+ import pandas as pd
16
+ from pandas import (
17
+ Index,
18
+ Series,
19
+ )
20
+ import pandas._testing as tm
21
+
22
+
23
def test_isnull_notnull_docstrings():
    """GH#41855: the isnull/notnull docstrings open by naming their alias."""
    expected_openings = [
        (pd.DataFrame.notnull, "\nDataFrame.notnull is an alias for DataFrame.notna.\n"),
        (pd.DataFrame.isnull, "\nDataFrame.isnull is an alias for DataFrame.isna.\n"),
        (Series.notnull, "\nSeries.notnull is an alias for Series.notna.\n"),
        (Series.isnull, "\nSeries.isnull is an alias for Series.isna.\n"),
    ]
    for method, opening in expected_openings:
        assert method.__doc__.startswith(opening)
34
+
35
+
36
@pytest.mark.parametrize(
    "op_name, op",
    [
        ("add", "+"),
        ("sub", "-"),
        ("mul", "*"),
        ("mod", "%"),
        ("pow", "**"),
        ("truediv", "/"),
        ("floordiv", "//"),
    ],
)
def test_binary_ops_docstring(frame_or_series, op_name, op):
    """Arithmetic method docstrings spell out the equivalent operator expression.

    The all_arithmetic_functions fixture with _get_opstr is deliberately not
    used, as _get_opstr is used internally in the dynamic implementation of
    the docstring.
    """
    klass = frame_or_series
    self_name = klass.__name__.lower()
    other_name = "other"

    forward = f"{self_name} {op} {other_name}"
    assert forward in getattr(klass, op_name).__doc__

    # reverse version of the binary ops
    reflected = f"{other_name} {op} {self_name}"
    assert reflected in getattr(klass, "r" + op_name).__doc__
61
+
62
+
63
def test_ndarray_compat_properties(index_or_series_obj):
    """Check which ndarray-style attributes exist on Index / Series."""
    obj = index_or_series_obj

    # Check that we work.
    for name in ("shape", "dtype", "T", "nbytes"):
        assert getattr(obj, name, None) is not None

    # deprecated properties
    for name in ("strides", "itemsize", "base", "data"):
        assert not hasattr(obj, name)

    with pytest.raises(
        ValueError, match="can only convert an array of size 1 to a Python scalar"
    ):
        obj.item()  # len > 1

    assert obj.ndim == 1
    assert obj.size == len(obj)

    # a single-element container unwraps to its scalar
    for box in (Index, Series):
        assert box([1]).item() == 1
83
+
84
+
85
@pytest.mark.skipif(
    PYPY or using_pyarrow_string_dtype(),
    reason="not relevant for PyPy doesn't work properly for arrow strings",
)
def test_memory_usage(index_or_series_memory_obj):
    """Compare shallow and deep ``memory_usage`` and ``sys.getsizeof``."""
    obj = index_or_series_memory_obj
    is_ser = isinstance(obj, Series)

    # Clear index caches so that len(obj) == 0 report 0 memory usage
    index = obj.index if is_ser else obj
    index._engine.clear_mapping()

    shallow = obj.memory_usage()
    deep = obj.memory_usage(deep=True)

    # Each flag also looks at the index when obj is a Series.
    is_object = is_object_dtype(obj) or (is_ser and is_object_dtype(obj.index))
    is_categorical = isinstance(obj.dtype, pd.CategoricalDtype) or (
        is_ser and isinstance(obj.index.dtype, pd.CategoricalDtype)
    )
    is_object_string = is_dtype_equal(obj, "string[python]") or (
        is_ser and is_dtype_equal(obj.index.dtype, "string[python]")
    )

    if len(obj) == 0:
        assert deep == shallow == 0
    elif is_object or is_categorical or is_object_string:
        # only deep will pick them up
        assert deep > shallow
    else:
        assert shallow == deep

    # sys.getsizeof will call the .memory_usage with
    # deep=True, and add on some GC overhead
    overhead = deep - sys.getsizeof(obj)
    assert abs(overhead) < 100
123
+
124
+
125
def test_memory_usage_components_series(series_with_simple_index):
    """Total memory usage equals the values' usage plus the index's usage."""
    ser = series_with_simple_index
    with_index = ser.memory_usage(index=True)
    values_only = ser.memory_usage(index=False)
    index_only = ser.index.memory_usage()
    assert with_index == values_only + index_only
131
+
132
+
133
@pytest.mark.parametrize("dtype", tm.NARROW_NP_DTYPES)
def test_memory_usage_components_narrow_series(dtype):
    """Same decomposition check as above, for each narrow numpy dtype."""
    labels = [f"i-{i}" for i in range(5)]
    ser = Series(range(5), dtype=dtype, index=labels, name="a")

    with_index = ser.memory_usage(index=True)
    values_only = ser.memory_usage(index=False)
    index_only = ser.index.memory_usage()
    assert with_index == values_only + index_only
140
+
141
+
142
def test_searchsorted(request, index_or_series_obj):
    """``np.searchsorted`` on an Index/Series returns an in-range position.

    numpy.searchsorted calls obj.searchsorted under the hood. See gh-12238
    """
    obj = index_or_series_obj

    xfail_reason = None
    if isinstance(obj, pd.MultiIndex):
        # See gh-14833
        xfail_reason = "np.searchsorted doesn't work on pd.MultiIndex: GH 14833"
    elif obj.dtype.kind == "c" and isinstance(obj, Index):
        # TODO: Should Series cases also raise? Looks like they use numpy
        # comparison semantics https://github.com/numpy/numpy/issues/15981
        xfail_reason = "complex objects are not comparable"
    if xfail_reason is not None:
        request.applymarker(pytest.mark.xfail(reason=xfail_reason))

    largest = max(obj, default=0)

    # once without a sorter, once with the identity ordering as sorter
    for extra in ({}, {"sorter": range(len(obj))}):
        position = np.searchsorted(obj, largest, **extra)
        assert 0 <= position <= len(obj)
166
+
167
+
168
def test_access_by_position(index_flat):
    """Positional access agrees between an Index and a Series built from it."""
    index = index_flat

    if len(index) == 0:
        pytest.skip("Test doesn't make sense on empty data")

    series = Series(index)
    # NOTE(review): position 5 requires at least six entries, while only the
    # empty case is skipped above -- presumably the fixture guarantees this.
    for pos in (0, 5, -1):
        assert index[pos] == series.iloc[pos]

    size = len(index)
    assert index[-1] == index[size - 1]

    # pyarrow-backed string dtypes get a different out-of-bounds message
    if is_dtype_equal(index.dtype, "string[pyarrow]") or is_dtype_equal(
        index.dtype, "string[pyarrow_numpy]"
    ):
        index_msg = "index out of bounds"
    else:
        index_msg = f"index {size} is out of bounds for axis 0 with size {size}"

    with pytest.raises(IndexError, match=index_msg):
        index[size]
    with pytest.raises(IndexError, match="single positional indexer is out-of-bounds"):
        series.iloc[size]
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_transpose.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas import (
5
+ CategoricalDtype,
6
+ DataFrame,
7
+ )
8
+ import pandas._testing as tm
9
+
10
+
11
def test_transpose(index_or_series_obj):
    """Transposing an Index/Series gives back an equal object."""
    original = index_or_series_obj
    flipped = original.transpose()
    tm.assert_equal(flipped, original)
14
+
15
+
16
def test_transpose_non_default_axes(index_or_series_obj):
    """Passing ``axes`` positionally or by keyword raises ValueError."""
    obj = index_or_series_obj
    msg = "the 'axes' parameter is not supported"

    attempts = (
        lambda: obj.transpose(1),
        lambda: obj.transpose(axes=1),
    )
    for attempt in attempts:
        with pytest.raises(ValueError, match=msg):
            attempt()
23
+
24
+
25
def test_numpy_transpose(index_or_series_obj):
    """``np.transpose`` returns an equal object and rejects ``axes``."""
    obj = index_or_series_obj
    flipped = np.transpose(obj)
    tm.assert_equal(flipped, obj)

    with pytest.raises(ValueError, match="the 'axes' parameter is not supported"):
        np.transpose(obj, axes=1)
32
+
33
+
34
@pytest.mark.parametrize(
    "data, transposed_data, index, columns, dtype",
    [
        ([[1], [2]], [[1, 2]], ["a", "a"], ["b"], int),
        ([[1], [2]], [[1, 2]], ["a", "a"], ["b"], CategoricalDtype([1, 2])),
        ([[1, 2]], [[1], [2]], ["b"], ["a", "a"], int),
        ([[1, 2]], [[1], [2]], ["b"], ["a", "a"], CategoricalDtype([1, 2])),
        ([[1, 2], [3, 4]], [[1, 3], [2, 4]], ["a", "a"], ["b", "b"], int),
        (
            [[1, 2], [3, 4]],
            [[1, 3], [2, 4]],
            ["a", "a"],
            ["b", "b"],
            CategoricalDtype([1, 2, 3, 4]),
        ),
    ],
)
def test_duplicate_labels(data, transposed_data, index, columns, dtype):
    """GH 42380: ``DataFrame.T`` works with duplicated row/column labels."""
    frame = DataFrame(data, index=index, columns=columns, dtype=dtype)
    flipped = frame.T

    # the row and column labels swap roles in the expected frame
    wanted = DataFrame(transposed_data, index=columns, columns=index, dtype=dtype)
    tm.assert_frame_equal(flipped, wanted)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/base/test_unique.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas._config import using_pyarrow_string_dtype
5
+
6
+ import pandas as pd
7
+ import pandas._testing as tm
8
+ from pandas.tests.base.common import allow_na_ops
9
+
10
+
11
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_unique(index_or_series_obj):
    """``unique`` keeps first-seen order and returns the right container."""
    obj = index_or_series_obj
    # repeat the i-th element i + 1 times so duplicates exist
    obj = np.repeat(obj, range(1, len(obj) + 1))
    result = obj.unique()

    # dict.fromkeys preserves the order
    unique_values = list(dict.fromkeys(obj.values))

    if not isinstance(obj, pd.Index):
        # Series case: a plain ndarray comes back
        tm.assert_numpy_array_equal(result, np.array(unique_values))
        return

    if isinstance(obj, pd.MultiIndex):
        expected = pd.MultiIndex.from_tuples(unique_values)
        expected.names = obj.names
    else:
        expected = pd.Index(unique_values, dtype=obj.dtype)
        if isinstance(obj.dtype, pd.DatetimeTZDtype):
            expected = expected.normalize()
    tm.assert_index_equal(result, expected, exact=True)
31
+
32
+
33
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
@pytest.mark.parametrize("null_obj", [np.nan, None])
def test_unique_null(null_obj, index_or_series_obj):
    """``unique`` reports a single null entry, ahead of the other values."""
    obj = index_or_series_obj

    # pytest.skip raises, so these guards behave like an if/elif chain
    if not allow_na_ops(obj):
        pytest.skip("type doesn't allow for NA operations")
    if len(obj) < 1:
        pytest.skip("Test doesn't make sense on empty data")
    if isinstance(obj, pd.MultiIndex):
        pytest.skip(f"MultiIndex can't hold '{null_obj}'")

    raw = obj._values
    raw[0:2] = null_obj

    klass = type(obj)
    repeated = np.repeat(raw, range(1, len(raw) + 1))
    obj = klass(repeated, dtype=obj.dtype)
    result = obj.unique()

    # because np.nan == np.nan is False, but None == None is True
    # np.nan would be duplicated, whereas None wouldn't
    seen = dict.fromkeys(obj.values)
    non_null = [val for val in seen if not pd.isnull(val)]
    unique_values = [null_obj, *non_null]

    if isinstance(obj, pd.Index):
        expected = pd.Index(unique_values, dtype=obj.dtype)
        if isinstance(obj.dtype, pd.DatetimeTZDtype):
            result = result.normalize()
            expected = expected.normalize()
        tm.assert_index_equal(result, expected, exact=True)
    else:
        expected = np.array(unique_values, dtype=obj.dtype)
        tm.assert_numpy_array_equal(result, expected)
68
+
69
+
70
def test_nunique(index_or_series_obj):
    """``nunique(dropna=False)`` matches the length of ``unique()``."""
    repeated = np.repeat(index_or_series_obj, range(1, len(index_or_series_obj) + 1))
    n_distinct = len(repeated.unique())
    assert repeated.nunique(dropna=False) == n_distinct
75
+
76
+
77
@pytest.mark.parametrize("null_obj", [np.nan, None])
def test_nunique_null(null_obj, index_or_series_obj):
    """``nunique`` counts the null entry only when ``dropna=False``."""
    obj = index_or_series_obj

    # pytest.skip raises, so these guards behave like an if/elif chain
    if not allow_na_ops(obj):
        pytest.skip("type doesn't allow for NA operations")
    if isinstance(obj, pd.MultiIndex):
        pytest.skip(f"MultiIndex can't hold '{null_obj}'")

    raw = obj._values
    raw[0:2] = null_obj

    klass = type(obj)
    repeated = np.repeat(raw, range(1, len(raw) + 1))
    obj = klass(repeated, dtype=obj.dtype)

    if isinstance(obj, pd.CategoricalIndex):
        # the null is not one of the categories, so it adds exactly one
        assert obj.nunique() == len(obj.categories)
        assert obj.nunique(dropna=False) == len(obj.categories) + 1
    else:
        n_distinct = len(obj.unique())
        assert obj.nunique() == max(0, n_distinct - 1)
        assert obj.nunique(dropna=False) == max(0, n_distinct)
100
+
101
+
102
@pytest.mark.single_cpu
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="decoding fails")
def test_unique_bad_unicode(index_or_series):
    """Regression test for #34550: ``unique`` copes with a lone surrogate."""
    # originally annotated "smiley emoji"; the literal is the single code
    # unit \ud83d on its own
    uval = "\ud83d"

    obj = index_or_series([uval] * 2)
    result = obj.unique()

    expected_values = [uval]
    if isinstance(obj, pd.Index):
        wanted = pd.Index(expected_values, dtype=object)
        tm.assert_index_equal(result, wanted, exact=True)
    else:
        wanted = np.array(expected_values, dtype=object)
        tm.assert_numpy_array_equal(result, wanted)
117
+
118
+
119
@pytest.mark.parametrize("dropna", [True, False])
def test_nunique_dropna(dropna):
    """GH37566: ``nunique`` with mixed null markers.

    The Series holds "yes" twice plus four different null markers
    (``pd.NA``, ``np.nan``, ``None``, ``pd.NaT``). With ``dropna=True`` only
    "yes" is counted; with ``dropna=False`` the expected count is 5.
    """
    ser = pd.Series(["yes", "yes", pd.NA, np.nan, None, pd.NaT])
    res = ser.nunique(dropna)

    # The expected value is computed before comparing. The former
    # ``assert res == 1 if dropna else 5`` parsed as
    # ``assert (res == 1) if dropna else 5``, so the dropna=False case
    # asserted the truthy constant 5 and could never fail.
    expected = 1 if dropna else 5
    assert res == expected