BryanW committed on
Commit
bfeb483
·
verified ·
1 Parent(s): f5b5a3b

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/algorithms.py +1747 -0
  2. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/flags.py +117 -0
  3. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/roperator.py +62 -0
  4. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__init__.py +0 -0
  5. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_aggregation.py +93 -0
  6. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_algos.py +2041 -0
  7. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_common.py +267 -0
  8. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_downstream.py +362 -0
  9. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_errors.py +112 -0
  10. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_expressions.py +466 -0
  11. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_flags.py +48 -0
  12. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_multilevel.py +355 -0
  13. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_nanops.py +1274 -0
  14. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_optional_dependency.py +100 -0
  15. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_register_accessor.py +103 -0
  16. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_sorting.py +487 -0
  17. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_take.py +307 -0
  18. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/__init__.py +29 -0
  19. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_decorators.py +508 -0
  20. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_doctools.py +202 -0
  21. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_exceptions.py +103 -0
  22. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_print_versions.py +158 -0
  23. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_test_decorators.py +173 -0
  24. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_tester.py +53 -0
  25. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_validators.py +456 -0
  26. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/__pycache__/__init__.cpython-312.pyc +0 -0
  27. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/__pycache__/decompositions_for_jvp.cpython-312.pyc +0 -0
  28. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/__pycache__/decompositions_for_rng.cpython-312.pyc +0 -0
  29. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/__init__.py +61 -0
  30. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/assume_constant_result.py +20 -0
  31. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/autograd_function.py +25 -0
  32. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/class_method.py +22 -0
  33. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_branch_class_method.py +44 -0
  34. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_branch_nested_function.py +41 -0
  35. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_branch_nonlocal_variables.py +59 -0
  36. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_closed_over_variable.py +22 -0
  37. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_operands.py +35 -0
  38. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_predicate.py +25 -0
  39. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/constrain_as_size_example.py +23 -0
  40. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/constrain_as_value_example.py +26 -0
  41. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/decorator.py +23 -0
  42. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dictionary.py +17 -0
  43. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_assert.py +18 -0
  44. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_constructor.py +15 -0
  45. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_if_guard.py +19 -0
  46. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_map.py +19 -0
  47. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_round.py +21 -0
  48. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_slicing.py +15 -0
  49. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/fn_with_kwargs.py +30 -0
  50. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/list_contains.py +17 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/algorithms.py ADDED
@@ -0,0 +1,1747 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Generic data algorithms. This module is experimental at the moment and not
3
+ intended for public consumption
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import decimal
8
+ import operator
9
+ from textwrap import dedent
10
+ from typing import (
11
+ TYPE_CHECKING,
12
+ Literal,
13
+ cast,
14
+ )
15
+ import warnings
16
+
17
+ import numpy as np
18
+
19
+ from pandas._libs import (
20
+ algos,
21
+ hashtable as htable,
22
+ iNaT,
23
+ lib,
24
+ )
25
+ from pandas._typing import (
26
+ AnyArrayLike,
27
+ ArrayLike,
28
+ AxisInt,
29
+ DtypeObj,
30
+ TakeIndexer,
31
+ npt,
32
+ )
33
+ from pandas.util._decorators import doc
34
+ from pandas.util._exceptions import find_stack_level
35
+
36
+ from pandas.core.dtypes.cast import (
37
+ construct_1d_object_array_from_listlike,
38
+ np_find_common_type,
39
+ )
40
+ from pandas.core.dtypes.common import (
41
+ ensure_float64,
42
+ ensure_object,
43
+ ensure_platform_int,
44
+ is_array_like,
45
+ is_bool_dtype,
46
+ is_complex_dtype,
47
+ is_dict_like,
48
+ is_extension_array_dtype,
49
+ is_float_dtype,
50
+ is_integer,
51
+ is_integer_dtype,
52
+ is_list_like,
53
+ is_object_dtype,
54
+ is_signed_integer_dtype,
55
+ needs_i8_conversion,
56
+ )
57
+ from pandas.core.dtypes.concat import concat_compat
58
+ from pandas.core.dtypes.dtypes import (
59
+ BaseMaskedDtype,
60
+ CategoricalDtype,
61
+ ExtensionDtype,
62
+ NumpyEADtype,
63
+ )
64
+ from pandas.core.dtypes.generic import (
65
+ ABCDatetimeArray,
66
+ ABCExtensionArray,
67
+ ABCIndex,
68
+ ABCMultiIndex,
69
+ ABCSeries,
70
+ ABCTimedeltaArray,
71
+ )
72
+ from pandas.core.dtypes.missing import (
73
+ isna,
74
+ na_value_for_dtype,
75
+ )
76
+
77
+ from pandas.core.array_algos.take import take_nd
78
+ from pandas.core.construction import (
79
+ array as pd_array,
80
+ ensure_wrapped_if_datetimelike,
81
+ extract_array,
82
+ )
83
+ from pandas.core.indexers import validate_indices
84
+
85
+ if TYPE_CHECKING:
86
+ from pandas._typing import (
87
+ ListLike,
88
+ NumpySorter,
89
+ NumpyValueArrayLike,
90
+ )
91
+
92
+ from pandas import (
93
+ Categorical,
94
+ Index,
95
+ Series,
96
+ )
97
+ from pandas.core.arrays import (
98
+ BaseMaskedArray,
99
+ ExtensionArray,
100
+ )
101
+
102
+
103
+ # --------------- #
104
+ # dtype access #
105
+ # --------------- #
106
def _ensure_data(values: ArrayLike) -> np.ndarray:
    """
    Coerce ``values`` to an ndarray that the lower-level routines accept.

    The first matching rule below is applied:
    - object dtype -> object ndarray
    - masked dtype without NAs -> this function applied to the underlying data
    - masked dtype with NAs -> ``np.asarray`` of the masked array
    - categorical -> codes
    - bool -> uint8
    - integer -> ``np.asarray`` of the values (dtype is left as-is)
    - float -> float64 when the itemsize is 2, 12 or 16, otherwise as-is
    - complex -> returned as-is
    - datetimelike -> i8 view
    - anything else -> object ndarray

    Parameters
    ----------
    values : np.ndarray or ExtensionArray

    Returns
    -------
    np.ndarray
    """
    if not isinstance(values, ABCMultiIndex):
        # extract_array would raise on a MultiIndex, so it is skipped there
        values = extract_array(values, extract_numpy=True)

    dtype = values.dtype

    if is_object_dtype(dtype):
        return ensure_object(np.asarray(values))

    if isinstance(dtype, BaseMaskedDtype):
        # i.e. BooleanArray, FloatingArray, IntegerArray
        values = cast("BaseMaskedArray", values)
        if values._hasna:
            return np.asarray(values)
        # No pd.NAs -> We can avoid an object-dtype cast (and copy) GH#41816
        # recurse to avoid re-implementing logic for eg bool->uint8
        return _ensure_data(values._data)

    if isinstance(dtype, CategoricalDtype):
        # NB: cases that go through here should NOT be using _reconstruct_data
        #  on the back-end.
        values = cast("Categorical", values)
        return values.codes

    if is_bool_dtype(dtype):
        as_ndarray = np.asarray(values)
        if isinstance(values, np.ndarray):
            # i.e. actually dtype == np.dtype("bool")
            return as_ndarray.view("uint8")
        # e.g. Sparse[bool, False]  # TODO: no test cases get here
        return as_ndarray.astype("uint8", copy=False)

    if is_integer_dtype(dtype):
        return np.asarray(values)

    if is_float_dtype(dtype):
        # Note: checking `values.dtype == "float128"` raises on Windows and 32bit
        # error: Item "ExtensionDtype" of "Union[Any, ExtensionDtype, dtype[Any]]"
        # has no attribute "itemsize"
        if dtype.itemsize in (2, 12, 16):  # type: ignore[union-attr]
            # we dont (yet) have float128 hashtable support
            return ensure_float64(values)
        return np.asarray(values)

    if is_complex_dtype(dtype):
        return cast(np.ndarray, values)

    # datetimelike
    if needs_i8_conversion(dtype):
        return cast(np.ndarray, values.view("i8"))

    # none of the rules matched: fall back to object
    return ensure_object(np.asarray(values, dtype=object))
182
+
183
+
184
def _reconstruct_data(
    values: ArrayLike, dtype: DtypeObj, original: AnyArrayLike
) -> ArrayLike:
    """
    Cast ``values`` back to ``dtype``; the inverse step of _ensure_data.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    dtype : np.dtype or ExtensionDtype
        The dtype to restore.
    original : AnyArrayLike
        Not read by this function.

    Returns
    -------
    ExtensionArray or np.ndarray
    """
    if isinstance(values, ABCExtensionArray) and values.dtype == dtype:
        # Catch DatetimeArray/TimedeltaArray: already the requested dtype
        return values

    if isinstance(dtype, np.dtype):
        return values.astype(dtype, copy=False)

    # i.e. ExtensionDtype; the check above has already ruled out the
    #  possibility that values.dtype == dtype
    array_cls = dtype.construct_array_type()
    return array_cls._from_sequence(values, dtype=dtype)
215
+
216
+
217
def _ensure_arraylike(values, func_name: str) -> ArrayLike:
    """
    Return ``values`` unchanged if it is already an Index, Series,
    ExtensionArray or ndarray; otherwise convert it to an ndarray.

    A FutureWarning is emitted for the conversion case unless ``func_name``
    is "isin-targets".
    """
    if isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)):
        return values

    # GH#52986
    # Make an exception for the comps argument in isin.
    should_warn = func_name != "isin-targets"
    if should_warn:
        # NOTE(review): the message contains a doubled "not"; it is kept
        #  verbatim here since the text is emitted at runtime.
        warnings.warn(
            f"{func_name} with argument that is not not a Series, Index, "
            "ExtensionArray, or np.ndarray is deprecated and will raise in a "
            "future version.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    kind = lib.infer_dtype(values, skipna=False)
    if kind not in ["mixed", "string", "mixed-integer"]:
        return np.asarray(values)

    # "mixed-integer" to ensure we do not cast ["ss", 42] to str GH#22160
    if isinstance(values, tuple):
        values = list(values)
    return construct_1d_object_array_from_listlike(values)
242
+
243
+
244
# Lookup from the key produced by _check_object_for_strings (an ndarray dtype
# name, or "string" for object arrays that hold only strings) to the
# hashtable class used for that kind of data.
_hashtables = {
    "complex128": htable.Complex128HashTable,
    "complex64": htable.Complex64HashTable,
    "float64": htable.Float64HashTable,
    "float32": htable.Float32HashTable,
    "uint64": htable.UInt64HashTable,
    "uint32": htable.UInt32HashTable,
    "uint16": htable.UInt16HashTable,
    "uint8": htable.UInt8HashTable,
    "int64": htable.Int64HashTable,
    "int32": htable.Int32HashTable,
    "int16": htable.Int16HashTable,
    "int8": htable.Int8HashTable,
    "string": htable.StringHashTable,
    "object": htable.PyObjectHashTable,
}
260
+
261
+
262
def _get_hashtable_algo(values: np.ndarray):
    """
    Pick the hashtable class for ``values`` and return it with the coerced data.

    Parameters
    ----------
    values : np.ndarray

    Returns
    -------
    htable : HashTable subclass
        Looked up in ``_hashtables`` from the coerced data.
    values : ndarray
        The input after passing through ``_ensure_data``.
    """
    coerced = _ensure_data(values)
    table_cls = _hashtables[_check_object_for_strings(coerced)]
    return table_cls, coerced
278
+
279
+
280
def _check_object_for_strings(values: np.ndarray) -> str:
    """
    Return the hashtable key for ``values``.

    This is the dtype name, except that an object array holding only strings
    is reported as "string" so the string hashtable can be used instead of
    the object one.

    Parameters
    ----------
    values : ndarray

    Returns
    -------
    str
    """
    dtype_name = values.dtype.name
    # it's cheaper to use a String Hash Table than Object; we infer
    # including nulls because that is the only difference between
    # StringHashTable and ObjectHashtable
    if dtype_name == "object" and lib.is_string_array(values, skipna=False):
        return "string"
    return dtype_name
300
+
301
+
302
+ # --------------- #
303
+ # top-level algos #
304
+ # --------------- #
305
+
306
+
307
def unique(values):
    """
    Return unique values based on a hash table.

    Values come back in the order they first appear; no sorting is done.

    Significantly faster than numpy.unique for long enough sequences.
    Includes NA values.

    Parameters
    ----------
    values : 1d array-like

    Returns
    -------
    numpy.ndarray or ExtensionArray

        The return can be:

        * Index : when the input is an Index
        * Categorical : when the input is a Categorical dtype
        * ndarray : when the input is a Series/ndarray

    See Also
    --------
    Index.unique : Return unique values from an Index.
    Series.unique : Return unique values of Series object.

    Examples
    --------
    >>> pd.unique(pd.Series([2, 1, 3, 3]))
    array([2, 1, 3])

    >>> pd.unique(pd.Series([2] + [1] * 5))
    array([2, 1])

    >>> pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")]))
    array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')

    >>> pd.unique(
    ...     pd.Series(
    ...         [
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...         ]
    ...     )
    ... )
    <DatetimeArray>
    ['2016-01-01 00:00:00-05:00']
    Length: 1, dtype: datetime64[ns, US/Eastern]

    >>> pd.unique(
    ...     pd.Index(
    ...         [
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...         ]
    ...     )
    ... )
    DatetimeIndex(['2016-01-01 00:00:00-05:00'],
            dtype='datetime64[ns, US/Eastern]',
            freq=None)

    >>> pd.unique(np.array(list("baabc"), dtype="O"))
    array(['b', 'a', 'c'], dtype=object)

    An unordered Categorical will return categories in the
    order of appearance.

    >>> pd.unique(pd.Series(pd.Categorical(list("baabc"))))
    ['b', 'a', 'c']
    Categories (3, object): ['a', 'b', 'c']

    >>> pd.unique(pd.Series(pd.Categorical(list("baabc"), categories=list("abc"))))
    ['b', 'a', 'c']
    Categories (3, object): ['a', 'b', 'c']

    An ordered Categorical preserves the category ordering.

    >>> pd.unique(
    ...     pd.Series(
    ...         pd.Categorical(list("baabc"), categories=list("abc"), ordered=True)
    ...     )
    ... )
    ['b', 'a', 'c']
    Categories (3, object): ['a' < 'b' < 'c']

    An array of tuples

    >>> pd.unique(pd.Series([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]).values)
    array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object)
    """
    # Delegates to the mask-aware implementation with no mask supplied.
    return unique_with_mask(values, mask=None)
402
+
403
+
404
def nunique_ints(values: ArrayLike) -> int:
    """
    Return the number of unique values for integer array-likes.

    Significantly faster than pandas.unique for long enough sequences.
    No checks are done to ensure input is integral.

    Parameters
    ----------
    values : 1d array-like

    Returns
    -------
    int : The number of unique values in ``values``
    """
    if len(values) == 0:
        return 0
    values = _ensure_data(values)
    # bincount requires intp
    counts = np.bincount(values.ravel().astype("intp"))
    # ``.sum()`` yields a NumPy integer scalar; convert it so the result is a
    # built-in int, as the signature declares and as the empty-input branch
    # above already returns.
    return int((counts != 0).sum())
425
+
426
+
427
+ def unique_with_mask(values, mask: npt.NDArray[np.bool_] | None = None):
428
+ """See algorithms.unique for docs. Takes a mask for masked arrays."""
429
+ values = _ensure_arraylike(values, func_name="unique")
430
+
431
+ if isinstance(values.dtype, ExtensionDtype):
432
+ # Dispatch to extension dtype's unique.
433
+ return values.unique()
434
+
435
+ original = values
436
+ hashtable, values = _get_hashtable_algo(values)
437
+
438
+ table = hashtable(len(values))
439
+ if mask is None:
440
+ uniques = table.unique(values)
441
+ uniques = _reconstruct_data(uniques, original.dtype, original)
442
+ return uniques
443
+
444
+ else:
445
+ uniques, mask = table.unique(values, mask=mask)
446
+ uniques = _reconstruct_data(uniques, original.dtype, original)
447
+ assert mask is not None # for mypy
448
+ return uniques, mask.astype("bool")
449
+
450
+
451
# Second name bound to the same function object as ``unique``.
unique1d = unique


# ``isin`` only considers the ``np.isin`` path when ``comps`` is longer than
# this many elements.
_MINIMUM_COMP_ARR_LEN = 1_000_000
455
+
456
+
457
+ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]:
458
+ """
459
+ Compute the isin boolean array.
460
+
461
+ Parameters
462
+ ----------
463
+ comps : list-like
464
+ values : list-like
465
+
466
+ Returns
467
+ -------
468
+ ndarray[bool]
469
+ Same length as `comps`.
470
+ """
471
+ if not is_list_like(comps):
472
+ raise TypeError(
473
+ "only list-like objects are allowed to be passed "
474
+ f"to isin(), you passed a `{type(comps).__name__}`"
475
+ )
476
+ if not is_list_like(values):
477
+ raise TypeError(
478
+ "only list-like objects are allowed to be passed "
479
+ f"to isin(), you passed a `{type(values).__name__}`"
480
+ )
481
+
482
+ if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)):
483
+ orig_values = list(values)
484
+ values = _ensure_arraylike(orig_values, func_name="isin-targets")
485
+
486
+ if (
487
+ len(values) > 0
488
+ and values.dtype.kind in "iufcb"
489
+ and not is_signed_integer_dtype(comps)
490
+ ):
491
+ # GH#46485 Use object to avoid upcast to float64 later
492
+ # TODO: Share with _find_common_type_compat
493
+ values = construct_1d_object_array_from_listlike(orig_values)
494
+
495
+ elif isinstance(values, ABCMultiIndex):
496
+ # Avoid raising in extract_array
497
+ values = np.array(values)
498
+ else:
499
+ values = extract_array(values, extract_numpy=True, extract_range=True)
500
+
501
+ comps_array = _ensure_arraylike(comps, func_name="isin")
502
+ comps_array = extract_array(comps_array, extract_numpy=True)
503
+ if not isinstance(comps_array, np.ndarray):
504
+ # i.e. Extension Array
505
+ return comps_array.isin(values)
506
+
507
+ elif needs_i8_conversion(comps_array.dtype):
508
+ # Dispatch to DatetimeLikeArrayMixin.isin
509
+ return pd_array(comps_array).isin(values)
510
+ elif needs_i8_conversion(values.dtype) and not is_object_dtype(comps_array.dtype):
511
+ # e.g. comps_array are integers and values are datetime64s
512
+ return np.zeros(comps_array.shape, dtype=bool)
513
+ # TODO: not quite right ... Sparse/Categorical
514
+ elif needs_i8_conversion(values.dtype):
515
+ return isin(comps_array, values.astype(object))
516
+
517
+ elif isinstance(values.dtype, ExtensionDtype):
518
+ return isin(np.asarray(comps_array), np.asarray(values))
519
+
520
+ # GH16012
521
+ # Ensure np.isin doesn't get object types or it *may* throw an exception
522
+ # Albeit hashmap has O(1) look-up (vs. O(logn) in sorted array),
523
+ # isin is faster for small sizes
524
+ if (
525
+ len(comps_array) > _MINIMUM_COMP_ARR_LEN
526
+ and len(values) <= 26
527
+ and comps_array.dtype != object
528
+ ):
529
+ # If the values include nan we need to check for nan explicitly
530
+ # since np.nan it not equal to np.nan
531
+ if isna(values).any():
532
+
533
+ def f(c, v):
534
+ return np.logical_or(np.isin(c, v).ravel(), np.isnan(c))
535
+
536
+ else:
537
+ f = lambda a, b: np.isin(a, b).ravel()
538
+
539
+ else:
540
+ common = np_find_common_type(values.dtype, comps_array.dtype)
541
+ values = values.astype(common, copy=False)
542
+ comps_array = comps_array.astype(common, copy=False)
543
+ f = htable.ismember
544
+
545
+ return f(comps_array, values)
546
+
547
+
548
+ def factorize_array(
549
+ values: np.ndarray,
550
+ use_na_sentinel: bool = True,
551
+ size_hint: int | None = None,
552
+ na_value: object = None,
553
+ mask: npt.NDArray[np.bool_] | None = None,
554
+ ) -> tuple[npt.NDArray[np.intp], np.ndarray]:
555
+ """
556
+ Factorize a numpy array to codes and uniques.
557
+
558
+ This doesn't do any coercion of types or unboxing before factorization.
559
+
560
+ Parameters
561
+ ----------
562
+ values : ndarray
563
+ use_na_sentinel : bool, default True
564
+ If True, the sentinel -1 will be used for NaN values. If False,
565
+ NaN values will be encoded as non-negative integers and will not drop the
566
+ NaN from the uniques of the values.
567
+ size_hint : int, optional
568
+ Passed through to the hashtable's 'get_labels' method
569
+ na_value : object, optional
570
+ A value in `values` to consider missing. Note: only use this
571
+ parameter when you know that you don't have any values pandas would
572
+ consider missing in the array (NaN for float data, iNaT for
573
+ datetimes, etc.).
574
+ mask : ndarray[bool], optional
575
+ If not None, the mask is used as indicator for missing values
576
+ (True = missing, False = valid) instead of `na_value` or
577
+ condition "val != val".
578
+
579
+ Returns
580
+ -------
581
+ codes : ndarray[np.intp]
582
+ uniques : ndarray
583
+ """
584
+ original = values
585
+ if values.dtype.kind in "mM":
586
+ # _get_hashtable_algo will cast dt64/td64 to i8 via _ensure_data, so we
587
+ # need to do the same to na_value. We are assuming here that the passed
588
+ # na_value is an appropriately-typed NaT.
589
+ # e.g. test_where_datetimelike_categorical
590
+ na_value = iNaT
591
+
592
+ hash_klass, values = _get_hashtable_algo(values)
593
+
594
+ table = hash_klass(size_hint or len(values))
595
+ uniques, codes = table.factorize(
596
+ values,
597
+ na_sentinel=-1,
598
+ na_value=na_value,
599
+ mask=mask,
600
+ ignore_na=use_na_sentinel,
601
+ )
602
+
603
+ # re-cast e.g. i8->dt64/td64, uint8->bool
604
+ uniques = _reconstruct_data(uniques, original.dtype, original)
605
+
606
+ codes = ensure_platform_int(codes)
607
+ return codes, uniques
608
+
609
+
610
+ @doc(
611
+ values=dedent(
612
+ """\
613
+ values : sequence
614
+ A 1-D sequence. Sequences that aren't pandas objects are
615
+ coerced to ndarrays before factorization.
616
+ """
617
+ ),
618
+ sort=dedent(
619
+ """\
620
+ sort : bool, default False
621
+ Sort `uniques` and shuffle `codes` to maintain the
622
+ relationship.
623
+ """
624
+ ),
625
+ size_hint=dedent(
626
+ """\
627
+ size_hint : int, optional
628
+ Hint to the hashtable sizer.
629
+ """
630
+ ),
631
+ )
632
+ def factorize(
633
+ values,
634
+ sort: bool = False,
635
+ use_na_sentinel: bool = True,
636
+ size_hint: int | None = None,
637
+ ) -> tuple[np.ndarray, np.ndarray | Index]:
638
+ """
639
+ Encode the object as an enumerated type or categorical variable.
640
+
641
+ This method is useful for obtaining a numeric representation of an
642
+ array when all that matters is identifying distinct values. `factorize`
643
+ is available as both a top-level function :func:`pandas.factorize`,
644
+ and as a method :meth:`Series.factorize` and :meth:`Index.factorize`.
645
+
646
+ Parameters
647
+ ----------
648
+ {values}{sort}
649
+ use_na_sentinel : bool, default True
650
+ If True, the sentinel -1 will be used for NaN values. If False,
651
+ NaN values will be encoded as non-negative integers and will not drop the
652
+ NaN from the uniques of the values.
653
+
654
+ .. versionadded:: 1.5.0
655
+ {size_hint}\
656
+
657
+ Returns
658
+ -------
659
+ codes : ndarray
660
+ An integer ndarray that's an indexer into `uniques`.
661
+ ``uniques.take(codes)`` will have the same values as `values`.
662
+ uniques : ndarray, Index, or Categorical
663
+ The unique valid values. When `values` is Categorical, `uniques`
664
+ is a Categorical. When `values` is some other pandas object, an
665
+ `Index` is returned. Otherwise, a 1-D ndarray is returned.
666
+
667
+ .. note::
668
+
669
+ Even if there's a missing value in `values`, `uniques` will
670
+ *not* contain an entry for it.
671
+
672
+ See Also
673
+ --------
674
+ cut : Discretize continuous-valued array.
675
+ unique : Find the unique value in an array.
676
+
677
+ Notes
678
+ -----
679
+ Reference :ref:`the user guide <reshaping.factorize>` for more examples.
680
+
681
+ Examples
682
+ --------
683
+ These examples all show factorize as a top-level method like
684
+ ``pd.factorize(values)``. The results are identical for methods like
685
+ :meth:`Series.factorize`.
686
+
687
+ >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"))
688
+ >>> codes
689
+ array([0, 0, 1, 2, 0])
690
+ >>> uniques
691
+ array(['b', 'a', 'c'], dtype=object)
692
+
693
+ With ``sort=True``, the `uniques` will be sorted, and `codes` will be
694
+ shuffled so that the relationship is the maintained.
695
+
696
+ >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"),
697
+ ... sort=True)
698
+ >>> codes
699
+ array([1, 1, 0, 2, 1])
700
+ >>> uniques
701
+ array(['a', 'b', 'c'], dtype=object)
702
+
703
+ When ``use_na_sentinel=True`` (the default), missing values are indicated in
704
+ the `codes` with the sentinel value ``-1`` and missing values are not
705
+ included in `uniques`.
706
+
707
+ >>> codes, uniques = pd.factorize(np.array(['b', None, 'a', 'c', 'b'], dtype="O"))
708
+ >>> codes
709
+ array([ 0, -1, 1, 2, 0])
710
+ >>> uniques
711
+ array(['b', 'a', 'c'], dtype=object)
712
+
713
+ Thus far, we've only factorized lists (which are internally coerced to
714
+ NumPy arrays). When factorizing pandas objects, the type of `uniques`
715
+ will differ. For Categoricals, a `Categorical` is returned.
716
+
717
+ >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c'])
718
+ >>> codes, uniques = pd.factorize(cat)
719
+ >>> codes
720
+ array([0, 0, 1])
721
+ >>> uniques
722
+ ['a', 'c']
723
+ Categories (3, object): ['a', 'b', 'c']
724
+
725
+ Notice that ``'b'`` is in ``uniques.categories``, despite not being
726
+ present in ``cat.values``.
727
+
728
+ For all other pandas objects, an Index of the appropriate type is
729
+ returned.
730
+
731
+ >>> cat = pd.Series(['a', 'a', 'c'])
732
+ >>> codes, uniques = pd.factorize(cat)
733
+ >>> codes
734
+ array([0, 0, 1])
735
+ >>> uniques
736
+ Index(['a', 'c'], dtype='object')
737
+
738
+ If NaN is in the values, and we want to include NaN in the uniques of the
739
+ values, it can be achieved by setting ``use_na_sentinel=False``.
740
+
741
+ >>> values = np.array([1, 2, 1, np.nan])
742
+ >>> codes, uniques = pd.factorize(values) # default: use_na_sentinel=True
743
+ >>> codes
744
+ array([ 0, 1, 0, -1])
745
+ >>> uniques
746
+ array([1., 2.])
747
+
748
+ >>> codes, uniques = pd.factorize(values, use_na_sentinel=False)
749
+ >>> codes
750
+ array([0, 1, 0, 2])
751
+ >>> uniques
752
+ array([ 1., 2., nan])
753
+ """
754
+ # Implementation notes: This method is responsible for 3 things
755
+ # 1.) coercing data to array-like (ndarray, Index, extension array)
756
+ # 2.) factorizing codes and uniques
757
+ # 3.) Maybe boxing the uniques in an Index
758
+ #
759
+ # Step 2 is dispatched to extension types (like Categorical). They are
760
+ # responsible only for factorization. All data coercion, sorting and boxing
761
+ # should happen here.
762
+ if isinstance(values, (ABCIndex, ABCSeries)):
763
+ return values.factorize(sort=sort, use_na_sentinel=use_na_sentinel)
764
+
765
+ values = _ensure_arraylike(values, func_name="factorize")
766
+ original = values
767
+
768
+ if (
769
+ isinstance(values, (ABCDatetimeArray, ABCTimedeltaArray))
770
+ and values.freq is not None
771
+ ):
772
+ # The presence of 'freq' means we can fast-path sorting and know there
773
+ # aren't NAs
774
+ codes, uniques = values.factorize(sort=sort)
775
+ return codes, uniques
776
+
777
+ elif not isinstance(values, np.ndarray):
778
+ # i.e. ExtensionArray
779
+ codes, uniques = values.factorize(use_na_sentinel=use_na_sentinel)
780
+
781
+ else:
782
+ values = np.asarray(values) # convert DTA/TDA/MultiIndex
783
+
784
+ if not use_na_sentinel and values.dtype == object:
785
+ # factorize can now handle differentiating various types of null values.
786
+ # These can only occur when the array has object dtype.
787
+ # However, for backwards compatibility we only use the null for the
788
+ # provided dtype. This may be revisited in the future, see GH#48476.
789
+ null_mask = isna(values)
790
+ if null_mask.any():
791
+ na_value = na_value_for_dtype(values.dtype, compat=False)
792
+ # Don't modify (potentially user-provided) array
793
+ values = np.where(null_mask, na_value, values)
794
+
795
+ codes, uniques = factorize_array(
796
+ values,
797
+ use_na_sentinel=use_na_sentinel,
798
+ size_hint=size_hint,
799
+ )
800
+
801
+ if sort and len(uniques) > 0:
802
+ uniques, codes = safe_sort(
803
+ uniques,
804
+ codes,
805
+ use_na_sentinel=use_na_sentinel,
806
+ assume_unique=True,
807
+ verify=False,
808
+ )
809
+
810
+ uniques = _reconstruct_data(uniques, original.dtype, original)
811
+
812
+ return codes, uniques
813
+
814
+
815
+ def value_counts(
816
+ values,
817
+ sort: bool = True,
818
+ ascending: bool = False,
819
+ normalize: bool = False,
820
+ bins=None,
821
+ dropna: bool = True,
822
+ ) -> Series:
823
+ """
824
+ Compute a histogram of the counts of non-null values.
825
+
826
+ Parameters
827
+ ----------
828
+ values : ndarray (1-d)
829
+ sort : bool, default True
830
+ Sort by values
831
+ ascending : bool, default False
832
+ Sort in ascending order
833
+ normalize: bool, default False
834
+ If True then compute a relative histogram
835
+ bins : integer, optional
836
+ Rather than count values, group them into half-open bins,
837
+ convenience for pd.cut, only works with numeric data
838
+ dropna : bool, default True
839
+ Don't include counts of NaN
840
+
841
+ Returns
842
+ -------
843
+ Series
844
+ """
845
+ warnings.warn(
846
+ # GH#53493
847
+ "pandas.value_counts is deprecated and will be removed in a "
848
+ "future version. Use pd.Series(obj).value_counts() instead.",
849
+ FutureWarning,
850
+ stacklevel=find_stack_level(),
851
+ )
852
+ return value_counts_internal(
853
+ values,
854
+ sort=sort,
855
+ ascending=ascending,
856
+ normalize=normalize,
857
+ bins=bins,
858
+ dropna=dropna,
859
+ )
860
+
861
+
862
+ def value_counts_internal(
863
+ values,
864
+ sort: bool = True,
865
+ ascending: bool = False,
866
+ normalize: bool = False,
867
+ bins=None,
868
+ dropna: bool = True,
869
+ ) -> Series:
870
+ from pandas import (
871
+ Index,
872
+ Series,
873
+ )
874
+
875
+ index_name = getattr(values, "name", None)
876
+ name = "proportion" if normalize else "count"
877
+
878
+ if bins is not None:
879
+ from pandas.core.reshape.tile import cut
880
+
881
+ if isinstance(values, Series):
882
+ values = values._values
883
+
884
+ try:
885
+ ii = cut(values, bins, include_lowest=True)
886
+ except TypeError as err:
887
+ raise TypeError("bins argument only works with numeric data.") from err
888
+
889
+ # count, remove nulls (from the index), and but the bins
890
+ result = ii.value_counts(dropna=dropna)
891
+ result.name = name
892
+ result = result[result.index.notna()]
893
+ result.index = result.index.astype("interval")
894
+ result = result.sort_index()
895
+
896
+ # if we are dropna and we have NO values
897
+ if dropna and (result._values == 0).all():
898
+ result = result.iloc[0:0]
899
+
900
+ # normalizing is by len of all (regardless of dropna)
901
+ counts = np.array([len(ii)])
902
+
903
+ else:
904
+ if is_extension_array_dtype(values):
905
+ # handle Categorical and sparse,
906
+ result = Series(values, copy=False)._values.value_counts(dropna=dropna)
907
+ result.name = name
908
+ result.index.name = index_name
909
+ counts = result._values
910
+ if not isinstance(counts, np.ndarray):
911
+ # e.g. ArrowExtensionArray
912
+ counts = np.asarray(counts)
913
+
914
+ elif isinstance(values, ABCMultiIndex):
915
+ # GH49558
916
+ levels = list(range(values.nlevels))
917
+ result = (
918
+ Series(index=values, name=name)
919
+ .groupby(level=levels, dropna=dropna)
920
+ .size()
921
+ )
922
+ result.index.names = values.names
923
+ counts = result._values
924
+
925
+ else:
926
+ values = _ensure_arraylike(values, func_name="value_counts")
927
+ keys, counts, _ = value_counts_arraylike(values, dropna)
928
+ if keys.dtype == np.float16:
929
+ keys = keys.astype(np.float32)
930
+
931
+ # For backwards compatibility, we let Index do its normal type
932
+ # inference, _except_ for if if infers from object to bool.
933
+ idx = Index(keys)
934
+ if idx.dtype == bool and keys.dtype == object:
935
+ idx = idx.astype(object)
936
+ elif (
937
+ idx.dtype != keys.dtype # noqa: PLR1714 # # pylint: disable=R1714
938
+ and idx.dtype != "string[pyarrow_numpy]"
939
+ ):
940
+ warnings.warn(
941
+ # GH#56161
942
+ "The behavior of value_counts with object-dtype is deprecated. "
943
+ "In a future version, this will *not* perform dtype inference "
944
+ "on the resulting index. To retain the old behavior, use "
945
+ "`result.index = result.index.infer_objects()`",
946
+ FutureWarning,
947
+ stacklevel=find_stack_level(),
948
+ )
949
+ idx.name = index_name
950
+
951
+ result = Series(counts, index=idx, name=name, copy=False)
952
+
953
+ if sort:
954
+ result = result.sort_values(ascending=ascending)
955
+
956
+ if normalize:
957
+ result = result / counts.sum()
958
+
959
+ return result
960
+
961
+
962
+ # Called once from SparseArray, otherwise could be private
963
+ def value_counts_arraylike(
964
+ values: np.ndarray, dropna: bool, mask: npt.NDArray[np.bool_] | None = None
965
+ ) -> tuple[ArrayLike, npt.NDArray[np.int64], int]:
966
+ """
967
+ Parameters
968
+ ----------
969
+ values : np.ndarray
970
+ dropna : bool
971
+ mask : np.ndarray[bool] or None, default None
972
+
973
+ Returns
974
+ -------
975
+ uniques : np.ndarray
976
+ counts : np.ndarray[np.int64]
977
+ """
978
+ original = values
979
+ values = _ensure_data(values)
980
+
981
+ keys, counts, na_counter = htable.value_count(values, dropna, mask=mask)
982
+
983
+ if needs_i8_conversion(original.dtype):
984
+ # datetime, timedelta, or period
985
+
986
+ if dropna:
987
+ mask = keys != iNaT
988
+ keys, counts = keys[mask], counts[mask]
989
+
990
+ res_keys = _reconstruct_data(keys, original.dtype, original)
991
+ return res_keys, counts, na_counter
992
+
993
+
994
def duplicated(
    values: ArrayLike,
    keep: Literal["first", "last", False] = "first",
    mask: npt.NDArray[np.bool_] | None = None,
) -> npt.NDArray[np.bool_]:
    """
    Flag the entries of ``values`` that repeat an earlier or later entry.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Array over which to check for duplicate values.
    keep : {'first', 'last', False}, default 'first'
        - ``first`` : Mark duplicates as ``True`` except for the first
          occurrence.
        - ``last`` : Mark duplicates as ``True`` except for the last
          occurrence.
        - False : Mark all duplicates as ``True``.
    mask : ndarray[bool], optional
        array indicating which elements to exclude from checking

    Returns
    -------
    duplicated : ndarray[bool]
    """
    # Convert to a representation the hashtable routines accept.
    hashable_values = _ensure_data(values)
    is_duplicate = htable.duplicated(hashable_values, keep=keep, mask=mask)
    return is_duplicate
1021
+
1022
+
1023
+ def mode(
1024
+ values: ArrayLike, dropna: bool = True, mask: npt.NDArray[np.bool_] | None = None
1025
+ ) -> ArrayLike:
1026
+ """
1027
+ Returns the mode(s) of an array.
1028
+
1029
+ Parameters
1030
+ ----------
1031
+ values : array-like
1032
+ Array over which to check for duplicate values.
1033
+ dropna : bool, default True
1034
+ Don't consider counts of NaN/NaT.
1035
+
1036
+ Returns
1037
+ -------
1038
+ np.ndarray or ExtensionArray
1039
+ """
1040
+ values = _ensure_arraylike(values, func_name="mode")
1041
+ original = values
1042
+
1043
+ if needs_i8_conversion(values.dtype):
1044
+ # Got here with ndarray; dispatch to DatetimeArray/TimedeltaArray.
1045
+ values = ensure_wrapped_if_datetimelike(values)
1046
+ values = cast("ExtensionArray", values)
1047
+ return values._mode(dropna=dropna)
1048
+
1049
+ values = _ensure_data(values)
1050
+
1051
+ npresult, res_mask = htable.mode(values, dropna=dropna, mask=mask)
1052
+ if res_mask is not None:
1053
+ return npresult, res_mask # type: ignore[return-value]
1054
+
1055
+ try:
1056
+ npresult = np.sort(npresult)
1057
+ except TypeError as err:
1058
+ warnings.warn(
1059
+ f"Unable to sort modes: {err}",
1060
+ stacklevel=find_stack_level(),
1061
+ )
1062
+
1063
+ result = _reconstruct_data(npresult, original.dtype, original)
1064
+ return result
1065
+
1066
+
1067
def rank(
    values: ArrayLike,
    axis: AxisInt = 0,
    method: str = "average",
    na_option: str = "keep",
    ascending: bool = True,
    pct: bool = False,
) -> npt.NDArray[np.float64]:
    """
    Rank the values along a given axis.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Array whose values will be ranked. The number of dimensions in this
        array must not exceed 2.
    axis : int, default 0
        Axis over which to perform rankings. Only used for 2-D input.
    method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
        The method by which ties are broken during the ranking.
    na_option : {'keep', 'top'}, default 'keep'
        The method by which NaNs are placed in the ranking.
        - ``keep``: rank each NaN value with a NaN ranking
        - ``top``: replace each NaN with either +/- inf so that they
          are ranked at the top
    ascending : bool, default True
        Whether or not the elements should be ranked in ascending order.
    pct : bool, default False
        Whether to return the rankings in integer form (e.g. 1, 2, 3) or
        in percentile form (e.g. 0.333..., 0.666..., 1).

    Returns
    -------
    np.ndarray
        Ranks as produced by ``algos.rank_1d`` or ``algos.rank_2d``.

    Raises
    ------
    TypeError
        If the converted array is neither 1- nor 2-dimensional.
    """
    # Must be determined before _ensure_data converts the values.
    is_datetimelike = needs_i8_conversion(values.dtype)
    data = _ensure_data(values)

    # Keyword arguments shared by the 1-D and 2-D ranking routines.
    shared_options = {
        "is_datetimelike": is_datetimelike,
        "ties_method": method,
        "ascending": ascending,
        "na_option": na_option,
        "pct": pct,
    }

    if data.ndim == 1:
        return algos.rank_1d(data, **shared_options)
    if data.ndim == 2:
        return algos.rank_2d(data, axis=axis, **shared_options)
    raise TypeError("Array with ndim > 2 are not supported.")
1124
+
1125
+
1126
+ # ---- #
1127
+ # take #
1128
+ # ---- #
1129
+
1130
+
1131
def take(
    arr,
    indices: TakeIndexer,
    axis: AxisInt = 0,
    allow_fill: bool = False,
    fill_value=None,
):
    """
    Take elements from an array.

    Parameters
    ----------
    arr : array-like or scalar value
        Non array-likes (sequences/scalars without a dtype) are coerced
        to an ndarray.

        .. deprecated:: 2.1.0
            Passing an argument other than a numpy.ndarray, ExtensionArray,
            Index, or Series is deprecated.

    indices : sequence of int or one-dimensional np.ndarray of int
        Indices to be taken.
    axis : int, default 0
        The axis over which to select values.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any other
          negative values raise a ``ValueError``.

    fill_value : any, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type (``self.dtype.na_value``) is used.

        For multi-dimensional `arr`, each *element* is filled with
        `fill_value`.

    Returns
    -------
    ndarray or ExtensionArray
        Same type as the input.

    Raises
    ------
    IndexError
        When `indices` is out of bounds for the array.
    ValueError
        When the indexer contains negative values other than ``-1``
        and `allow_fill` is True.

    Notes
    -----
    When `allow_fill` is False, `indices` may be whatever dimensionality
    is accepted by NumPy for `arr`.

    When `allow_fill` is True, `indices` should be 1-D.

    See Also
    --------
    numpy.take : Take elements from an array along an axis.

    Examples
    --------
    >>> import pandas as pd

    With the default ``allow_fill=False``, negative numbers indicate
    positional indices from the right.

    >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1])
    array([10, 10, 30])

    Setting ``allow_fill=True`` will place `fill_value` in those positions.

    >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True)
    array([10., 10., nan])

    >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True,
    ...      fill_value=-10)
    array([ 10,  10, -10])
    """
    supported_inputs = (np.ndarray, ABCExtensionArray, ABCIndex, ABCSeries)
    if not isinstance(arr, supported_inputs):
        # GH#52981
        warnings.warn(
            "pd.api.extensions.take accepting non-standard inputs is deprecated "
            "and will raise in a future version. Pass either a numpy.ndarray, "
            "ExtensionArray, Index, or Series instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    if not is_array_like(arr):
        arr = np.asarray(arr)

    positions = ensure_platform_int(indices)

    if not allow_fill:
        # NumPy style: negative positions count from the end
        return arr.take(positions, axis=axis)

    # Pandas style, -1 means NA
    validate_indices(positions, arr.shape[axis])
    return take_nd(arr, positions, axis=axis, allow_fill=True, fill_value=fill_value)
1241
+
1242
+
1243
+ # ------------ #
1244
+ # searchsorted #
1245
+ # ------------ #
1246
+
1247
+
1248
def searchsorted(
    arr: ArrayLike,
    value: NumpyValueArrayLike | ExtensionArray,
    side: Literal["left", "right"] = "left",
    sorter: NumpySorter | None = None,
) -> npt.NDArray[np.intp] | np.intp:
    """
    Find indices where elements should be inserted to maintain order.

    Find the indices into a sorted array `arr` such that, if the
    corresponding elements in `value` were inserted before the indices,
    the order of `arr` would be preserved.

    Assuming that `arr` is sorted:

    ======  ================================
    `side`  returned index `i` satisfies
    ======  ================================
    left    ``arr[i-1] < value <= arr[i]``
    right   ``arr[i-1] <= value < arr[i]``
    ======  ================================

    Parameters
    ----------
    arr: np.ndarray, ExtensionArray, Series
        Input array. If `sorter` is None, then it must be sorted in
        ascending order, otherwise `sorter` must be an array of indices
        that sort it.
    value : array-like or scalar
        Values to insert into `arr`.
    side : {'left', 'right'}, optional
        If 'left', the index of the first suitable location found is given.
        If 'right', return the last such index.  If there is no suitable
        index, return either 0 or N (where N is the length of `arr`).
    sorter : 1-D array-like, optional
        Optional array of integer indices that sort `arr` into ascending
        order. They are typically the result of argsort.

    Returns
    -------
    array of ints or int
        If value is array-like, array of insertion points.
        If value is scalar, a single integer.

    See Also
    --------
    numpy.searchsorted : Similar method from NumPy.
    """
    if sorter is not None:
        sorter = ensure_platform_int(sorter)

    integer_search = (
        isinstance(arr, np.ndarray)
        and arr.dtype.kind in "iu"
        and (is_integer(value) or is_integer_dtype(value))
    )

    if not integer_search:
        # E.g. if `arr` is an array with dtype='datetime64[ns]'
        # and `value` is a pd.Timestamp, we may need to convert value
        arr = ensure_wrapped_if_datetimelike(arr)
    else:
        # If `arr` and `value` had different dtypes, numpy would recast
        # `arr`, causing a slow search. So `value` is given the dtype of
        # `arr` whenever that cannot overflow.
        limits = np.iinfo(arr.dtype.type)
        probe = np.array([value]) if is_integer(value) else np.array(value)
        fits_in_arr_dtype = (probe >= limits.min).all() and (
            probe <= limits.max
        ).all()
        # out-of-bounds values keep their own dtype to avoid overflow
        search_dtype = arr.dtype if fits_in_arr_dtype else probe.dtype

        if is_integer(value):
            # We know that value is int
            value = cast(int, search_dtype.type(value))
        else:
            value = pd_array(cast(ArrayLike, value), dtype=search_dtype)

    # Argument 1 to "searchsorted" of "ndarray" has incompatible type
    # "Union[NumpyValueArrayLike, ExtensionArray]"; expected "NumpyValueArrayLike"
    return arr.searchsorted(value, side=side, sorter=sorter)  # type: ignore[arg-type]
1330
+
1331
+
1332
+ # ---- #
1333
+ # diff #
1334
+ # ---- #
1335
+
1336
+ _diff_special = {"float64", "float32", "int64", "int32", "int16", "int8"}
1337
+
1338
+
1339
+ def diff(arr, n: int, axis: AxisInt = 0):
1340
+ """
1341
+ difference of n between self,
1342
+ analogous to s-s.shift(n)
1343
+
1344
+ Parameters
1345
+ ----------
1346
+ arr : ndarray or ExtensionArray
1347
+ n : int
1348
+ number of periods
1349
+ axis : {0, 1}
1350
+ axis to shift on
1351
+ stacklevel : int, default 3
1352
+ The stacklevel for the lost dtype warning.
1353
+
1354
+ Returns
1355
+ -------
1356
+ shifted
1357
+ """
1358
+
1359
+ n = int(n)
1360
+ na = np.nan
1361
+ dtype = arr.dtype
1362
+
1363
+ is_bool = is_bool_dtype(dtype)
1364
+ if is_bool:
1365
+ op = operator.xor
1366
+ else:
1367
+ op = operator.sub
1368
+
1369
+ if isinstance(dtype, NumpyEADtype):
1370
+ # NumpyExtensionArray cannot necessarily hold shifted versions of itself.
1371
+ arr = arr.to_numpy()
1372
+ dtype = arr.dtype
1373
+
1374
+ if not isinstance(arr, np.ndarray):
1375
+ # i.e ExtensionArray
1376
+ if hasattr(arr, f"__{op.__name__}__"):
1377
+ if axis != 0:
1378
+ raise ValueError(f"cannot diff {type(arr).__name__} on axis={axis}")
1379
+ return op(arr, arr.shift(n))
1380
+ else:
1381
+ raise TypeError(
1382
+ f"{type(arr).__name__} has no 'diff' method. "
1383
+ "Convert to a suitable dtype prior to calling 'diff'."
1384
+ )
1385
+
1386
+ is_timedelta = False
1387
+ if arr.dtype.kind in "mM":
1388
+ dtype = np.int64
1389
+ arr = arr.view("i8")
1390
+ na = iNaT
1391
+ is_timedelta = True
1392
+
1393
+ elif is_bool:
1394
+ # We have to cast in order to be able to hold np.nan
1395
+ dtype = np.object_
1396
+
1397
+ elif dtype.kind in "iu":
1398
+ # We have to cast in order to be able to hold np.nan
1399
+
1400
+ # int8, int16 are incompatible with float64,
1401
+ # see https://github.com/cython/cython/issues/2646
1402
+ if arr.dtype.name in ["int8", "int16"]:
1403
+ dtype = np.float32
1404
+ else:
1405
+ dtype = np.float64
1406
+
1407
+ orig_ndim = arr.ndim
1408
+ if orig_ndim == 1:
1409
+ # reshape so we can always use algos.diff_2d
1410
+ arr = arr.reshape(-1, 1)
1411
+ # TODO: require axis == 0
1412
+
1413
+ dtype = np.dtype(dtype)
1414
+ out_arr = np.empty(arr.shape, dtype=dtype)
1415
+
1416
+ na_indexer = [slice(None)] * 2
1417
+ na_indexer[axis] = slice(None, n) if n >= 0 else slice(n, None)
1418
+ out_arr[tuple(na_indexer)] = na
1419
+
1420
+ if arr.dtype.name in _diff_special:
1421
+ # TODO: can diff_2d dtype specialization troubles be fixed by defining
1422
+ # out_arr inside diff_2d?
1423
+ algos.diff_2d(arr, out_arr, n, axis, datetimelike=is_timedelta)
1424
+ else:
1425
+ # To keep mypy happy, _res_indexer is a list while res_indexer is
1426
+ # a tuple, ditto for lag_indexer.
1427
+ _res_indexer = [slice(None)] * 2
1428
+ _res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n)
1429
+ res_indexer = tuple(_res_indexer)
1430
+
1431
+ _lag_indexer = [slice(None)] * 2
1432
+ _lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None)
1433
+ lag_indexer = tuple(_lag_indexer)
1434
+
1435
+ out_arr[res_indexer] = op(arr[res_indexer], arr[lag_indexer])
1436
+
1437
+ if is_timedelta:
1438
+ out_arr = out_arr.view("timedelta64[ns]")
1439
+
1440
+ if orig_ndim == 1:
1441
+ out_arr = out_arr[:, 0]
1442
+ return out_arr
1443
+
1444
+
1445
+ # --------------------------------------------------------------------
1446
+ # Helper functions
1447
+
1448
+
1449
+ # Note: safe_sort is in algorithms.py instead of sorting.py because it is
1450
+ # low-dependency, is used in this module, and uses private methods from
1451
+ # this module.
1452
def safe_sort(
    values: Index | ArrayLike,
    codes: npt.NDArray[np.intp] | None = None,
    use_na_sentinel: bool = True,
    assume_unique: bool = False,
    verify: bool = True,
) -> AnyArrayLike | tuple[AnyArrayLike, np.ndarray]:
    """
    Sort ``values`` and reorder corresponding ``codes``.

    ``values`` should be unique if ``codes`` is not None.
    Safe for use with mixed types (int, str), orders ints before strs.

    Parameters
    ----------
    values : list-like
        Sequence; must be unique if ``codes`` is not None.
    codes : np.ndarray[intp] or None, default None
        Indices to ``values``. All out of bound indices are treated as
        "not found" and will be masked with ``-1``. The array passed in is
        never modified.
    use_na_sentinel : bool, default True
        If True, the sentinel -1 will be used for NaN values. If False,
        NaN values will be encoded as non-negative integers and will not drop the
        NaN from the uniques of the values.
    assume_unique : bool, default False
        When True, ``values`` are assumed to be unique, which can speed up
        the calculation. Ignored when ``codes`` is None.
    verify : bool, default True
        Check if codes are out of bound for the values and put out of bound
        codes equal to ``-1``. If ``verify=False``, it is assumed there
        are no out of bound codes. Ignored when ``codes`` is None.

    Returns
    -------
    ordered : AnyArrayLike
        Sorted ``values``
    new_codes : ndarray
        Reordered ``codes``; returned when ``codes`` is not None.

    Raises
    ------
    TypeError
        * If ``values`` is not list-like or if ``codes`` is neither None
        nor list-like
        * If ``values`` cannot be sorted
    ValueError
        * If ``codes`` is not None and ``values`` contain duplicates.
    """
    if not isinstance(values, (np.ndarray, ABCExtensionArray, ABCIndex)):
        raise TypeError(
            "Only np.ndarray, ExtensionArray, and Index objects are allowed to "
            "be passed to safe_sort as values"
        )

    sorter = None
    ordered: AnyArrayLike

    if (
        not isinstance(values.dtype, ExtensionDtype)
        and lib.infer_dtype(values, skipna=False) == "mixed-integer"
    ):
        ordered = _sort_mixed(values)
    else:
        try:
            sorter = values.argsort()
            ordered = values.take(sorter)
        except (TypeError, decimal.InvalidOperation):
            # Previous sorters failed or were not applicable, try `_sort_mixed`
            # which would work, but which fails for special case of 1d arrays
            # with tuples.
            if values.size and isinstance(values[0], tuple):
                # error: Argument 1 to "_sort_tuples" has incompatible type
                # "Union[Index, ExtensionArray, ndarray[Any, Any]]"; expected
                # "ndarray[Any, Any]"
                ordered = _sort_tuples(values)  # type: ignore[arg-type]
            else:
                ordered = _sort_mixed(values)

    # codes:

    if codes is None:
        return ordered

    if not is_list_like(codes):
        raise TypeError(
            "Only list-like objects or None are allowed to "
            "be passed to safe_sort as codes"
        )
    codes = ensure_platform_int(np.asarray(codes))

    if not assume_unique and not len(unique(values)) == len(values):
        raise ValueError("values should be unique if codes is not None")

    if sorter is None:
        # mixed types: no argsort result is available, so the positions of
        # the ordered values are looked up through a hashtable instead
        # error: Argument 1 to "_get_hashtable_algo" has incompatible type
        # "Union[Index, ExtensionArray, ndarray[Any, Any]]"; expected
        # "ndarray[Any, Any]"
        hash_klass, values = _get_hashtable_algo(values)  # type: ignore[arg-type]
        t = hash_klass(len(values))
        t.map_locations(values)
        sorter = ensure_platform_int(t.lookup(ordered))

    if use_na_sentinel:
        # take_nd is faster, but only works for na_sentinels of -1
        order2 = sorter.argsort()
        mask = None
        if verify:
            mask = (codes < -len(values)) | (codes >= len(values))
            if mask.any():
                # `codes` may be the very array the caller passed in (neither
                # np.asarray nor ensure_platform_int copies an intp ndarray),
                # so out-of-bound entries are neutralized on a private copy.
                codes = codes.copy()
                codes[mask] = 0
        new_codes = take_nd(order2, codes, fill_value=-1)
        if mask is not None:
            # out-of-bound codes were temporarily pointed at 0; mark them
            # as "not found"
            np.putmask(new_codes, mask, -1)
    else:
        reverse_indexer = np.empty(len(sorter), dtype=int)
        reverse_indexer.put(sorter, np.arange(len(sorter)))
        # mode="wrap" keeps out-of-bound codes from raising; with
        # use_na_sentinel=False no -1 masking is applied afterwards
        new_codes = reverse_indexer.take(codes, mode="wrap")

    return ordered, ensure_platform_int(new_codes)
1580
+
1581
+
1582
+ def _sort_mixed(values) -> AnyArrayLike:
1583
+ """order ints before strings before nulls in 1d arrays"""
1584
+ str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
1585
+ null_pos = np.array([isna(x) for x in values], dtype=bool)
1586
+ num_pos = ~str_pos & ~null_pos
1587
+ str_argsort = np.argsort(values[str_pos])
1588
+ num_argsort = np.argsort(values[num_pos])
1589
+ # convert boolean arrays to positional indices, then order by underlying values
1590
+ str_locs = str_pos.nonzero()[0].take(str_argsort)
1591
+ num_locs = num_pos.nonzero()[0].take(num_argsort)
1592
+ null_locs = null_pos.nonzero()[0]
1593
+ locs = np.concatenate([num_locs, str_locs, null_locs])
1594
+ return values.take(locs)
1595
+
1596
+
1597
+ def _sort_tuples(values: np.ndarray) -> np.ndarray:
1598
+ """
1599
+ Convert array of tuples (1d) to array of arrays (2d).
1600
+ We need to keep the columns separately as they contain different types and
1601
+ nans (can't use `np.sort` as it may fail when str and nan are mixed in a
1602
+ column as types cannot be compared).
1603
+ """
1604
+ from pandas.core.internals.construction import to_arrays
1605
+ from pandas.core.sorting import lexsort_indexer
1606
+
1607
+ arrays, _ = to_arrays(values, None)
1608
+ indexer = lexsort_indexer(arrays, orders=True)
1609
+ return values[indexer]
1610
+
1611
+
1612
+ def union_with_duplicates(
1613
+ lvals: ArrayLike | Index, rvals: ArrayLike | Index
1614
+ ) -> ArrayLike | Index:
1615
+ """
1616
+ Extracts the union from lvals and rvals with respect to duplicates and nans in
1617
+ both arrays.
1618
+
1619
+ Parameters
1620
+ ----------
1621
+ lvals: np.ndarray or ExtensionArray
1622
+ left values which is ordered in front.
1623
+ rvals: np.ndarray or ExtensionArray
1624
+ right values ordered after lvals.
1625
+
1626
+ Returns
1627
+ -------
1628
+ np.ndarray or ExtensionArray
1629
+ Containing the unsorted union of both arrays.
1630
+
1631
+ Notes
1632
+ -----
1633
+ Caller is responsible for ensuring lvals.dtype == rvals.dtype.
1634
+ """
1635
+ from pandas import Series
1636
+
1637
+ with warnings.catch_warnings():
1638
+ # filter warning from object dtype inference; we will end up discarding
1639
+ # the index here, so the deprecation does not affect the end result here.
1640
+ warnings.filterwarnings(
1641
+ "ignore",
1642
+ "The behavior of value_counts with object-dtype is deprecated",
1643
+ category=FutureWarning,
1644
+ )
1645
+ l_count = value_counts_internal(lvals, dropna=False)
1646
+ r_count = value_counts_internal(rvals, dropna=False)
1647
+ l_count, r_count = l_count.align(r_count, fill_value=0)
1648
+ final_count = np.maximum(l_count.values, r_count.values)
1649
+ final_count = Series(final_count, index=l_count.index, dtype="int", copy=False)
1650
+ if isinstance(lvals, ABCMultiIndex) and isinstance(rvals, ABCMultiIndex):
1651
+ unique_vals = lvals.append(rvals).unique()
1652
+ else:
1653
+ if isinstance(lvals, ABCIndex):
1654
+ lvals = lvals._values
1655
+ if isinstance(rvals, ABCIndex):
1656
+ rvals = rvals._values
1657
+ # error: List item 0 has incompatible type "Union[ExtensionArray,
1658
+ # ndarray[Any, Any], Index]"; expected "Union[ExtensionArray,
1659
+ # ndarray[Any, Any]]"
1660
+ combined = concat_compat([lvals, rvals]) # type: ignore[list-item]
1661
+ unique_vals = unique(combined)
1662
+ unique_vals = ensure_wrapped_if_datetimelike(unique_vals)
1663
+ repeats = final_count.reindex(unique_vals).values
1664
+ return np.repeat(unique_vals, repeats)
1665
+
1666
+
1667
def map_array(
    arr: ArrayLike,
    mapper,
    na_action: Literal["ignore"] | None = None,
    convert: bool = True,
) -> np.ndarray | ExtensionArray | Index:
    """
    Map values using an input mapping or function.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Values to be mapped.
    mapper : function, dict, or Series
        Mapping correspondence.
    na_action : {None, 'ignore'}, default None
        If 'ignore', propagate NA values, without passing them to the
        mapping correspondence.
    convert : bool, default True
        Try to find better dtype for elementwise function results. If
        False, leave as dtype=object.

    Returns
    -------
    Union[ndarray, Index, ExtensionArray]
        The output of the mapping function applied to the array.
        If the function returns a tuple with more than one element
        a MultiIndex will be returned.

    Raises
    ------
    ValueError
        If ``na_action`` is neither None nor ``'ignore'``.
    """
    if na_action not in (None, "ignore"):
        msg = f"na_action must either be 'ignore' or None, {na_action} was passed"
        raise ValueError(msg)

    # dict/Series mappers take an efficient index-based path, since no
    # python-level call per element is needed
    if is_dict_like(mapper):
        if isinstance(mapper, dict) and hasattr(mapper, "__missing__"):
            # If a dictionary subclass defines a default value method,
            # convert mapper to a lookup function (GH #15999).
            dict_with_default = mapper

            def _lookup_with_default(x):
                # any float NaN is looked up under the single key np.nan
                key = np.nan if isinstance(x, float) and np.isnan(x) else x
                return dict_with_default[key]

            mapper = _lookup_with_default
        else:
            # Dictionary does not have a default. Thus it's safe to
            # convert to a Series for efficiency.
            # we specify the keys here to handle the
            # possibility that they are tuples

            # The return value of mapping with an empty mapper is
            # expected to be pd.Series(np.nan, ...). As np.nan is
            # of dtype float64 the return value of this method should
            # be float64 as well
            from pandas import Series

            if len(mapper) == 0:
                mapper = Series(mapper, dtype=np.float64)
            else:
                mapper = Series(mapper)

    if isinstance(mapper, ABCSeries):
        if na_action == "ignore":
            mapper = mapper[mapper.index.notna()]

        # Since values were input this means we came from either
        # a dict or a series and mapper should be an index
        positions = mapper.index.get_indexer(arr)
        return take_nd(mapper._values, positions)

    if not len(arr):
        return arr.copy()

    # we must convert to python types
    boxed = arr.astype(object, copy=False)
    if na_action is None:
        return lib.map_infer(boxed, mapper, convert=convert)

    # na_action == "ignore": masked (NA) positions are not passed to mapper
    na_mask = isna(boxed).view(np.uint8)
    return lib.map_infer_mask(boxed, mapper, mask=na_mask, convert=convert)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/flags.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+ import weakref
5
+
6
+ if TYPE_CHECKING:
7
+ from pandas.core.generic import NDFrame
8
+
9
+
10
class Flags:
    """
    Container for the flags attached to a pandas object.

    Parameters
    ----------
    obj : Series or DataFrame
        The object these flags belong to. Only a weak reference to it
        is kept.
    allows_duplicate_labels : bool
        Whether duplicate labels are allowed on the object. Setting this
        to ``False`` will cause an :class:`errors.DuplicateLabelError`
        to be raised when `index` (or columns for DataFrame) is not
        unique, or when a subsequent operation introduces duplicates.
        See :ref:`duplicates.disallow` for more.

        .. warning::

           This is an experimental feature. Currently, many methods fail to
           propagate the ``allows_duplicate_labels`` value. In future versions
           it is expected that every method taking or returning one or more
           DataFrame or Series objects will propagate ``allows_duplicate_labels``.

    Examples
    --------
    Attributes can be set in two ways:

    >>> df = pd.DataFrame()
    >>> df.flags
    <Flags(allows_duplicate_labels=True)>
    >>> df.flags.allows_duplicate_labels = False
    >>> df.flags
    <Flags(allows_duplicate_labels=False)>

    >>> df.flags['allows_duplicate_labels'] = True
    >>> df.flags
    <Flags(allows_duplicate_labels=True)>
    """

    # Names accepted by item access (``flags[key]``).
    _keys: set[str] = {"allows_duplicate_labels"}

    def __init__(self, obj: NDFrame, *, allows_duplicate_labels: bool) -> None:
        # Hold the owner weakly; the setter dereferences it on demand.
        self._obj = weakref.ref(obj)
        self._allows_duplicate_labels = allows_duplicate_labels

    @property
    def allows_duplicate_labels(self) -> bool:
        """
        Whether this object allows duplicate labels.

        Setting ``allows_duplicate_labels=False`` ensures that the
        index (and columns of a DataFrame) are unique. Most methods
        that accept and return a Series or DataFrame will propagate
        the value of ``allows_duplicate_labels``.

        See :ref:`duplicates` for more.

        See Also
        --------
        DataFrame.attrs : Set global metadata on this object.
        DataFrame.set_flags : Set global flags on this object.

        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2]}, index=['a', 'a'])
        >>> df.flags.allows_duplicate_labels
        True
        >>> df.flags.allows_duplicate_labels = False
        Traceback (most recent call last):
        ...
        pandas.errors.DuplicateLabelError: Index has duplicates.
              positions
        label
        a        [0, 1]
        """
        return self._allows_duplicate_labels

    @allows_duplicate_labels.setter
    def allows_duplicate_labels(self, value: bool) -> None:
        allow = bool(value)
        owner = self._obj()
        if owner is None:
            # The weakly referenced owner no longer exists.
            raise ValueError("This flag's object has been deleted.")

        if not allow:
            # Ask every axis to validate uniqueness before storing the flag.
            for axis in owner.axes:
                axis._maybe_check_unique()

        self._allows_duplicate_labels = allow

    def __getitem__(self, key: str):
        if key in self._keys:
            return getattr(self, key)
        raise KeyError(key)

    def __setitem__(self, key: str, value) -> None:
        if key in self._keys:
            setattr(self, key, value)
            return
        raise ValueError(f"Unknown flag {key}. Must be one of {self._keys}")

    def __repr__(self) -> str:
        current = self.allows_duplicate_labels
        return f"<Flags(allows_duplicate_labels={current})>"

    def __eq__(self, other) -> bool:
        if not isinstance(other, type(self)):
            return False
        return self.allows_duplicate_labels == other.allows_duplicate_labels
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/core/roperator.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Reversed Operations not available in the stdlib operator module.
3
+ Defining these instead of using lambdas allows us to reference them by name.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import operator
8
+
9
+
10
def radd(left, right):
    """Reflected addition: return ``right + left``."""
    return operator.add(right, left)
12
+
13
+
14
def rsub(left, right):
    """Reflected subtraction: return ``right - left``."""
    return operator.sub(right, left)
16
+
17
+
18
def rmul(left, right):
    """Reflected multiplication: return ``right * left``."""
    return operator.mul(right, left)
20
+
21
+
22
def rdiv(left, right):
    """Reflected division: return ``right / left`` (true division)."""
    return operator.truediv(right, left)
24
+
25
+
26
def rtruediv(left, right):
    """Reflected true division: return ``right / left``."""
    return operator.truediv(right, left)
28
+
29
+
30
def rfloordiv(left, right):
    """Reflected floor division: return ``right // left``."""
    return operator.floordiv(right, left)
32
+
33
+
34
def rmod(left, right):
    """
    Reflected modulo: return ``right % left``.

    Raises
    ------
    TypeError
        If ``right`` is a string, because ``%`` on a string would be the
        string-formatting operation rather than a modulo.
    """
    if not isinstance(right, str):
        return operator.mod(right, left)

    # % on a str would format it instead of computing a remainder
    left_type = type(left).__name__
    raise TypeError(f"{left_type} cannot perform the operation mod")
43
+
44
+
45
def rdivmod(left, right):
    """Reflected divmod: return ``divmod(right, left)``."""
    quotient_and_remainder = divmod(right, left)
    return quotient_and_remainder
47
+
48
+
49
def rpow(left, right):
    """Reflected power: return ``right ** left``."""
    return operator.pow(right, left)
51
+
52
+
53
def rand_(left, right):
    """Reflected bitwise/logical and: return ``right & left``."""
    return right & left
55
+
56
+
57
def ror_(left, right):
    """Reflected bitwise/logical or: return ``right | left``."""
    return right | left
59
+
60
+
61
def rxor(left, right):
    """Reflected bitwise/logical xor: return ``right ^ left``."""
    return right ^ left
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/__init__.py ADDED
File without changes
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_aggregation.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from pandas.core.apply import (
5
+ _make_unique_kwarg_list,
6
+ maybe_mangle_lambdas,
7
+ )
8
+
9
+
10
def test_maybe_mangle_lambdas_passthrough():
    """Inputs that hold at most one lambda come back with names untouched."""
    # a string alias is returned as-is
    assert maybe_mangle_lambdas("mean") == "mean"

    # a bare lambda keeps its default name
    bare = maybe_mangle_lambdas(lambda x: x)
    assert bare.__name__ == "<lambda>"

    # don't mangle a single lambda inside a list.
    wrapped = maybe_mangle_lambdas([lambda x: x])
    assert wrapped[0].__name__ == "<lambda>"
15
+
16
+
17
def test_maybe_mangle_lambdas_listlike():
    """Several lambdas in a list get numbered names but keep their results."""
    aggfuncs = [lambda x: 1, lambda x: 2]
    mangled = maybe_mangle_lambdas(aggfuncs)

    # names are suffixed with the position in the list
    for pos in range(2):
        assert mangled[pos].__name__ == f"<lambda_{pos}>"

    # the renamed callables still evaluate like the originals
    for pos in range(2):
        assert aggfuncs[pos](None) == mangled[pos](None)
24
+
25
+
26
def test_maybe_mangle_lambdas():
    """Lambdas in a dict-of-lists spec are numbered per key."""
    spec = {"A": [lambda x: 0, lambda x: 1]}
    mangled = maybe_mangle_lambdas(spec)

    first, second = mangled["A"][0], mangled["A"][1]
    assert first.__name__ == "<lambda_0>"
    assert second.__name__ == "<lambda_1>"
31
+
32
+
33
def test_maybe_mangle_lambdas_args():
    """
    Mangled lambdas are renamed and still accept extra arguments.

    The argument-forwarding assertions run against both the lambda that was
    passed in and the mangled callable that is returned. Previously only the
    input lambda was called, so forwarding through the returned callable was
    never exercised.
    """
    func = {"A": [lambda x, a, b=1: (0, a, b), lambda x: 1]}
    result = maybe_mangle_lambdas(func)
    assert result["A"][0].__name__ == "<lambda_0>"
    assert result["A"][1].__name__ == "<lambda_1>"

    # Positional and keyword arguments must reach the underlying lambda,
    # whether it is called directly or through the renamed callable.
    for aggfunc in (func["A"][0], result["A"][0]):
        assert aggfunc(0, 1) == (0, 1, 1)
        assert aggfunc(0, 1, 2) == (0, 1, 2)
        assert aggfunc(0, 2, b=3) == (0, 2, 3)
42
+
43
+
44
def test_maybe_mangle_lambdas_named():
    """A spec made only of named functions compares equal after mangling."""
    func = {"C": np.mean, "D": {"foo": np.mean, "bar": np.mean}}
    assert maybe_mangle_lambdas(func) == func
48
+
49
+
50
@pytest.mark.parametrize(
    "order, expected_reorder",
    [
        # repeated ("height", "<lambda>") pairs are numbered, the lone
        # ("weight", "<lambda>") pair is expected unchanged
        (
            [
                ("height", "<lambda>"),
                ("height", "max"),
                ("weight", "max"),
                ("height", "<lambda>"),
                ("weight", "<lambda>"),
            ],
            [
                ("height", "<lambda>_0"),
                ("height", "max"),
                ("weight", "max"),
                ("height", "<lambda>_1"),
                ("weight", "<lambda>"),
            ],
        ),
        # three duplicates following a unique pair
        (
            [
                ("col2", "min"),
                ("col1", "<lambda>"),
                ("col1", "<lambda>"),
                ("col1", "<lambda>"),
            ],
            [
                ("col2", "min"),
                ("col1", "<lambda>_0"),
                ("col1", "<lambda>_1"),
                ("col1", "<lambda>_2"),
            ],
        ),
        # only duplicates
        (
            [("col", "<lambda>"), ("col", "<lambda>"), ("col", "<lambda>")],
            [("col", "<lambda>_0"), ("col", "<lambda>_1"), ("col", "<lambda>_2")],
        ),
    ],
)
def test_make_unique(order, expected_reorder):
    """Duplicate (column, aggfunc) pairs get positional suffixes in order."""
    # GH 27519, test if make_unique function reorders correctly
    assert _make_unique_kwarg_list(order) == expected_reorder
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_algos.py ADDED
@@ -0,0 +1,2041 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ import struct
3
+
4
+ import numpy as np
5
+ import pytest
6
+
7
+ from pandas._libs import (
8
+ algos as libalgos,
9
+ hashtable as ht,
10
+ )
11
+
12
+ from pandas.core.dtypes.common import (
13
+ is_bool_dtype,
14
+ is_complex_dtype,
15
+ is_float_dtype,
16
+ is_integer_dtype,
17
+ is_object_dtype,
18
+ )
19
+ from pandas.core.dtypes.dtypes import CategoricalDtype
20
+
21
+ import pandas as pd
22
+ from pandas import (
23
+ Categorical,
24
+ CategoricalIndex,
25
+ DataFrame,
26
+ DatetimeIndex,
27
+ Index,
28
+ IntervalIndex,
29
+ MultiIndex,
30
+ NaT,
31
+ Period,
32
+ PeriodIndex,
33
+ Series,
34
+ Timedelta,
35
+ Timestamp,
36
+ cut,
37
+ date_range,
38
+ timedelta_range,
39
+ to_datetime,
40
+ to_timedelta,
41
+ )
42
+ import pandas._testing as tm
43
+ import pandas.core.algorithms as algos
44
+ from pandas.core.arrays import (
45
+ DatetimeArray,
46
+ TimedeltaArray,
47
+ )
48
+ import pandas.core.common as com
49
+
50
+
51
+ class TestFactorize:
52
+ def test_factorize_complex(self):
53
+ # GH#17927
54
+ array = [1, 2, 2 + 1j]
55
+ msg = "factorize with argument that is not not a Series"
56
+ with tm.assert_produces_warning(FutureWarning, match=msg):
57
+ labels, uniques = algos.factorize(array)
58
+
59
+ expected_labels = np.array([0, 1, 2], dtype=np.intp)
60
+ tm.assert_numpy_array_equal(labels, expected_labels)
61
+
62
+ # Should return a complex dtype in the future
63
+ expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=object)
64
+ tm.assert_numpy_array_equal(uniques, expected_uniques)
65
+
66
@pytest.mark.parametrize("sort", [True, False])
def test_factorize(self, index_or_series_obj, sort):
    """``obj.factorize`` codes/uniques match ones rebuilt from ``obj.unique()``."""
    obj = index_or_series_obj
    result_codes, result_uniques = obj.factorize(sort=sort)

    build = MultiIndex.from_tuples if isinstance(obj, MultiIndex) else Index
    unique_values = obj.unique()
    if unique_values.dtype == np.float16:
        # the expectation is built from float32 when the uniques are float16
        unique_values = unique_values.astype(np.float32)
    expected_uniques = build(unique_values)

    needs_object_cast = (
        isinstance(obj, Index)
        and expected_uniques.dtype == bool
        and obj.dtype == object
    )
    if needs_object_cast:
        expected_uniques = expected_uniques.astype(object)

    if sort:
        expected_uniques = expected_uniques.sort_values()

    # construct an integer ndarray so that
    # `expected_uniques.take(expected_codes)` is equal to `obj`
    lookup = list(expected_uniques)
    expected_codes = np.asarray([lookup.index(val) for val in obj], dtype=np.intp)

    tm.assert_numpy_array_equal(result_codes, expected_codes)
    tm.assert_index_equal(result_uniques, expected_uniques, exact=True)
96
+
97
+ def test_series_factorize_use_na_sentinel_false(self):
98
+ # GH#35667
99
+ values = np.array([1, 2, 1, np.nan])
100
+ ser = Series(values)
101
+ codes, uniques = ser.factorize(use_na_sentinel=False)
102
+
103
+ expected_codes = np.array([0, 1, 0, 2], dtype=np.intp)
104
+ expected_uniques = Index([1.0, 2.0, np.nan])
105
+
106
+ tm.assert_numpy_array_equal(codes, expected_codes)
107
+ tm.assert_index_equal(uniques, expected_uniques)
108
+
109
+ def test_basic(self):
110
+ items = np.array(["a", "b", "b", "a", "a", "c", "c", "c"], dtype=object)
111
+ codes, uniques = algos.factorize(items)
112
+ tm.assert_numpy_array_equal(uniques, np.array(["a", "b", "c"], dtype=object))
113
+
114
+ codes, uniques = algos.factorize(items, sort=True)
115
+ exp = np.array([0, 1, 1, 0, 0, 2, 2, 2], dtype=np.intp)
116
+ tm.assert_numpy_array_equal(codes, exp)
117
+ exp = np.array(["a", "b", "c"], dtype=object)
118
+ tm.assert_numpy_array_equal(uniques, exp)
119
+
120
+ arr = np.arange(5, dtype=np.intp)[::-1]
121
+
122
+ codes, uniques = algos.factorize(arr)
123
+ exp = np.array([0, 1, 2, 3, 4], dtype=np.intp)
124
+ tm.assert_numpy_array_equal(codes, exp)
125
+ exp = np.array([4, 3, 2, 1, 0], dtype=arr.dtype)
126
+ tm.assert_numpy_array_equal(uniques, exp)
127
+
128
+ codes, uniques = algos.factorize(arr, sort=True)
129
+ exp = np.array([4, 3, 2, 1, 0], dtype=np.intp)
130
+ tm.assert_numpy_array_equal(codes, exp)
131
+ exp = np.array([0, 1, 2, 3, 4], dtype=arr.dtype)
132
+ tm.assert_numpy_array_equal(uniques, exp)
133
+
134
+ arr = np.arange(5.0)[::-1]
135
+
136
+ codes, uniques = algos.factorize(arr)
137
+ exp = np.array([0, 1, 2, 3, 4], dtype=np.intp)
138
+ tm.assert_numpy_array_equal(codes, exp)
139
+ exp = np.array([4.0, 3.0, 2.0, 1.0, 0.0], dtype=arr.dtype)
140
+ tm.assert_numpy_array_equal(uniques, exp)
141
+
142
+ codes, uniques = algos.factorize(arr, sort=True)
143
+ exp = np.array([4, 3, 2, 1, 0], dtype=np.intp)
144
+ tm.assert_numpy_array_equal(codes, exp)
145
+ exp = np.array([0.0, 1.0, 2.0, 3.0, 4.0], dtype=arr.dtype)
146
+ tm.assert_numpy_array_equal(uniques, exp)
147
+
148
+ def test_mixed(self):
149
+ # doc example reshaping.rst
150
+ x = Series(["A", "A", np.nan, "B", 3.14, np.inf])
151
+ codes, uniques = algos.factorize(x)
152
+
153
+ exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp)
154
+ tm.assert_numpy_array_equal(codes, exp)
155
+ exp = Index(["A", "B", 3.14, np.inf])
156
+ tm.assert_index_equal(uniques, exp)
157
+
158
+ codes, uniques = algos.factorize(x, sort=True)
159
+ exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp)
160
+ tm.assert_numpy_array_equal(codes, exp)
161
+ exp = Index([3.14, np.inf, "A", "B"])
162
+ tm.assert_index_equal(uniques, exp)
163
+
164
+ def test_factorize_datetime64(self):
165
+ # M8
166
+ v1 = Timestamp("20130101 09:00:00.00004")
167
+ v2 = Timestamp("20130101")
168
+ x = Series([v1, v1, v1, v2, v2, v1])
169
+ codes, uniques = algos.factorize(x)
170
+
171
+ exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp)
172
+ tm.assert_numpy_array_equal(codes, exp)
173
+ exp = DatetimeIndex([v1, v2])
174
+ tm.assert_index_equal(uniques, exp)
175
+
176
+ codes, uniques = algos.factorize(x, sort=True)
177
+ exp = np.array([1, 1, 1, 0, 0, 1], dtype=np.intp)
178
+ tm.assert_numpy_array_equal(codes, exp)
179
+ exp = DatetimeIndex([v2, v1])
180
+ tm.assert_index_equal(uniques, exp)
181
+
182
+ def test_factorize_period(self):
183
+ # period
184
+ v1 = Period("201302", freq="M")
185
+ v2 = Period("201303", freq="M")
186
+ x = Series([v1, v1, v1, v2, v2, v1])
187
+
188
+ # periods are not 'sorted' as they are converted back into an index
189
+ codes, uniques = algos.factorize(x)
190
+ exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp)
191
+ tm.assert_numpy_array_equal(codes, exp)
192
+ tm.assert_index_equal(uniques, PeriodIndex([v1, v2]))
193
+
194
+ codes, uniques = algos.factorize(x, sort=True)
195
+ exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp)
196
+ tm.assert_numpy_array_equal(codes, exp)
197
+ tm.assert_index_equal(uniques, PeriodIndex([v1, v2]))
198
+
199
+ def test_factorize_timedelta(self):
200
+ # GH 5986
201
+ v1 = to_timedelta("1 day 1 min")
202
+ v2 = to_timedelta("1 day")
203
+ x = Series([v1, v2, v1, v1, v2, v2, v1])
204
+ codes, uniques = algos.factorize(x)
205
+ exp = np.array([0, 1, 0, 0, 1, 1, 0], dtype=np.intp)
206
+ tm.assert_numpy_array_equal(codes, exp)
207
+ tm.assert_index_equal(uniques, to_timedelta([v1, v2]))
208
+
209
+ codes, uniques = algos.factorize(x, sort=True)
210
+ exp = np.array([1, 0, 1, 1, 0, 0, 1], dtype=np.intp)
211
+ tm.assert_numpy_array_equal(codes, exp)
212
+ tm.assert_index_equal(uniques, to_timedelta([v2, v1]))
213
+
214
+ def test_factorize_nan(self):
215
+ # nan should map to na_sentinel, not reverse_indexer[na_sentinel]
216
+ # rizer.factorize should not raise an exception if na_sentinel indexes
217
+ # outside of reverse_indexer
218
+ key = np.array([1, 2, 1, np.nan], dtype="O")
219
+ rizer = ht.ObjectFactorizer(len(key))
220
+ for na_sentinel in (-1, 20):
221
+ ids = rizer.factorize(key, na_sentinel=na_sentinel)
222
+ expected = np.array([0, 1, 0, na_sentinel], dtype=np.intp)
223
+ assert len(set(key)) == len(set(expected))
224
+ tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel)
225
+ tm.assert_numpy_array_equal(ids, expected)
226
+
227
+ def test_factorizer_with_mask(self):
228
+ # GH#49549
229
+ data = np.array([1, 2, 3, 1, 1, 0], dtype="int64")
230
+ mask = np.array([False, False, False, False, False, True])
231
+ rizer = ht.Int64Factorizer(len(data))
232
+ result = rizer.factorize(data, mask=mask)
233
+ expected = np.array([0, 1, 2, 0, 0, -1], dtype=np.intp)
234
+ tm.assert_numpy_array_equal(result, expected)
235
+ expected_uniques = np.array([1, 2, 3], dtype="int64")
236
+ tm.assert_numpy_array_equal(rizer.uniques.to_array(), expected_uniques)
237
+
238
+ def test_factorizer_object_with_nan(self):
239
+ # GH#49549
240
+ data = np.array([1, 2, 3, 1, np.nan])
241
+ rizer = ht.ObjectFactorizer(len(data))
242
+ result = rizer.factorize(data.astype(object))
243
+ expected = np.array([0, 1, 2, 0, -1], dtype=np.intp)
244
+ tm.assert_numpy_array_equal(result, expected)
245
+ expected_uniques = np.array([1, 2, 3], dtype=object)
246
+ tm.assert_numpy_array_equal(rizer.uniques.to_array(), expected_uniques)
247
+
248
@pytest.mark.parametrize(
    "data, expected_codes, expected_uniques",
    [
        # tuples mixed with a plain string
        (
            [(1, 1), (1, 2), (0, 0), (1, 2), "nonsense"],
            [0, 1, 2, 1, 3],
            [(1, 1), (1, 2), (0, 0), "nonsense"],
        ),
        # tuples of differing length
        (
            [(1, 1), (1, 2), (0, 0), (1, 2), (1, 2, 3)],
            [0, 1, 2, 1, 3],
            [(1, 1), (1, 2), (0, 0), (1, 2, 3)],
        ),
        # tuples of equal length only
        ([(1, 1), (1, 2), (0, 0), (1, 2)], [0, 1, 2, 1], [(1, 1), (1, 2), (0, 0)]),
    ],
)
def test_factorize_tuple_list(self, data, expected_codes, expected_uniques):
    """Each tuple in a list is factorized as one value."""
    # GH9454
    warning_msg = "factorize with argument that is not not a Series"
    with tm.assert_produces_warning(FutureWarning, match=warning_msg):
        outcome = pd.factorize(data)
    codes, uniques = outcome

    tm.assert_numpy_array_equal(codes, np.array(expected_codes, dtype=np.intp))

    tm.assert_numpy_array_equal(
        uniques, com.asarray_tuplesafe(expected_uniques, dtype=object)
    )
274
+
275
def test_complex_sorting(self):
    """Sorting complex object values raises TypeError."""
    # gh 12666 - check no segfault
    complex_values = np.array([complex(i) for i in range(17)], dtype=object)
    reversed_values = complex_values[::-1]

    with pytest.raises(
        TypeError, match="'[<>]' not supported between instances of .*"
    ):
        algos.factorize(reversed_values, sort=True)
282
+
283
+ def test_numeric_dtype_factorize(self, any_real_numpy_dtype):
284
+ # GH41132
285
+ dtype = any_real_numpy_dtype
286
+ data = np.array([1, 2, 2, 1], dtype=dtype)
287
+ expected_codes = np.array([0, 1, 1, 0], dtype=np.intp)
288
+ expected_uniques = np.array([1, 2], dtype=dtype)
289
+
290
+ codes, uniques = algos.factorize(data)
291
+ tm.assert_numpy_array_equal(codes, expected_codes)
292
+ tm.assert_numpy_array_equal(uniques, expected_uniques)
293
+
294
+ def test_float64_factorize(self, writable):
295
+ data = np.array([1.0, 1e8, 1.0, 1e-8, 1e8, 1.0], dtype=np.float64)
296
+ data.setflags(write=writable)
297
+ expected_codes = np.array([0, 1, 0, 2, 1, 0], dtype=np.intp)
298
+ expected_uniques = np.array([1.0, 1e8, 1e-8], dtype=np.float64)
299
+
300
+ codes, uniques = algos.factorize(data)
301
+ tm.assert_numpy_array_equal(codes, expected_codes)
302
+ tm.assert_numpy_array_equal(uniques, expected_uniques)
303
+
304
+ def test_uint64_factorize(self, writable):
305
+ data = np.array([2**64 - 1, 1, 2**64 - 1], dtype=np.uint64)
306
+ data.setflags(write=writable)
307
+ expected_codes = np.array([0, 1, 0], dtype=np.intp)
308
+ expected_uniques = np.array([2**64 - 1, 1], dtype=np.uint64)
309
+
310
+ codes, uniques = algos.factorize(data)
311
+ tm.assert_numpy_array_equal(codes, expected_codes)
312
+ tm.assert_numpy_array_equal(uniques, expected_uniques)
313
+
314
+ def test_int64_factorize(self, writable):
315
+ data = np.array([2**63 - 1, -(2**63), 2**63 - 1], dtype=np.int64)
316
+ data.setflags(write=writable)
317
+ expected_codes = np.array([0, 1, 0], dtype=np.intp)
318
+ expected_uniques = np.array([2**63 - 1, -(2**63)], dtype=np.int64)
319
+
320
+ codes, uniques = algos.factorize(data)
321
+ tm.assert_numpy_array_equal(codes, expected_codes)
322
+ tm.assert_numpy_array_equal(uniques, expected_uniques)
323
+
324
+ def test_string_factorize(self, writable):
325
+ data = np.array(["a", "c", "a", "b", "c"], dtype=object)
326
+ data.setflags(write=writable)
327
+ expected_codes = np.array([0, 1, 0, 2, 1], dtype=np.intp)
328
+ expected_uniques = np.array(["a", "c", "b"], dtype=object)
329
+
330
+ codes, uniques = algos.factorize(data)
331
+ tm.assert_numpy_array_equal(codes, expected_codes)
332
+ tm.assert_numpy_array_equal(uniques, expected_uniques)
333
+
334
+ def test_object_factorize(self, writable):
335
+ data = np.array(["a", "c", None, np.nan, "a", "b", NaT, "c"], dtype=object)
336
+ data.setflags(write=writable)
337
+ expected_codes = np.array([0, 1, -1, -1, 0, 2, -1, 1], dtype=np.intp)
338
+ expected_uniques = np.array(["a", "c", "b"], dtype=object)
339
+
340
+ codes, uniques = algos.factorize(data)
341
+ tm.assert_numpy_array_equal(codes, expected_codes)
342
+ tm.assert_numpy_array_equal(uniques, expected_uniques)
343
+
344
+ def test_datetime64_factorize(self, writable):
345
+ # GH35650 Verify whether read-only datetime64 array can be factorized
346
+ data = np.array([np.datetime64("2020-01-01T00:00:00.000")], dtype="M8[ns]")
347
+ data.setflags(write=writable)
348
+ expected_codes = np.array([0], dtype=np.intp)
349
+ expected_uniques = np.array(
350
+ ["2020-01-01T00:00:00.000000000"], dtype="datetime64[ns]"
351
+ )
352
+
353
+ codes, uniques = pd.factorize(data)
354
+ tm.assert_numpy_array_equal(codes, expected_codes)
355
+ tm.assert_numpy_array_equal(uniques, expected_uniques)
356
+
357
@pytest.mark.parametrize("sort", [True, False])
def test_factorize_rangeindex(self, sort):
    """An increasing RangeIndex factorizes to itself for either ``sort``."""
    # increasing -> sort doesn't matter
    ri = pd.RangeIndex.from_range(range(10))
    expected_codes = np.arange(10, dtype=np.intp)

    # the function and the method are checked against the same expectation
    codes, uniques = algos.factorize(ri, sort=sort)
    tm.assert_numpy_array_equal(codes, expected_codes)
    tm.assert_index_equal(uniques, ri, exact=True)

    codes, uniques = ri.factorize(sort=sort)
    tm.assert_numpy_array_equal(codes, expected_codes)
    tm.assert_index_equal(uniques, ri, exact=True)
370
+
371
@pytest.mark.parametrize("sort", [True, False])
def test_factorize_rangeindex_decreasing(self, sort):
    """For a decreasing RangeIndex, ``sort`` reverses codes and uniques."""
    # decreasing -> sort matters
    decreasing = pd.RangeIndex.from_range(range(10))[::-1]

    expected_codes = np.arange(10, dtype=np.intp)
    expected_uniques = decreasing
    if sort:
        expected_codes = expected_codes[::-1]
        expected_uniques = expected_uniques[::-1]

    codes, uniques = algos.factorize(decreasing, sort=sort)
    tm.assert_numpy_array_equal(codes, expected_codes)
    tm.assert_index_equal(uniques, expected_uniques, exact=True)

    codes, uniques = decreasing.factorize(sort=sort)
    tm.assert_numpy_array_equal(codes, expected_codes)
    tm.assert_index_equal(uniques, expected_uniques, exact=True)
389
+
390
def test_deprecate_order(self):
    """Passing ``order`` raises TypeError; a plain call emits no warning."""
    # gh 19727 - check warning is raised for deprecated keyword, order.
    # Test not valid once order keyword is removed.
    values = np.array([2**63, 1, 2**63], dtype=np.uint64)

    with pytest.raises(TypeError, match="got an unexpected keyword"):
        algos.factorize(values, order=True)

    with tm.assert_produces_warning(False):
        algos.factorize(values)
398
+
399
@pytest.mark.parametrize(
    "data",
    [
        np.array([0, 1, 0], dtype="u8"),
        np.array([-(2**63), 1, -(2**63)], dtype="i8"),
        np.array(["__nan__", "foo", "__nan__"], dtype="object"),
    ],
)
def test_parametrized_factorize_na_value_default(self, data):
    """Values equal to a type's default NA marker are still factorized."""
    # arrays that include the NA default for that type, but isn't used.
    codes, uniques = algos.factorize(data)

    tm.assert_numpy_array_equal(codes, np.array([0, 1, 0], dtype=np.intp))
    # the first two elements are the distinct values, in order of appearance
    tm.assert_numpy_array_equal(uniques, data[[0, 1]])
414
+
415
@pytest.mark.parametrize(
    "data, na_value",
    [
        (np.array([0, 1, 0, 2], dtype="u8"), 0),
        (np.array([1, 0, 1, 2], dtype="u8"), 1),
        (np.array([-(2**63), 1, -(2**63), 0], dtype="i8"), -(2**63)),
        (np.array([1, -(2**63), 1, 0], dtype="i8"), 1),
        (np.array(["a", "", "a", "b"], dtype=object), "a"),
        (np.array([(), ("a", 1), (), ("a", 2)], dtype=object), ()),
        (np.array([("a", 1), (), ("a", 1), ("a", 2)], dtype=object), ("a", 1)),
    ],
)
def test_parametrized_factorize_na_value(self, data, na_value):
    """Elements equal to ``na_value`` get code -1 and are not uniques."""
    codes, uniques = algos.factorize_array(data, na_value=na_value)

    # in every case positions 0 and 2 hold ``na_value``
    tm.assert_numpy_array_equal(codes, np.array([-1, 0, -1, 1], dtype=np.intp))
    tm.assert_numpy_array_equal(uniques, data[[1, 3]])
433
+
434
@pytest.mark.parametrize("sort", [True, False])
@pytest.mark.parametrize(
    "data, uniques",
    [
        (
            np.array(["b", "a", None, "b"], dtype=object),
            np.array(["b", "a"], dtype=object),
        ),
        (
            pd.array([2, 1, np.nan, 2], dtype="Int64"),
            pd.array([2, 1], dtype="Int64"),
        ),
    ],
    ids=["numpy_array", "extension_array"],
)
def test_factorize_use_na_sentinel(self, sort, data, uniques):
    """
    With ``use_na_sentinel=True`` missing values are coded as -1.

    ``uniques`` is the parametrized expectation in order of first
    appearance. The factorize result is bound to ``result_uniques`` so it
    does not overwrite that expectation; previously the unpacking rebound
    ``uniques`` and the expected uniques were derived from the result.
    """
    codes, result_uniques = algos.factorize(data, sort=sort, use_na_sentinel=True)
    if sort:
        expected_codes = np.array([1, 0, -1, 1], dtype=np.intp)
        expected_uniques = algos.safe_sort(uniques)
    else:
        expected_codes = np.array([0, 1, -1, 0], dtype=np.intp)
        expected_uniques = uniques
    tm.assert_numpy_array_equal(codes, expected_codes)
    if isinstance(data, np.ndarray):
        tm.assert_numpy_array_equal(result_uniques, expected_uniques)
    else:
        tm.assert_extension_array_equal(result_uniques, expected_uniques)
462
+
463
@pytest.mark.parametrize(
    "data, expected_codes, expected_uniques",
    [
        # missing value given as None
        (
            ["a", None, "b", "a"],
            np.array([0, 1, 2, 0], dtype=np.dtype("intp")),
            np.array(["a", np.nan, "b"], dtype=object),
        ),
        # missing value given as NaN
        (
            ["a", np.nan, "b", "a"],
            np.array([0, 1, 2, 0], dtype=np.dtype("intp")),
            np.array(["a", np.nan, "b"], dtype=object),
        ),
    ],
)
def test_object_factorize_use_na_sentinel_false(
    self, data, expected_codes, expected_uniques
):
    """Without the sentinel, a missing object value gets its own code."""
    values = np.array(data, dtype=object)
    codes, uniques = algos.factorize(values, use_na_sentinel=False)

    tm.assert_numpy_array_equal(uniques, expected_uniques, strict_nan=True)
    tm.assert_numpy_array_equal(codes, expected_codes, strict_nan=True)
487
+
488
@pytest.mark.parametrize(
    "data, expected_codes, expected_uniques",
    [
        # None among ints: object uniques are expected
        (
            [1, None, 1, 2],
            np.array([0, 1, 0, 2], dtype=np.dtype("intp")),
            np.array([1, np.nan, 2], dtype="O"),
        ),
        # NaN among ints: float64 uniques are expected
        (
            [1, np.nan, 1, 2],
            np.array([0, 1, 0, 2], dtype=np.dtype("intp")),
            np.array([1, np.nan, 2], dtype=np.float64),
        ),
    ],
)
def test_int_factorize_use_na_sentinel_false(
    self, data, expected_codes, expected_uniques
):
    """Without the sentinel, a missing value in a list gets its own code."""
    warning_msg = "factorize with argument that is not not a Series"
    with tm.assert_produces_warning(FutureWarning, match=warning_msg):
        outcome = algos.factorize(data, use_na_sentinel=False)
    codes, uniques = outcome

    tm.assert_numpy_array_equal(uniques, expected_uniques, strict_nan=True)
    tm.assert_numpy_array_equal(codes, expected_codes, strict_nan=True)
512
+
513
+ @pytest.mark.parametrize(
514
+ "data, expected_codes, expected_uniques",
515
+ [
516
+ (
517
+ Index(Categorical(["a", "a", "b"])),
518
+ np.array([0, 0, 1], dtype=np.intp),
519
+ CategoricalIndex(["a", "b"], categories=["a", "b"], dtype="category"),
520
+ ),
521
+ (
522
+ Series(Categorical(["a", "a", "b"])),
523
+ np.array([0, 0, 1], dtype=np.intp),
524
+ CategoricalIndex(["a", "b"], categories=["a", "b"], dtype="category"),
525
+ ),
526
+ (
527
+ Series(DatetimeIndex(["2017", "2017"], tz="US/Eastern")),
528
+ np.array([0, 0], dtype=np.intp),
529
+ DatetimeIndex(["2017"], tz="US/Eastern"),
530
+ ),
531
+ ],
532
+ )
533
+ def test_factorize_mixed_values(self, data, expected_codes, expected_uniques):
534
+ # GH 19721
535
+ codes, uniques = algos.factorize(data)
536
+ tm.assert_numpy_array_equal(codes, expected_codes)
537
+ tm.assert_index_equal(uniques, expected_uniques)
538
+
539
+ def test_factorize_interval_non_nano(self, unit):
540
+ # GH#56099
541
+ left = DatetimeIndex(["2016-01-01", np.nan, "2015-10-11"]).as_unit(unit)
542
+ right = DatetimeIndex(["2016-01-02", np.nan, "2015-10-15"]).as_unit(unit)
543
+ idx = IntervalIndex.from_arrays(left, right)
544
+ codes, cats = idx.factorize()
545
+ assert cats.dtype == f"interval[datetime64[{unit}], right]"
546
+
547
+ ts = Timestamp(0).as_unit(unit)
548
+ idx2 = IntervalIndex.from_arrays(left - ts, right - ts)
549
+ codes2, cats2 = idx2.factorize()
550
+ assert cats2.dtype == f"interval[timedelta64[{unit}], right]"
551
+
552
+ idx3 = IntervalIndex.from_arrays(
553
+ left.tz_localize("US/Pacific"), right.tz_localize("US/Pacific")
554
+ )
555
+ codes3, cats3 = idx3.factorize()
556
+ assert cats3.dtype == f"interval[datetime64[{unit}, US/Pacific], right]"
557
+
558
+
559
class TestUnique:
    """Tests for ``algos.unique`` / ``pd.unique`` and the ``unique`` methods."""

    def test_ints(self):
        """``unique`` on an integer ndarray returns an ndarray."""
        arr = np.random.default_rng(2).integers(0, 100, size=50)

        result = algos.unique(arr)
        assert isinstance(result, np.ndarray)

    def test_objects(self):
        """``unique`` on an object ndarray returns an ndarray."""
        arr = np.random.default_rng(2).integers(0, 100, size=50).astype("O")

        result = algos.unique(arr)
        assert isinstance(result, np.ndarray)

    def test_object_refcount_bug(self):
        """Repeated ``unique`` calls on the same object array must not fail."""
        lst = np.array(["A", "B", "C", "D", "E"], dtype=object)
        # only the repetition matters, the loop counter is not used
        for _ in range(1000):
            len(algos.unique(lst))

    def test_on_index_object(self):
        """``pd.unique`` on a MultiIndex with repeated entries."""
        mindex = MultiIndex.from_arrays(
            [np.arange(5).repeat(5), np.tile(np.arange(5), 5)]
        )
        expected = mindex.values
        expected.sort()

        mindex = mindex.repeat(2)

        result = pd.unique(mindex)
        # sort so the comparison does not depend on the order of the uniques
        result.sort()

        tm.assert_almost_equal(result, expected)

    def test_dtype_preservation(self, any_numpy_dtype):
        # GH 15442
        if any_numpy_dtype in (tm.BYTES_DTYPES + tm.STRING_DTYPES):
            data = [1, 2, 2]
            uniques = [1, 2]
        elif is_integer_dtype(any_numpy_dtype):
            data = [1, 2, 2]
            uniques = [1, 2]
        elif is_float_dtype(any_numpy_dtype):
            data = [1, 2, 2]
            uniques = [1.0, 2.0]
        elif is_complex_dtype(any_numpy_dtype):
            data = [complex(1, 0), complex(2, 0), complex(2, 0)]
            uniques = [complex(1, 0), complex(2, 0)]
        elif is_bool_dtype(any_numpy_dtype):
            data = [True, True, False]
            uniques = [True, False]
        elif is_object_dtype(any_numpy_dtype):
            data = ["A", "B", "B"]
            uniques = ["A", "B"]
        else:
            # datetime64[ns]/M8[ns]/timedelta64[ns]/m8[ns] tested elsewhere
            data = [1, 2, 2]
            uniques = [1, 2]

        result = Series(data, dtype=any_numpy_dtype).unique()
        expected = np.array(uniques, dtype=any_numpy_dtype)

        if any_numpy_dtype in tm.STRING_DTYPES:
            expected = expected.astype(object)

        if expected.dtype.kind in ["m", "M"]:
            # We get TimedeltaArray/DatetimeArray
            assert isinstance(result, (DatetimeArray, TimedeltaArray))
            result = np.array(result)
        tm.assert_numpy_array_equal(result, expected)

    def test_datetime64_dtype_array_returned(self):
        # GH 9431
        expected = np.array(
            [
                "2015-01-03T00:00:00.000000000",
                "2015-01-01T00:00:00.000000000",
            ],
            dtype="M8[ns]",
        )

        dt_index = to_datetime(
            [
                "2015-01-03T00:00:00.000000000",
                "2015-01-01T00:00:00.000000000",
                "2015-01-01T00:00:00.000000000",
            ]
        )
        # same expectation for the index, a Series of it, and its raw values
        result = algos.unique(dt_index)
        tm.assert_numpy_array_equal(result, expected)
        assert result.dtype == expected.dtype

        s = Series(dt_index)
        result = algos.unique(s)
        tm.assert_numpy_array_equal(result, expected)
        assert result.dtype == expected.dtype

        arr = s.values
        result = algos.unique(arr)
        tm.assert_numpy_array_equal(result, expected)
        assert result.dtype == expected.dtype

    def test_datetime_non_ns(self):
        """``pd.unique`` keeps a second-resolution datetime64 dtype."""
        a = np.array(["2000", "2000", "2001"], dtype="datetime64[s]")
        result = pd.unique(a)
        expected = np.array(["2000", "2001"], dtype="datetime64[s]")
        tm.assert_numpy_array_equal(result, expected)

    def test_timedelta_non_ns(self):
        """``pd.unique`` keeps a second-resolution timedelta64 dtype."""
        a = np.array(["2000", "2000", "2001"], dtype="timedelta64[s]")
        result = pd.unique(a)
        expected = np.array([2000, 2001], dtype="timedelta64[s]")
        tm.assert_numpy_array_equal(result, expected)

    def test_timedelta64_dtype_array_returned(self):
        # GH 9431
        expected = np.array([31200, 45678, 10000], dtype="m8[ns]")

        td_index = to_timedelta([31200, 45678, 31200, 10000, 45678])
        # same expectation for the index, a Series of it, and its raw values
        result = algos.unique(td_index)
        tm.assert_numpy_array_equal(result, expected)
        assert result.dtype == expected.dtype

        s = Series(td_index)
        result = algos.unique(s)
        tm.assert_numpy_array_equal(result, expected)
        assert result.dtype == expected.dtype

        arr = s.values
        result = algos.unique(arr)
        tm.assert_numpy_array_equal(result, expected)
        assert result.dtype == expected.dtype

    def test_uint64_overflow(self):
        """Values of 2**63 stay intact in a uint64 Series."""
        s = Series([1, 2, 2**63, 2**63], dtype=np.uint64)
        exp = np.array([1, 2, 2**63], dtype=np.uint64)
        tm.assert_numpy_array_equal(algos.unique(s), exp)

    def test_nan_in_object_array(self):
        """A NaN inside an object array is returned as one of the uniques."""
        duplicated_items = ["a", np.nan, "c", "c"]
        result = pd.unique(np.array(duplicated_items, dtype=object))
        expected = np.array(["a", np.nan, "c"], dtype=object)
        tm.assert_numpy_array_equal(result, expected)

    def test_categorical(self):
        # we are expecting to return in the order
        # of appearance
        expected = Categorical(list("bac"))

        # we are expecting to return in the order
        # of the categories
        expected_o = Categorical(list("bac"), categories=list("abc"), ordered=True)

        # GH 15939
        c = Categorical(list("baabc"))
        result = c.unique()
        tm.assert_categorical_equal(result, expected)

        result = algos.unique(c)
        tm.assert_categorical_equal(result, expected)

        c = Categorical(list("baabc"), ordered=True)
        result = c.unique()
        tm.assert_categorical_equal(result, expected_o)

        result = algos.unique(c)
        tm.assert_categorical_equal(result, expected_o)

        # Series of categorical dtype
        s = Series(Categorical(list("baabc")), name="foo")
        result = s.unique()
        tm.assert_categorical_equal(result, expected)

        result = pd.unique(s)
        tm.assert_categorical_equal(result, expected)

        # CI -> return CI
        ci = CategoricalIndex(Categorical(list("baabc"), categories=list("abc")))
        expected = CategoricalIndex(expected)
        result = ci.unique()
        tm.assert_index_equal(result, expected)

        result = pd.unique(ci)
        tm.assert_index_equal(result, expected)

    def test_datetime64tz_aware(self, unit):
        # GH 15939

        dti = Index(
            [
                Timestamp("20160101", tz="US/Eastern"),
                Timestamp("20160101", tz="US/Eastern"),
            ]
        ).as_unit(unit)
        ser = Series(dti)

        # Series inputs are compared as extension arrays, Index inputs as indexes
        result = ser.unique()
        expected = dti[:1]._data
        tm.assert_extension_array_equal(result, expected)

        result = dti.unique()
        expected = dti[:1]
        tm.assert_index_equal(result, expected)

        result = pd.unique(ser)
        expected = dti[:1]._data
        tm.assert_extension_array_equal(result, expected)

        result = pd.unique(dti)
        expected = dti[:1]
        tm.assert_index_equal(result, expected)

    def test_order_of_appearance(self):
        # 9346
        # light testing of guarantee of order of appearance
        # these also are the doc-examples
        result = pd.unique(Series([2, 1, 3, 3]))
        tm.assert_numpy_array_equal(result, np.array([2, 1, 3], dtype="int64"))

        result = pd.unique(Series([2] + [1] * 5))
        tm.assert_numpy_array_equal(result, np.array([2, 1], dtype="int64"))

        # a plain list is expected to trigger the FutureWarning matched below
        msg = "unique with argument that is not not a Series, Index,"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = pd.unique(list("aabc"))
        expected = np.array(["a", "b", "c"], dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        result = pd.unique(Series(Categorical(list("aabc"))))
        expected = Categorical(list("abc"))
        tm.assert_categorical_equal(result, expected)

    def test_order_of_appearance_dt64(self, unit):
        """``pd.unique`` on a tz-naive datetime Series keeps the given unit."""
        ser = Series([Timestamp("20160101"), Timestamp("20160101")]).dt.as_unit(unit)
        result = pd.unique(ser)
        expected = np.array(["2016-01-01T00:00:00.000000000"], dtype=f"M8[{unit}]")
        tm.assert_numpy_array_equal(result, expected)

    def test_order_of_appearance_dt64tz(self, unit):
        """``pd.unique`` on a tz-aware DatetimeIndex returns a DatetimeIndex."""
        dti = DatetimeIndex(
            [
                Timestamp("20160101", tz="US/Eastern"),
                Timestamp("20160101", tz="US/Eastern"),
            ]
        ).as_unit(unit)
        result = pd.unique(dti)
        expected = DatetimeIndex(
            ["2016-01-01 00:00:00"], dtype=f"datetime64[{unit}, US/Eastern]", freq=None
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "arg ,expected",
        [
            (("1", "1", "2"), np.array(["1", "2"], dtype=object)),
            (("foo",), np.array(["foo"], dtype=object)),
        ],
    )
    def test_tuple_with_strings(self, arg, expected):
        # see GH 17108
        msg = "unique with argument that is not not a Series"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = pd.unique(arg)
        tm.assert_numpy_array_equal(result, expected)

    def test_obj_none_preservation(self):
        # GH 20866
        arr = np.array(["foo", None], dtype=object)
        result = pd.unique(arr)
        expected = np.array(["foo", None], dtype=object)

        # strict_nan=True so that None is not treated as interchangeable with NaN
        tm.assert_numpy_array_equal(result, expected, strict_nan=True)

    def test_signed_zero(self):
        # GH 21866
        a = np.array([-0.0, 0.0])
        result = pd.unique(a)
        expected = np.array([-0.0])  # 0.0 and -0.0 are equivalent
        tm.assert_numpy_array_equal(result, expected)

    def test_different_nans(self):
        # GH 21866
        # create different nans from bit-patterns:
        NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0]
        NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0]
        assert NAN1 != NAN1
        assert NAN2 != NAN2
        a = np.array([NAN1, NAN2])  # NAN1 and NAN2 are equivalent
        result = pd.unique(a)
        expected = np.array([np.nan])
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("el_type", [np.float64, object])
    def test_first_nan_kept(self, el_type):
        # GH 22295
        # create different nans from bit-patterns:
        bits_for_nan1 = 0xFFF8000000000001
        bits_for_nan2 = 0x7FF8000000000001
        NAN1 = struct.unpack("d", struct.pack("=Q", bits_for_nan1))[0]
        NAN2 = struct.unpack("d", struct.pack("=Q", bits_for_nan2))[0]
        assert NAN1 != NAN1
        assert NAN2 != NAN2
        a = np.array([NAN1, NAN2], dtype=el_type)
        result = pd.unique(a)
        assert result.size == 1
        # use bit patterns to identify which nan was kept:
        result_nan_bits = struct.unpack("=Q", struct.pack("d", result[0]))[0]
        assert result_nan_bits == bits_for_nan1

    def test_do_not_mangle_na_values(self, unique_nulls_fixture, unique_nulls_fixture2):
        # GH 22295
        if unique_nulls_fixture is unique_nulls_fixture2:
            return  # skip it, values not unique
        a = np.array([unique_nulls_fixture, unique_nulls_fixture2], dtype=object)
        result = pd.unique(a)
        assert result.size == 2
        # the input array must be left untouched ...
        assert a[0] is unique_nulls_fixture
        assert a[1] is unique_nulls_fixture2
        # ... and the returned uniques must be the very same null objects,
        # in order of appearance (previously only the input was checked)
        assert result[0] is unique_nulls_fixture
        assert result[1] is unique_nulls_fixture2

    def test_unique_masked(self, any_numeric_ea_dtype):
        # GH#48019
        ser = Series([1, pd.NA, 2] * 3, dtype=any_numeric_ea_dtype)
        result = pd.unique(ser)
        expected = pd.array([1, pd.NA, 2], dtype=any_numeric_ea_dtype)
        tm.assert_extension_array_equal(result, expected)
882
+
883
+
884
def test_nunique_ints(index_or_series_or_array):
    """``nunique_ints`` must equal the number of entries ``unique`` returns (GH#36327)."""
    rng = np.random.default_rng(2)
    raw = rng.integers(0, 20, 30)
    values = index_or_series_or_array(raw)

    n_unique = algos.nunique_ints(values)
    assert n_unique == len(algos.unique(values))
890
+
891
+
892
class TestIsin:
    """Tests for ``algos.isin`` and the ``isin`` methods built on top of it."""

    def test_invalid(self):
        """Non-list-like ``comps`` or ``values`` raise a TypeError."""
        msg = (
            r"only list-like objects are allowed to be passed to isin\(\), "
            r"you passed a `int`"
        )
        with pytest.raises(TypeError, match=msg):
            algos.isin(1, 1)
        with pytest.raises(TypeError, match=msg):
            algos.isin(1, [1])
        with pytest.raises(TypeError, match=msg):
            algos.isin([1], 1)

    def test_basic(self):
        """Basic membership checks; plain-list ``comps`` are expected to warn."""
        msg = "isin with argument that is not not a Series"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = algos.isin([1, 2], [1])
        expected = np.array([True, False])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.isin(np.array([1, 2]), [1])
        expected = np.array([True, False])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.isin(Series([1, 2]), [1])
        expected = np.array([True, False])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.isin(Series([1, 2]), Series([1]))
        expected = np.array([True, False])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.isin(Series([1, 2]), {1})
        expected = np.array([True, False])
        tm.assert_numpy_array_equal(result, expected)

        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = algos.isin(["a", "b"], ["a"])
        expected = np.array([True, False])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.isin(Series(["a", "b"]), Series(["a"]))
        expected = np.array([True, False])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.isin(Series(["a", "b"]), {"a"})
        expected = np.array([True, False])
        tm.assert_numpy_array_equal(result, expected)

        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = algos.isin(["a", "b"], [1])
        expected = np.array([False, False])
        tm.assert_numpy_array_equal(result, expected)

    def test_i8(self):
        """Membership checks on raw datetime64 and timedelta64 value arrays."""
        arr = date_range("20130101", periods=3).values
        result = algos.isin(arr, [arr[0]])
        expected = np.array([True, False, False])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.isin(arr, arr[0:2])
        expected = np.array([True, True, False])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.isin(arr, set(arr[0:2]))
        expected = np.array([True, True, False])
        tm.assert_numpy_array_equal(result, expected)

        arr = timedelta_range("1 day", periods=3).values
        result = algos.isin(arr, [arr[0]])
        expected = np.array([True, False, False])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.isin(arr, arr[0:2])
        expected = np.array([True, True, False])
        tm.assert_numpy_array_equal(result, expected)

        result = algos.isin(arr, set(arr[0:2]))
        expected = np.array([True, True, False])
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("dtype1", ["m8[ns]", "M8[ns]", "M8[ns, UTC]", "period[D]"])
    @pytest.mark.parametrize("dtype", ["i8", "f8", "u8"])
    def test_isin_datetimelike_values_numeric_comps(self, dtype, dtype1):
        # Anything but object and we get all-False shortcut

        dta = date_range("2013-01-01", periods=3)._values
        arr = Series(dta.view("i8")).array.view(dtype1)

        comps = arr.view("i8").astype(dtype)

        result = algos.isin(comps, arr)
        expected = np.zeros(comps.shape, dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

    def test_large(self):
        """Membership check on a two-million element datetime64 array."""
        s = date_range("20000101", periods=2000000, freq="s").values
        result = algos.isin(s, s[0:2])
        expected = np.zeros(len(s), dtype=bool)
        expected[0] = True
        expected[1] = True
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]", "M8[ns, UTC]", "period[D]"])
    def test_isin_datetimelike_all_nat(self, dtype):
        # GH#56427
        dta = date_range("2013-01-01", periods=3)._values
        arr = Series(dta.view("i8")).array.view(dtype)

        arr[0] = NaT
        result = algos.isin(arr, [NaT])
        expected = np.array([True, False, False], dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]", "M8[ns, UTC]"])
    def test_isin_datetimelike_strings_deprecated(self, dtype):
        # GH#53111
        dta = date_range("2013-01-01", periods=3)._values
        arr = Series(dta.view("i8")).array.view(dtype)

        vals = [str(x) for x in arr]
        msg = "The behavior of 'isin' with dtype=.* is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            res = algos.isin(arr, vals)
        assert res.all()

        vals2 = np.array(vals, dtype=str)
        with tm.assert_produces_warning(FutureWarning, match=msg):
            res2 = algos.isin(arr, vals2)
        assert res2.all()

    def test_isin_dt64tz_with_nat(self):
        # the all-NaT values used to get inferred to tznaive, which was evaluated
        # as non-matching GH#56427
        dti = date_range("2016-01-01", periods=3, tz="UTC")
        ser = Series(dti)
        ser[0] = NaT

        res = algos.isin(ser._values, [NaT])
        exp = np.array([True, False, False], dtype=bool)
        tm.assert_numpy_array_equal(res, exp)

    def test_categorical_from_codes(self):
        # GH 16639
        vals = np.array([0, 1, 2, 0])
        cats = ["a", "b", "c"]
        # from_codes is a classmethod; call it on the class rather than on a
        # throwaway instance
        Sd = Series(Categorical.from_codes(vals, cats))
        St = Series(Categorical.from_codes(np.array([0, 1]), cats))
        expected = np.array([True, True, False, True])
        result = algos.isin(Sd, St)
        tm.assert_numpy_array_equal(result, expected)

    def test_categorical_isin(self):
        """``isin`` between two Categoricals built from codes."""
        vals = np.array([0, 1, 2, 0])
        cats = ["a", "b", "c"]
        cat = Categorical.from_codes(vals, cats)
        other = Categorical.from_codes(np.array([0, 1]), cats)

        expected = np.array([True, True, False, True])
        result = algos.isin(cat, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_same_nan_is_in(self):
        # GH 22160
        # nan is special, because "a is b" does not imply "a == b"
        # at least, isin() should follow python's "np.nan in [nan] == True"
        # casting to -> np.float64 -> another float-object somewhere on
        # the way could jeopardize this behavior
        comps = [np.nan]  # could be cast to float64
        values = [np.nan]
        expected = np.array([True])
        msg = "isin with argument that is not not a Series"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = algos.isin(comps, values)
        tm.assert_numpy_array_equal(result, expected)

    def test_same_nan_is_in_large(self):
        # https://github.com/pandas-dev/pandas/issues/22205
        s = np.tile(1.0, 1_000_001)
        s[0] = np.nan
        result = algos.isin(s, np.array([np.nan, 1]))
        expected = np.ones(len(s), dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

    def test_same_nan_is_in_large_series(self):
        # https://github.com/pandas-dev/pandas/issues/22205
        s = np.tile(1.0, 1_000_001)
        series = Series(s)
        # NOTE(review): the NaN is written to ``s`` after ``series`` was built;
        # whether ``series`` observes it depends on Series sharing memory with
        # ``s`` - confirm this is intended
        s[0] = np.nan
        result = series.isin(np.array([np.nan, 1]))
        expected = Series(np.ones(len(s), dtype=bool))
        tm.assert_series_equal(result, expected)

    def test_same_object_is_in(self):
        # GH 22160
        # there could be special treatment for nans
        # the user however could define a custom class
        # with similar behavior, then we at least should
        # fall back to usual python's behavior: "a in [a] == True"
        class LikeNan:
            def __eq__(self, other) -> bool:
                return False

            def __hash__(self):
                return 0

        a, b = LikeNan(), LikeNan()

        msg = "isin with argument that is not not a Series"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            # same object -> True
            tm.assert_numpy_array_equal(algos.isin([a], [a]), np.array([True]))
            # different objects -> False
            tm.assert_numpy_array_equal(algos.isin([a], [b]), np.array([False]))

    def test_different_nans(self):
        # GH 22160
        # all nans are handled as equivalent

        comps = [float("nan")]
        values = [float("nan")]
        assert comps[0] is not values[0]  # different nan-objects

        # as list of python-objects:
        result = algos.isin(np.array(comps), values)
        tm.assert_numpy_array_equal(result, np.array([True]))

        # as object-array:
        result = algos.isin(
            np.asarray(comps, dtype=object), np.asarray(values, dtype=object)
        )
        tm.assert_numpy_array_equal(result, np.array([True]))

        # as float64-array:
        result = algos.isin(
            np.asarray(comps, dtype=np.float64), np.asarray(values, dtype=np.float64)
        )
        tm.assert_numpy_array_equal(result, np.array([True]))

    def test_no_cast(self):
        # GH 22160
        # ensure 42 is not cast to a string
        comps = ["ss", 42]
        values = ["42"]
        expected = np.array([False, False])
        msg = "isin with argument that is not not a Series, Index"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = algos.isin(comps, values)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])])
    def test_empty(self, empty):
        # see gh-16991
        vals = Index(["a", "b"])
        expected = np.array([False, False])

        result = algos.isin(vals, empty)
        tm.assert_numpy_array_equal(result, expected)

    def test_different_nan_objects(self):
        # GH 22119
        comps = np.array(["nan", np.nan * 1j, float("nan")], dtype=object)
        vals = np.array([float("nan")], dtype=object)
        expected = np.array([False, False, True])
        result = algos.isin(comps, vals)
        tm.assert_numpy_array_equal(result, expected)

    def test_different_nans_as_float64(self):
        # GH 21866
        # create different nans from bit-patterns,
        # these nans will land in different buckets in the hash-table
        # if no special care is taken
        NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0]
        NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0]
        assert NAN1 != NAN1
        assert NAN2 != NAN2

        # check that NAN1 and NAN2 are equivalent:
        arr = np.array([NAN1, NAN2], dtype=np.float64)
        lookup1 = np.array([NAN1], dtype=np.float64)
        result = algos.isin(arr, lookup1)
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(result, expected)

        lookup2 = np.array([NAN2], dtype=np.float64)
        result = algos.isin(arr, lookup2)
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(result, expected)

    def test_isin_int_df_string_search(self):
        """Comparing df with int`s (1,2) with a string at isin() ("1")
        -> should not match values because int 1 is not equal str 1"""
        df = DataFrame({"values": [1, 2]})
        result = df.isin(["1"])
        expected_false = DataFrame({"values": [False, False]})
        tm.assert_frame_equal(result, expected_false)

    def test_isin_nan_df_string_search(self):
        """Comparing df with nan value (np.nan,2) with a string at isin() ("NaN")
        -> should not match values because np.nan is not equal str NaN"""
        df = DataFrame({"values": [np.nan, 2]})
        result = df.isin(np.array(["NaN"], dtype=object))
        expected_false = DataFrame({"values": [False, False]})
        tm.assert_frame_equal(result, expected_false)

    def test_isin_float_df_string_search(self):
        """Comparing df with floats (1.4245,2.32441) with a string at isin() ("1.4245")
        -> should not match values because float 1.4245 is not equal str 1.4245"""
        df = DataFrame({"values": [1.4245, 2.32441]})
        result = df.isin(np.array(["1.4245"], dtype=object))
        expected_false = DataFrame({"values": [False, False]})
        tm.assert_frame_equal(result, expected_false)

    def test_isin_unsigned_dtype(self):
        # GH#46485
        # the two integers are distinct but close to each other
        ser = Series([1378774140726870442], dtype=np.uint64)
        result = ser.isin([1378774140726870528])
        expected = Series(False)
        tm.assert_series_equal(result, expected)
1211
+
1212
+
1213
+ class TestValueCounts:
1214
+ def test_value_counts(self):
1215
+ arr = np.random.default_rng(1234).standard_normal(4)
1216
+ factor = cut(arr, 4)
1217
+
1218
+ # assert isinstance(factor, n)
1219
+ msg = "pandas.value_counts is deprecated"
1220
+ with tm.assert_produces_warning(FutureWarning, match=msg):
1221
+ result = algos.value_counts(factor)
1222
+ breaks = [-1.606, -1.018, -0.431, 0.155, 0.741]
1223
+ index = IntervalIndex.from_breaks(breaks).astype(CategoricalDtype(ordered=True))
1224
+ expected = Series([1, 0, 2, 1], index=index, name="count")
1225
+ tm.assert_series_equal(result.sort_index(), expected.sort_index())
1226
+
1227
+ def test_value_counts_bins(self):
1228
+ s = [1, 2, 3, 4]
1229
+ msg = "pandas.value_counts is deprecated"
1230
+ with tm.assert_produces_warning(FutureWarning, match=msg):
1231
+ result = algos.value_counts(s, bins=1)
1232
+ expected = Series(
1233
+ [4], index=IntervalIndex.from_tuples([(0.996, 4.0)]), name="count"
1234
+ )
1235
+ tm.assert_series_equal(result, expected)
1236
+
1237
+ with tm.assert_produces_warning(FutureWarning, match=msg):
1238
+ result = algos.value_counts(s, bins=2, sort=False)
1239
+ expected = Series(
1240
+ [2, 2],
1241
+ index=IntervalIndex.from_tuples([(0.996, 2.5), (2.5, 4.0)]),
1242
+ name="count",
1243
+ )
1244
+ tm.assert_series_equal(result, expected)
1245
+
1246
+ def test_value_counts_dtypes(self):
1247
+ msg2 = "pandas.value_counts is deprecated"
1248
+ with tm.assert_produces_warning(FutureWarning, match=msg2):
1249
+ result = algos.value_counts(np.array([1, 1.0]))
1250
+ assert len(result) == 1
1251
+
1252
+ with tm.assert_produces_warning(FutureWarning, match=msg2):
1253
+ result = algos.value_counts(np.array([1, 1.0]), bins=1)
1254
+ assert len(result) == 1
1255
+
1256
+ with tm.assert_produces_warning(FutureWarning, match=msg2):
1257
+ result = algos.value_counts(Series([1, 1.0, "1"])) # object
1258
+ assert len(result) == 2
1259
+
1260
+ msg = "bins argument only works with numeric data"
1261
+ with pytest.raises(TypeError, match=msg):
1262
+ with tm.assert_produces_warning(FutureWarning, match=msg2):
1263
+ algos.value_counts(np.array(["1", 1], dtype=object), bins=1)
1264
+
1265
+ def test_value_counts_nat(self):
1266
+ td = Series([np.timedelta64(10000), NaT], dtype="timedelta64[ns]")
1267
+ dt = to_datetime(["NaT", "2014-01-01"])
1268
+
1269
+ msg = "pandas.value_counts is deprecated"
1270
+
1271
+ for ser in [td, dt]:
1272
+ with tm.assert_produces_warning(FutureWarning, match=msg):
1273
+ vc = algos.value_counts(ser)
1274
+ vc_with_na = algos.value_counts(ser, dropna=False)
1275
+ assert len(vc) == 1
1276
+ assert len(vc_with_na) == 2
1277
+
1278
+ exp_dt = Series({Timestamp("2014-01-01 00:00:00"): 1}, name="count")
1279
+ with tm.assert_produces_warning(FutureWarning, match=msg):
1280
+ result_dt = algos.value_counts(dt)
1281
+ tm.assert_series_equal(result_dt, exp_dt)
1282
+
1283
+ exp_td = Series({np.timedelta64(10000): 1}, name="count")
1284
+ with tm.assert_produces_warning(FutureWarning, match=msg):
1285
+ result_td = algos.value_counts(td)
1286
+ tm.assert_series_equal(result_td, exp_td)
1287
+
1288
+ @pytest.mark.parametrize("dtype", [object, "M8[us]"])
1289
+ def test_value_counts_datetime_outofbounds(self, dtype):
1290
+ # GH 13663
1291
+ ser = Series(
1292
+ [
1293
+ datetime(3000, 1, 1),
1294
+ datetime(5000, 1, 1),
1295
+ datetime(5000, 1, 1),
1296
+ datetime(6000, 1, 1),
1297
+ datetime(3000, 1, 1),
1298
+ datetime(3000, 1, 1),
1299
+ ],
1300
+ dtype=dtype,
1301
+ )
1302
+ res = ser.value_counts()
1303
+
1304
+ exp_index = Index(
1305
+ [datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1)],
1306
+ dtype=dtype,
1307
+ )
1308
+ exp = Series([3, 2, 1], index=exp_index, name="count")
1309
+ tm.assert_series_equal(res, exp)
1310
+
1311
+ def test_categorical(self):
1312
+ s = Series(Categorical(list("aaabbc")))
1313
+ result = s.value_counts()
1314
+ expected = Series(
1315
+ [3, 2, 1], index=CategoricalIndex(["a", "b", "c"]), name="count"
1316
+ )
1317
+
1318
+ tm.assert_series_equal(result, expected, check_index_type=True)
1319
+
1320
+ # preserve order?
1321
+ s = s.cat.as_ordered()
1322
+ result = s.value_counts()
1323
+ expected.index = expected.index.as_ordered()
1324
+ tm.assert_series_equal(result, expected, check_index_type=True)
1325
+
1326
+ def test_categorical_nans(self):
1327
+ s = Series(Categorical(list("aaaaabbbcc"))) # 4,3,2,1 (nan)
1328
+ s.iloc[1] = np.nan
1329
+ result = s.value_counts()
1330
+ expected = Series(
1331
+ [4, 3, 2],
1332
+ index=CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"]),
1333
+ name="count",
1334
+ )
1335
+ tm.assert_series_equal(result, expected, check_index_type=True)
1336
+ result = s.value_counts(dropna=False)
1337
+ expected = Series(
1338
+ [4, 3, 2, 1], index=CategoricalIndex(["a", "b", "c", np.nan]), name="count"
1339
+ )
1340
+ tm.assert_series_equal(result, expected, check_index_type=True)
1341
+
1342
+ # out of order
1343
+ s = Series(
1344
+ Categorical(list("aaaaabbbcc"), ordered=True, categories=["b", "a", "c"])
1345
+ )
1346
+ s.iloc[1] = np.nan
1347
+ result = s.value_counts()
1348
+ expected = Series(
1349
+ [4, 3, 2],
1350
+ index=CategoricalIndex(
1351
+ ["a", "b", "c"],
1352
+ categories=["b", "a", "c"],
1353
+ ordered=True,
1354
+ ),
1355
+ name="count",
1356
+ )
1357
+ tm.assert_series_equal(result, expected, check_index_type=True)
1358
+
1359
+ result = s.value_counts(dropna=False)
1360
+ expected = Series(
1361
+ [4, 3, 2, 1],
1362
+ index=CategoricalIndex(
1363
+ ["a", "b", "c", np.nan], categories=["b", "a", "c"], ordered=True
1364
+ ),
1365
+ name="count",
1366
+ )
1367
+ tm.assert_series_equal(result, expected, check_index_type=True)
1368
+
1369
+ def test_categorical_zeroes(self):
1370
+ # keep the `d` category with 0
1371
+ s = Series(Categorical(list("bbbaac"), categories=list("abcd"), ordered=True))
1372
+ result = s.value_counts()
1373
+ expected = Series(
1374
+ [3, 2, 1, 0],
1375
+ index=Categorical(
1376
+ ["b", "a", "c", "d"], categories=list("abcd"), ordered=True
1377
+ ),
1378
+ name="count",
1379
+ )
1380
+ tm.assert_series_equal(result, expected, check_index_type=True)
1381
+
1382
+ def test_value_counts_dropna(self):
1383
+ # https://github.com/pandas-dev/pandas/issues/9443#issuecomment-73719328
1384
+
1385
+ tm.assert_series_equal(
1386
+ Series([True, True, False]).value_counts(dropna=True),
1387
+ Series([2, 1], index=[True, False], name="count"),
1388
+ )
1389
+ tm.assert_series_equal(
1390
+ Series([True, True, False]).value_counts(dropna=False),
1391
+ Series([2, 1], index=[True, False], name="count"),
1392
+ )
1393
+
1394
+ tm.assert_series_equal(
1395
+ Series([True] * 3 + [False] * 2 + [None] * 5).value_counts(dropna=True),
1396
+ Series([3, 2], index=Index([True, False], dtype=object), name="count"),
1397
+ )
1398
+ tm.assert_series_equal(
1399
+ Series([True] * 5 + [False] * 3 + [None] * 2).value_counts(dropna=False),
1400
+ Series([5, 3, 2], index=[True, False, None], name="count"),
1401
+ )
1402
+ tm.assert_series_equal(
1403
+ Series([10.3, 5.0, 5.0]).value_counts(dropna=True),
1404
+ Series([2, 1], index=[5.0, 10.3], name="count"),
1405
+ )
1406
+ tm.assert_series_equal(
1407
+ Series([10.3, 5.0, 5.0]).value_counts(dropna=False),
1408
+ Series([2, 1], index=[5.0, 10.3], name="count"),
1409
+ )
1410
+
1411
+ tm.assert_series_equal(
1412
+ Series([10.3, 5.0, 5.0, None]).value_counts(dropna=True),
1413
+ Series([2, 1], index=[5.0, 10.3], name="count"),
1414
+ )
1415
+
1416
+ result = Series([10.3, 10.3, 5.0, 5.0, 5.0, None]).value_counts(dropna=False)
1417
+ expected = Series([3, 2, 1], index=[5.0, 10.3, None], name="count")
1418
+ tm.assert_series_equal(result, expected)
1419
+
1420
+ @pytest.mark.parametrize("dtype", (np.float64, object, "M8[ns]"))
1421
+ def test_value_counts_normalized(self, dtype):
1422
+ # GH12558
1423
+ s = Series([1] * 2 + [2] * 3 + [np.nan] * 5)
1424
+ s_typed = s.astype(dtype)
1425
+ result = s_typed.value_counts(normalize=True, dropna=False)
1426
+ expected = Series(
1427
+ [0.5, 0.3, 0.2],
1428
+ index=Series([np.nan, 2.0, 1.0], dtype=dtype),
1429
+ name="proportion",
1430
+ )
1431
+ tm.assert_series_equal(result, expected)
1432
+
1433
+ result = s_typed.value_counts(normalize=True, dropna=True)
1434
+ expected = Series(
1435
+ [0.6, 0.4], index=Series([2.0, 1.0], dtype=dtype), name="proportion"
1436
+ )
1437
+ tm.assert_series_equal(result, expected)
1438
+
1439
+ def test_value_counts_uint64(self):
1440
+ arr = np.array([2**63], dtype=np.uint64)
1441
+ expected = Series([1], index=[2**63], name="count")
1442
+ msg = "pandas.value_counts is deprecated"
1443
+ with tm.assert_produces_warning(FutureWarning, match=msg):
1444
+ result = algos.value_counts(arr)
1445
+
1446
+ tm.assert_series_equal(result, expected)
1447
+
1448
+ arr = np.array([-1, 2**63], dtype=object)
1449
+ expected = Series([1, 1], index=[-1, 2**63], name="count")
1450
+ with tm.assert_produces_warning(FutureWarning, match=msg):
1451
+ result = algos.value_counts(arr)
1452
+
1453
+ tm.assert_series_equal(result, expected)
1454
+
1455
+ def test_value_counts_series(self):
1456
+ # GH#54857
1457
+ values = np.array([3, 1, 2, 3, 4, np.nan])
1458
+ result = Series(values).value_counts(bins=3)
1459
+ expected = Series(
1460
+ [2, 2, 1],
1461
+ index=IntervalIndex.from_tuples(
1462
+ [(0.996, 2.0), (2.0, 3.0), (3.0, 4.0)], dtype="interval[float64, right]"
1463
+ ),
1464
+ name="count",
1465
+ )
1466
+ tm.assert_series_equal(result, expected)
1467
+
1468
+
1469
+ class TestDuplicated:
1470
+ def test_duplicated_with_nas(self):
1471
+ keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object)
1472
+
1473
+ result = algos.duplicated(keys)
1474
+ expected = np.array([False, False, False, True, False, True])
1475
+ tm.assert_numpy_array_equal(result, expected)
1476
+
1477
+ result = algos.duplicated(keys, keep="first")
1478
+ expected = np.array([False, False, False, True, False, True])
1479
+ tm.assert_numpy_array_equal(result, expected)
1480
+
1481
+ result = algos.duplicated(keys, keep="last")
1482
+ expected = np.array([True, False, True, False, False, False])
1483
+ tm.assert_numpy_array_equal(result, expected)
1484
+
1485
+ result = algos.duplicated(keys, keep=False)
1486
+ expected = np.array([True, False, True, True, False, True])
1487
+ tm.assert_numpy_array_equal(result, expected)
1488
+
1489
+ keys = np.empty(8, dtype=object)
1490
+ for i, t in enumerate(
1491
+ zip([0, 0, np.nan, np.nan] * 2, [0, np.nan, 0, np.nan] * 2)
1492
+ ):
1493
+ keys[i] = t
1494
+
1495
+ result = algos.duplicated(keys)
1496
+ falses = [False] * 4
1497
+ trues = [True] * 4
1498
+ expected = np.array(falses + trues)
1499
+ tm.assert_numpy_array_equal(result, expected)
1500
+
1501
+ result = algos.duplicated(keys, keep="last")
1502
+ expected = np.array(trues + falses)
1503
+ tm.assert_numpy_array_equal(result, expected)
1504
+
1505
+ result = algos.duplicated(keys, keep=False)
1506
+ expected = np.array(trues + trues)
1507
+ tm.assert_numpy_array_equal(result, expected)
1508
+
1509
+ @pytest.mark.parametrize(
1510
+ "case",
1511
+ [
1512
+ np.array([1, 2, 1, 5, 3, 2, 4, 1, 5, 6]),
1513
+ np.array([1.1, 2.2, 1.1, np.nan, 3.3, 2.2, 4.4, 1.1, np.nan, 6.6]),
1514
+ np.array(
1515
+ [
1516
+ 1 + 1j,
1517
+ 2 + 2j,
1518
+ 1 + 1j,
1519
+ 5 + 5j,
1520
+ 3 + 3j,
1521
+ 2 + 2j,
1522
+ 4 + 4j,
1523
+ 1 + 1j,
1524
+ 5 + 5j,
1525
+ 6 + 6j,
1526
+ ]
1527
+ ),
1528
+ np.array(["a", "b", "a", "e", "c", "b", "d", "a", "e", "f"], dtype=object),
1529
+ np.array(
1530
+ [1, 2**63, 1, 3**5, 10, 2**63, 39, 1, 3**5, 7], dtype=np.uint64
1531
+ ),
1532
+ ],
1533
+ )
1534
+ def test_numeric_object_likes(self, case):
1535
+ exp_first = np.array(
1536
+ [False, False, True, False, False, True, False, True, True, False]
1537
+ )
1538
+ exp_last = np.array(
1539
+ [True, True, True, True, False, False, False, False, False, False]
1540
+ )
1541
+ exp_false = exp_first | exp_last
1542
+
1543
+ res_first = algos.duplicated(case, keep="first")
1544
+ tm.assert_numpy_array_equal(res_first, exp_first)
1545
+
1546
+ res_last = algos.duplicated(case, keep="last")
1547
+ tm.assert_numpy_array_equal(res_last, exp_last)
1548
+
1549
+ res_false = algos.duplicated(case, keep=False)
1550
+ tm.assert_numpy_array_equal(res_false, exp_false)
1551
+
1552
+ # index
1553
+ for idx in [Index(case), Index(case, dtype="category")]:
1554
+ res_first = idx.duplicated(keep="first")
1555
+ tm.assert_numpy_array_equal(res_first, exp_first)
1556
+
1557
+ res_last = idx.duplicated(keep="last")
1558
+ tm.assert_numpy_array_equal(res_last, exp_last)
1559
+
1560
+ res_false = idx.duplicated(keep=False)
1561
+ tm.assert_numpy_array_equal(res_false, exp_false)
1562
+
1563
+ # series
1564
+ for s in [Series(case), Series(case, dtype="category")]:
1565
+ res_first = s.duplicated(keep="first")
1566
+ tm.assert_series_equal(res_first, Series(exp_first))
1567
+
1568
+ res_last = s.duplicated(keep="last")
1569
+ tm.assert_series_equal(res_last, Series(exp_last))
1570
+
1571
+ res_false = s.duplicated(keep=False)
1572
+ tm.assert_series_equal(res_false, Series(exp_false))
1573
+
1574
+ def test_datetime_likes(self):
1575
+ dt = [
1576
+ "2011-01-01",
1577
+ "2011-01-02",
1578
+ "2011-01-01",
1579
+ "NaT",
1580
+ "2011-01-03",
1581
+ "2011-01-02",
1582
+ "2011-01-04",
1583
+ "2011-01-01",
1584
+ "NaT",
1585
+ "2011-01-06",
1586
+ ]
1587
+ td = [
1588
+ "1 days",
1589
+ "2 days",
1590
+ "1 days",
1591
+ "NaT",
1592
+ "3 days",
1593
+ "2 days",
1594
+ "4 days",
1595
+ "1 days",
1596
+ "NaT",
1597
+ "6 days",
1598
+ ]
1599
+
1600
+ cases = [
1601
+ np.array([Timestamp(d) for d in dt]),
1602
+ np.array([Timestamp(d, tz="US/Eastern") for d in dt]),
1603
+ np.array([Period(d, freq="D") for d in dt]),
1604
+ np.array([np.datetime64(d) for d in dt]),
1605
+ np.array([Timedelta(d) for d in td]),
1606
+ ]
1607
+
1608
+ exp_first = np.array(
1609
+ [False, False, True, False, False, True, False, True, True, False]
1610
+ )
1611
+ exp_last = np.array(
1612
+ [True, True, True, True, False, False, False, False, False, False]
1613
+ )
1614
+ exp_false = exp_first | exp_last
1615
+
1616
+ for case in cases:
1617
+ res_first = algos.duplicated(case, keep="first")
1618
+ tm.assert_numpy_array_equal(res_first, exp_first)
1619
+
1620
+ res_last = algos.duplicated(case, keep="last")
1621
+ tm.assert_numpy_array_equal(res_last, exp_last)
1622
+
1623
+ res_false = algos.duplicated(case, keep=False)
1624
+ tm.assert_numpy_array_equal(res_false, exp_false)
1625
+
1626
+ # index
1627
+ for idx in [
1628
+ Index(case),
1629
+ Index(case, dtype="category"),
1630
+ Index(case, dtype=object),
1631
+ ]:
1632
+ res_first = idx.duplicated(keep="first")
1633
+ tm.assert_numpy_array_equal(res_first, exp_first)
1634
+
1635
+ res_last = idx.duplicated(keep="last")
1636
+ tm.assert_numpy_array_equal(res_last, exp_last)
1637
+
1638
+ res_false = idx.duplicated(keep=False)
1639
+ tm.assert_numpy_array_equal(res_false, exp_false)
1640
+
1641
+ # series
1642
+ for s in [
1643
+ Series(case),
1644
+ Series(case, dtype="category"),
1645
+ Series(case, dtype=object),
1646
+ ]:
1647
+ res_first = s.duplicated(keep="first")
1648
+ tm.assert_series_equal(res_first, Series(exp_first))
1649
+
1650
+ res_last = s.duplicated(keep="last")
1651
+ tm.assert_series_equal(res_last, Series(exp_last))
1652
+
1653
+ res_false = s.duplicated(keep=False)
1654
+ tm.assert_series_equal(res_false, Series(exp_false))
1655
+
1656
+ @pytest.mark.parametrize("case", [Index([1, 2, 3]), pd.RangeIndex(0, 3)])
1657
+ def test_unique_index(self, case):
1658
+ assert case.is_unique is True
1659
+ tm.assert_numpy_array_equal(case.duplicated(), np.array([False, False, False]))
1660
+
1661
+ @pytest.mark.parametrize(
1662
+ "arr, uniques",
1663
+ [
1664
+ (
1665
+ [(0, 0), (0, 1), (1, 0), (1, 1), (0, 0), (0, 1), (1, 0), (1, 1)],
1666
+ [(0, 0), (0, 1), (1, 0), (1, 1)],
1667
+ ),
1668
+ (
1669
+ [("b", "c"), ("a", "b"), ("a", "b"), ("b", "c")],
1670
+ [("b", "c"), ("a", "b")],
1671
+ ),
1672
+ ([("a", 1), ("b", 2), ("a", 3), ("a", 1)], [("a", 1), ("b", 2), ("a", 3)]),
1673
+ ],
1674
+ )
1675
+ def test_unique_tuples(self, arr, uniques):
1676
+ # https://github.com/pandas-dev/pandas/issues/16519
1677
+ expected = np.empty(len(uniques), dtype=object)
1678
+ expected[:] = uniques
1679
+
1680
+ msg = "unique with argument that is not not a Series"
1681
+ with tm.assert_produces_warning(FutureWarning, match=msg):
1682
+ result = pd.unique(arr)
1683
+ tm.assert_numpy_array_equal(result, expected)
1684
+
1685
+ @pytest.mark.parametrize(
1686
+ "array,expected",
1687
+ [
1688
+ (
1689
+ [1 + 1j, 0, 1, 1j, 1 + 2j, 1 + 2j],
1690
+ # Should return a complex dtype in the future
1691
+ np.array([(1 + 1j), 0j, (1 + 0j), 1j, (1 + 2j)], dtype=object),
1692
+ )
1693
+ ],
1694
+ )
1695
+ def test_unique_complex_numbers(self, array, expected):
1696
+ # GH 17927
1697
+ msg = "unique with argument that is not not a Series"
1698
+ with tm.assert_produces_warning(FutureWarning, match=msg):
1699
+ result = pd.unique(array)
1700
+ tm.assert_numpy_array_equal(result, expected)
1701
+
1702
+
1703
+ class TestHashTable:
1704
+ @pytest.mark.parametrize(
1705
+ "htable, data",
1706
+ [
1707
+ (ht.PyObjectHashTable, [f"foo_{i}" for i in range(1000)]),
1708
+ (ht.StringHashTable, [f"foo_{i}" for i in range(1000)]),
1709
+ (ht.Float64HashTable, np.arange(1000, dtype=np.float64)),
1710
+ (ht.Int64HashTable, np.arange(1000, dtype=np.int64)),
1711
+ (ht.UInt64HashTable, np.arange(1000, dtype=np.uint64)),
1712
+ ],
1713
+ )
1714
+ def test_hashtable_unique(self, htable, data, writable):
1715
+ # output of maker has guaranteed unique elements
1716
+ s = Series(data)
1717
+ if htable == ht.Float64HashTable:
1718
+ # add NaN for float column
1719
+ s.loc[500] = np.nan
1720
+ elif htable == ht.PyObjectHashTable:
1721
+ # use different NaN types for object column
1722
+ s.loc[500:502] = [np.nan, None, NaT]
1723
+
1724
+ # create duplicated selection
1725
+ s_duplicated = s.sample(frac=3, replace=True).reset_index(drop=True)
1726
+ s_duplicated.values.setflags(write=writable)
1727
+
1728
+ # drop_duplicates has own cython code (hash_table_func_helper.pxi)
1729
+ # and is tested separately; keeps first occurrence like ht.unique()
1730
+ expected_unique = s_duplicated.drop_duplicates(keep="first").values
1731
+ result_unique = htable().unique(s_duplicated.values)
1732
+ tm.assert_numpy_array_equal(result_unique, expected_unique)
1733
+
1734
+ # test return_inverse=True
1735
+ # reconstruction can only succeed if the inverse is correct
1736
+ result_unique, result_inverse = htable().unique(
1737
+ s_duplicated.values, return_inverse=True
1738
+ )
1739
+ tm.assert_numpy_array_equal(result_unique, expected_unique)
1740
+ reconstr = result_unique[result_inverse]
1741
+ tm.assert_numpy_array_equal(reconstr, s_duplicated.values)
1742
+
1743
+ @pytest.mark.parametrize(
1744
+ "htable, data",
1745
+ [
1746
+ (ht.PyObjectHashTable, [f"foo_{i}" for i in range(1000)]),
1747
+ (ht.StringHashTable, [f"foo_{i}" for i in range(1000)]),
1748
+ (ht.Float64HashTable, np.arange(1000, dtype=np.float64)),
1749
+ (ht.Int64HashTable, np.arange(1000, dtype=np.int64)),
1750
+ (ht.UInt64HashTable, np.arange(1000, dtype=np.uint64)),
1751
+ ],
1752
+ )
1753
+ def test_hashtable_factorize(self, htable, writable, data):
1754
+ # output of maker has guaranteed unique elements
1755
+ s = Series(data)
1756
+ if htable == ht.Float64HashTable:
1757
+ # add NaN for float column
1758
+ s.loc[500] = np.nan
1759
+ elif htable == ht.PyObjectHashTable:
1760
+ # use different NaN types for object column
1761
+ s.loc[500:502] = [np.nan, None, NaT]
1762
+
1763
+ # create duplicated selection
1764
+ s_duplicated = s.sample(frac=3, replace=True).reset_index(drop=True)
1765
+ s_duplicated.values.setflags(write=writable)
1766
+ na_mask = s_duplicated.isna().values
1767
+
1768
+ result_unique, result_inverse = htable().factorize(s_duplicated.values)
1769
+
1770
+ # drop_duplicates has own cython code (hash_table_func_helper.pxi)
1771
+ # and is tested separately; keeps first occurrence like ht.factorize()
1772
+ # since factorize removes all NaNs, we do the same here
1773
+ expected_unique = s_duplicated.dropna().drop_duplicates().values
1774
+ tm.assert_numpy_array_equal(result_unique, expected_unique)
1775
+
1776
+ # reconstruction can only succeed if the inverse is correct. Since
1777
+ # factorize removes the NaNs, those have to be excluded here as well
1778
+ result_reconstruct = result_unique[result_inverse[~na_mask]]
1779
+ expected_reconstruct = s_duplicated.dropna().values
1780
+ tm.assert_numpy_array_equal(result_reconstruct, expected_reconstruct)
1781
+
1782
+
1783
+ class TestRank:
1784
+ @pytest.mark.parametrize(
1785
+ "arr",
1786
+ [
1787
+ [np.nan, np.nan, 5.0, 5.0, 5.0, np.nan, 1, 2, 3, np.nan],
1788
+ [4.0, np.nan, 5.0, 5.0, 5.0, np.nan, 1, 2, 4.0, np.nan],
1789
+ ],
1790
+ )
1791
+ def test_scipy_compat(self, arr):
1792
+ sp_stats = pytest.importorskip("scipy.stats")
1793
+
1794
+ arr = np.array(arr)
1795
+
1796
+ mask = ~np.isfinite(arr)
1797
+ arr = arr.copy()
1798
+ result = libalgos.rank_1d(arr)
1799
+ arr[mask] = np.inf
1800
+ exp = sp_stats.rankdata(arr)
1801
+ exp[mask] = np.nan
1802
+ tm.assert_almost_equal(result, exp)
1803
+
1804
+ @pytest.mark.parametrize("dtype", np.typecodes["AllInteger"])
1805
+ def test_basic(self, writable, dtype):
1806
+ exp = np.array([1, 2], dtype=np.float64)
1807
+
1808
+ data = np.array([1, 100], dtype=dtype)
1809
+ data.setflags(write=writable)
1810
+ ser = Series(data)
1811
+ result = algos.rank(ser)
1812
+ tm.assert_numpy_array_equal(result, exp)
1813
+
1814
+ @pytest.mark.parametrize("dtype", [np.float64, np.uint64])
1815
+ def test_uint64_overflow(self, dtype):
1816
+ exp = np.array([1, 2], dtype=np.float64)
1817
+
1818
+ s = Series([1, 2**63], dtype=dtype)
1819
+ tm.assert_numpy_array_equal(algos.rank(s), exp)
1820
+
1821
+ def test_too_many_ndims(self):
1822
+ arr = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
1823
+ msg = "Array with ndim > 2 are not supported"
1824
+
1825
+ with pytest.raises(TypeError, match=msg):
1826
+ algos.rank(arr)
1827
+
1828
+ @pytest.mark.single_cpu
1829
+ def test_pct_max_many_rows(self):
1830
+ # GH 18271
1831
+ values = np.arange(2**24 + 1)
1832
+ result = algos.rank(values, pct=True).max()
1833
+ assert result == 1
1834
+
1835
+ values = np.arange(2**25 + 2).reshape(2**24 + 1, 2)
1836
+ result = algos.rank(values, pct=True).max()
1837
+ assert result == 1
1838
+
1839
+
1840
+ class TestMode:
1841
+ def test_no_mode(self):
1842
+ exp = Series([], dtype=np.float64, index=Index([], dtype=int))
1843
+ tm.assert_numpy_array_equal(algos.mode(np.array([])), exp.values)
1844
+
1845
+ @pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"])
1846
+ def test_mode_single(self, dt):
1847
+ # GH 15714
1848
+ exp_single = [1]
1849
+ data_single = [1]
1850
+
1851
+ exp_multi = [1]
1852
+ data_multi = [1, 1]
1853
+
1854
+ ser = Series(data_single, dtype=dt)
1855
+ exp = Series(exp_single, dtype=dt)
1856
+ tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
1857
+ tm.assert_series_equal(ser.mode(), exp)
1858
+
1859
+ ser = Series(data_multi, dtype=dt)
1860
+ exp = Series(exp_multi, dtype=dt)
1861
+ tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
1862
+ tm.assert_series_equal(ser.mode(), exp)
1863
+
1864
+ def test_mode_obj_int(self):
1865
+ exp = Series([1], dtype=int)
1866
+ tm.assert_numpy_array_equal(algos.mode(exp.values), exp.values)
1867
+
1868
+ exp = Series(["a", "b", "c"], dtype=object)
1869
+ tm.assert_numpy_array_equal(algos.mode(exp.values), exp.values)
1870
+
1871
+ @pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"])
1872
+ def test_number_mode(self, dt):
1873
+ exp_single = [1]
1874
+ data_single = [1] * 5 + [2] * 3
1875
+
1876
+ exp_multi = [1, 3]
1877
+ data_multi = [1] * 5 + [2] * 3 + [3] * 5
1878
+
1879
+ ser = Series(data_single, dtype=dt)
1880
+ exp = Series(exp_single, dtype=dt)
1881
+ tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
1882
+ tm.assert_series_equal(ser.mode(), exp)
1883
+
1884
+ ser = Series(data_multi, dtype=dt)
1885
+ exp = Series(exp_multi, dtype=dt)
1886
+ tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
1887
+ tm.assert_series_equal(ser.mode(), exp)
1888
+
1889
+ def test_strobj_mode(self):
1890
+ exp = ["b"]
1891
+ data = ["a"] * 2 + ["b"] * 3
1892
+
1893
+ ser = Series(data, dtype="c")
1894
+ exp = Series(exp, dtype="c")
1895
+ tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
1896
+ tm.assert_series_equal(ser.mode(), exp)
1897
+
1898
+ @pytest.mark.parametrize("dt", [str, object])
1899
+ def test_strobj_multi_char(self, dt):
1900
+ exp = ["bar"]
1901
+ data = ["foo"] * 2 + ["bar"] * 3
1902
+
1903
+ ser = Series(data, dtype=dt)
1904
+ exp = Series(exp, dtype=dt)
1905
+ tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
1906
+ tm.assert_series_equal(ser.mode(), exp)
1907
+
1908
+ def test_datelike_mode(self):
1909
+ exp = Series(["1900-05-03", "2011-01-03", "2013-01-02"], dtype="M8[ns]")
1910
+ ser = Series(["2011-01-03", "2013-01-02", "1900-05-03"], dtype="M8[ns]")
1911
+ tm.assert_extension_array_equal(algos.mode(ser.values), exp._values)
1912
+ tm.assert_series_equal(ser.mode(), exp)
1913
+
1914
+ exp = Series(["2011-01-03", "2013-01-02"], dtype="M8[ns]")
1915
+ ser = Series(
1916
+ ["2011-01-03", "2013-01-02", "1900-05-03", "2011-01-03", "2013-01-02"],
1917
+ dtype="M8[ns]",
1918
+ )
1919
+ tm.assert_extension_array_equal(algos.mode(ser.values), exp._values)
1920
+ tm.assert_series_equal(ser.mode(), exp)
1921
+
1922
+ def test_timedelta_mode(self):
1923
+ exp = Series(["-1 days", "0 days", "1 days"], dtype="timedelta64[ns]")
1924
+ ser = Series(["1 days", "-1 days", "0 days"], dtype="timedelta64[ns]")
1925
+ tm.assert_extension_array_equal(algos.mode(ser.values), exp._values)
1926
+ tm.assert_series_equal(ser.mode(), exp)
1927
+
1928
+ exp = Series(["2 min", "1 day"], dtype="timedelta64[ns]")
1929
+ ser = Series(
1930
+ ["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"],
1931
+ dtype="timedelta64[ns]",
1932
+ )
1933
+ tm.assert_extension_array_equal(algos.mode(ser.values), exp._values)
1934
+ tm.assert_series_equal(ser.mode(), exp)
1935
+
1936
+ def test_mixed_dtype(self):
1937
+ exp = Series(["foo"], dtype=object)
1938
+ ser = Series([1, "foo", "foo"])
1939
+ tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
1940
+ tm.assert_series_equal(ser.mode(), exp)
1941
+
1942
+ def test_uint64_overflow(self):
1943
+ exp = Series([2**63], dtype=np.uint64)
1944
+ ser = Series([1, 2**63, 2**63], dtype=np.uint64)
1945
+ tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
1946
+ tm.assert_series_equal(ser.mode(), exp)
1947
+
1948
+ exp = Series([1, 2**63], dtype=np.uint64)
1949
+ ser = Series([1, 2**63], dtype=np.uint64)
1950
+ tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
1951
+ tm.assert_series_equal(ser.mode(), exp)
1952
+
1953
+ def test_categorical(self):
1954
+ c = Categorical([1, 2])
1955
+ exp = c
1956
+ res = Series(c).mode()._values
1957
+ tm.assert_categorical_equal(res, exp)
1958
+
1959
+ c = Categorical([1, "a", "a"])
1960
+ exp = Categorical(["a"], categories=[1, "a"])
1961
+ res = Series(c).mode()._values
1962
+ tm.assert_categorical_equal(res, exp)
1963
+
1964
+ c = Categorical([1, 1, 2, 3, 3])
1965
+ exp = Categorical([1, 3], categories=[1, 2, 3])
1966
+ res = Series(c).mode()._values
1967
+ tm.assert_categorical_equal(res, exp)
1968
+
1969
+ def test_index(self):
1970
+ idx = Index([1, 2, 3])
1971
+ exp = Series([1, 2, 3], dtype=np.int64)
1972
+ tm.assert_numpy_array_equal(algos.mode(idx), exp.values)
1973
+
1974
+ idx = Index([1, "a", "a"])
1975
+ exp = Series(["a"], dtype=object)
1976
+ tm.assert_numpy_array_equal(algos.mode(idx), exp.values)
1977
+
1978
+ idx = Index([1, 1, 2, 3, 3])
1979
+ exp = Series([1, 3], dtype=np.int64)
1980
+ tm.assert_numpy_array_equal(algos.mode(idx), exp.values)
1981
+
1982
+ idx = Index(
1983
+ ["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"],
1984
+ dtype="timedelta64[ns]",
1985
+ )
1986
+ with pytest.raises(AttributeError, match="TimedeltaIndex"):
1987
+ # algos.mode expects Arraylike, does *not* unwrap TimedeltaIndex
1988
+ algos.mode(idx)
1989
+
1990
+ def test_ser_mode_with_name(self):
1991
+ # GH 46737
1992
+ ser = Series([1, 1, 3], name="foo")
1993
+ result = ser.mode()
1994
+ expected = Series([1], name="foo")
1995
+ tm.assert_series_equal(result, expected)
1996
+
1997
+
1998
+ class TestDiff:
1999
+ @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
2000
+ def test_diff_datetimelike_nat(self, dtype):
2001
+ # NaT - NaT is NaT, not 0
2002
+ arr = np.arange(12).astype(np.int64).view(dtype).reshape(3, 4)
2003
+ arr[:, 2] = arr.dtype.type("NaT", "ns")
2004
+ result = algos.diff(arr, 1, axis=0)
2005
+
2006
+ expected = np.ones(arr.shape, dtype="timedelta64[ns]") * 4
2007
+ expected[:, 2] = np.timedelta64("NaT", "ns")
2008
+ expected[0, :] = np.timedelta64("NaT", "ns")
2009
+
2010
+ tm.assert_numpy_array_equal(result, expected)
2011
+
2012
+ result = algos.diff(arr.T, 1, axis=1)
2013
+ tm.assert_numpy_array_equal(result, expected.T)
2014
+
2015
+ def test_diff_ea_axis(self):
2016
+ dta = date_range("2016-01-01", periods=3, tz="US/Pacific")._data
2017
+
2018
+ msg = "cannot diff DatetimeArray on axis=1"
2019
+ with pytest.raises(ValueError, match=msg):
2020
+ algos.diff(dta, 1, axis=1)
2021
+
2022
+ @pytest.mark.parametrize("dtype", ["int8", "int16"])
2023
+ def test_diff_low_precision_int(self, dtype):
2024
+ arr = np.array([0, 1, 1, 0, 0], dtype=dtype)
2025
+ result = algos.diff(arr, 1)
2026
+ expected = np.array([np.nan, 1, 0, -1, 0], dtype="float32")
2027
+ tm.assert_numpy_array_equal(result, expected)
2028
+
2029
+
2030
+ @pytest.mark.parametrize("op", [np.array, pd.array])
2031
+ def test_union_with_duplicates(op):
2032
+ # GH#36289
2033
+ lvals = op([3, 1, 3, 4])
2034
+ rvals = op([2, 3, 1, 1])
2035
+ expected = op([3, 3, 1, 1, 4, 2])
2036
+ if isinstance(expected, np.ndarray):
2037
+ result = algos.union_with_duplicates(lvals, rvals)
2038
+ tm.assert_numpy_array_equal(result, expected)
2039
+ else:
2040
+ result = algos.union_with_duplicates(lvals, rvals)
2041
+ tm.assert_extension_array_equal(result, expected)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_common.py ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import collections
2
+ from functools import partial
3
+ import string
4
+ import subprocess
5
+ import sys
6
+ import textwrap
7
+
8
+ import numpy as np
9
+ import pytest
10
+
11
+ import pandas as pd
12
+ from pandas import Series
13
+ import pandas._testing as tm
14
+ from pandas.core import ops
15
+ import pandas.core.common as com
16
+ from pandas.util.version import Version
17
+
18
+
19
+ def test_get_callable_name():
20
+ getname = com.get_callable_name
21
+
22
+ def fn(x):
23
+ return x
24
+
25
+ lambda_ = lambda x: x
26
+ part1 = partial(fn)
27
+ part2 = partial(part1)
28
+
29
+ class somecall:
30
+ def __call__(self):
31
+ # This shouldn't actually get called below; somecall.__init__
32
+ # should.
33
+ raise NotImplementedError
34
+
35
+ assert getname(fn) == "fn"
36
+ assert getname(lambda_)
37
+ assert getname(part1) == "fn"
38
+ assert getname(part2) == "fn"
39
+ assert getname(somecall()) == "somecall"
40
+ assert getname(1) is None
41
+
42
+
43
+ def test_any_none():
44
+ assert com.any_none(1, 2, 3, None)
45
+ assert not com.any_none(1, 2, 3, 4)
46
+
47
+
48
+ def test_all_not_none():
49
+ assert com.all_not_none(1, 2, 3, 4)
50
+ assert not com.all_not_none(1, 2, 3, None)
51
+ assert not com.all_not_none(None, None, None, None)
52
+
53
+
54
+ def test_random_state():
55
+ # Check with seed
56
+ state = com.random_state(5)
57
+ assert state.uniform() == np.random.RandomState(5).uniform()
58
+
59
+ # Check with random state object
60
+ state2 = np.random.RandomState(10)
61
+ assert com.random_state(state2).uniform() == np.random.RandomState(10).uniform()
62
+
63
+ # check with no arg random state
64
+ assert com.random_state() is np.random
65
+
66
+ # check array-like
67
+ # GH32503
68
+ state_arr_like = np.random.default_rng(None).integers(
69
+ 0, 2**31, size=624, dtype="uint32"
70
+ )
71
+ assert (
72
+ com.random_state(state_arr_like).uniform()
73
+ == np.random.RandomState(state_arr_like).uniform()
74
+ )
75
+
76
+ # Check BitGenerators
77
+ # GH32503
78
+ assert (
79
+ com.random_state(np.random.MT19937(3)).uniform()
80
+ == np.random.RandomState(np.random.MT19937(3)).uniform()
81
+ )
82
+ assert (
83
+ com.random_state(np.random.PCG64(11)).uniform()
84
+ == np.random.RandomState(np.random.PCG64(11)).uniform()
85
+ )
86
+
87
+ # Error for floats or strings
88
+ msg = (
89
+ "random_state must be an integer, array-like, a BitGenerator, Generator, "
90
+ "a numpy RandomState, or None"
91
+ )
92
+ with pytest.raises(ValueError, match=msg):
93
+ com.random_state("test")
94
+
95
+ with pytest.raises(ValueError, match=msg):
96
+ com.random_state(5.5)
97
+
98
+
99
+ @pytest.mark.parametrize(
100
+ "left, right, expected",
101
+ [
102
+ (Series([1], name="x"), Series([2], name="x"), "x"),
103
+ (Series([1], name="x"), Series([2], name="y"), None),
104
+ (Series([1]), Series([2], name="x"), None),
105
+ (Series([1], name="x"), Series([2]), None),
106
+ (Series([1], name="x"), [2], "x"),
107
+ ([1], Series([2], name="y"), "y"),
108
+ # matching NAs
109
+ (Series([1], name=np.nan), pd.Index([], name=np.nan), np.nan),
110
+ (Series([1], name=np.nan), pd.Index([], name=pd.NaT), None),
111
+ (Series([1], name=pd.NA), pd.Index([], name=pd.NA), pd.NA),
112
+ # tuple name GH#39757
113
+ (
114
+ Series([1], name=np.int64(1)),
115
+ pd.Index([], name=(np.int64(1), np.int64(2))),
116
+ None,
117
+ ),
118
+ (
119
+ Series([1], name=(np.int64(1), np.int64(2))),
120
+ pd.Index([], name=(np.int64(1), np.int64(2))),
121
+ (np.int64(1), np.int64(2)),
122
+ ),
123
+ pytest.param(
124
+ Series([1], name=(np.float64("nan"), np.int64(2))),
125
+ pd.Index([], name=(np.float64("nan"), np.int64(2))),
126
+ (np.float64("nan"), np.int64(2)),
127
+ marks=pytest.mark.xfail(
128
+ reason="Not checking for matching NAs inside tuples."
129
+ ),
130
+ ),
131
+ ],
132
+ )
133
+ def test_maybe_match_name(left, right, expected):
134
+ res = ops.common._maybe_match_name(left, right)
135
+ assert res is expected or res == expected
136
+
137
+
138
+ def test_standardize_mapping():
139
+ # No uninitialized defaultdicts
140
+ msg = r"to_dict\(\) only accepts initialized defaultdicts"
141
+ with pytest.raises(TypeError, match=msg):
142
+ com.standardize_mapping(collections.defaultdict)
143
+
144
+ # No non-mapping subtypes, instance
145
+ msg = "unsupported type: <class 'list'>"
146
+ with pytest.raises(TypeError, match=msg):
147
+ com.standardize_mapping([])
148
+
149
+ # No non-mapping subtypes, class
150
+ with pytest.raises(TypeError, match=msg):
151
+ com.standardize_mapping(list)
152
+
153
+ fill = {"bad": "data"}
154
+ assert com.standardize_mapping(fill) == dict
155
+
156
+ # Convert instance to type
157
+ assert com.standardize_mapping({}) == dict
158
+
159
+ dd = collections.defaultdict(list)
160
+ assert isinstance(com.standardize_mapping(dd), partial)
161
+
162
+
163
+ def test_git_version():
164
+ # GH 21295
165
+ git_version = pd.__git_version__
166
+ assert len(git_version) == 40
167
+ assert all(c in string.hexdigits for c in git_version)
168
+
169
+
170
+ def test_version_tag():
171
+ version = Version(pd.__version__)
172
+ try:
173
+ version > Version("0.0.1")
174
+ except TypeError:
175
+ raise ValueError(
176
+ "No git tags exist, please sync tags between upstream and your repo"
177
+ )
178
+
179
+
180
+ @pytest.mark.parametrize(
181
+ "obj", [(obj,) for obj in pd.__dict__.values() if callable(obj)]
182
+ )
183
+ def test_serializable(obj):
184
+ # GH 35611
185
+ unpickled = tm.round_trip_pickle(obj)
186
+ assert type(obj) == type(unpickled)
187
+
188
+
189
+ class TestIsBoolIndexer:
190
+ def test_non_bool_array_with_na(self):
191
+ # in particular, this should not raise
192
+ arr = np.array(["A", "B", np.nan], dtype=object)
193
+ assert not com.is_bool_indexer(arr)
194
+
195
+ def test_list_subclass(self):
196
+ # GH#42433
197
+
198
+ class MyList(list):
199
+ pass
200
+
201
+ val = MyList(["a"])
202
+
203
+ assert not com.is_bool_indexer(val)
204
+
205
+ val = MyList([True])
206
+ assert com.is_bool_indexer(val)
207
+
208
+ def test_frozenlist(self):
209
+ # GH#42461
210
+ data = {"col1": [1, 2], "col2": [3, 4]}
211
+ df = pd.DataFrame(data=data)
212
+
213
+ frozen = df.index.names[1:]
214
+ assert not com.is_bool_indexer(frozen)
215
+
216
+ result = df[frozen]
217
+ expected = df[[]]
218
+ tm.assert_frame_equal(result, expected)
219
+
220
+
221
+ @pytest.mark.parametrize("with_exception", [True, False])
222
+ def test_temp_setattr(with_exception):
223
+ # GH#45954
224
+ ser = Series(dtype=object)
225
+ ser.name = "first"
226
+ # Raise a ValueError in either case to satisfy pytest.raises
227
+ match = "Inside exception raised" if with_exception else "Outside exception raised"
228
+ with pytest.raises(ValueError, match=match):
229
+ with com.temp_setattr(ser, "name", "second"):
230
+ assert ser.name == "second"
231
+ if with_exception:
232
+ raise ValueError("Inside exception raised")
233
+ raise ValueError("Outside exception raised")
234
+ assert ser.name == "first"
235
+
236
+
237
+ @pytest.mark.single_cpu
238
+ def test_str_size():
239
+ # GH#21758
240
+ a = "a"
241
+ expected = sys.getsizeof(a)
242
+ pyexe = sys.executable.replace("\\", "/")
243
+ call = [
244
+ pyexe,
245
+ "-c",
246
+ "a='a';import sys;sys.getsizeof(a);import pandas;print(sys.getsizeof(a));",
247
+ ]
248
+ result = subprocess.check_output(call).decode()[-4:-1].strip("\n")
249
+ assert int(result) == int(expected)
250
+
251
+
252
+ @pytest.mark.single_cpu
253
+ def test_bz2_missing_import():
254
+ # Check whether bz2 missing import is handled correctly (issue #53857)
255
+ code = """
256
+ import sys
257
+ sys.modules['bz2'] = None
258
+ import pytest
259
+ import pandas as pd
260
+ from pandas.compat import get_bz2_file
261
+ msg = 'bz2 module not available.'
262
+ with pytest.raises(RuntimeError, match=msg):
263
+ get_bz2_file()
264
+ """
265
+ code = textwrap.dedent(code)
266
+ call = [sys.executable, "-c", code]
267
+ subprocess.check_output(call)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_downstream.py ADDED
@@ -0,0 +1,362 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Testing that we work in the downstream packages
3
+ """
4
+ import array
5
+ import subprocess
6
+ import sys
7
+
8
+ import numpy as np
9
+ import pytest
10
+
11
+ from pandas.errors import IntCastingNaNError
12
+ import pandas.util._test_decorators as td
13
+
14
+ import pandas as pd
15
+ from pandas import (
16
+ DataFrame,
17
+ DatetimeIndex,
18
+ Series,
19
+ TimedeltaIndex,
20
+ )
21
+ import pandas._testing as tm
22
+ from pandas.core.arrays import (
23
+ DatetimeArray,
24
+ TimedeltaArray,
25
+ )
26
+
27
+
28
+ @pytest.fixture
29
+ def df():
30
+ return DataFrame({"A": [1, 2, 3]})
31
+
32
+
33
+ def test_dask(df):
34
+ # dask sets "compute.use_numexpr" to False, so catch the current value
35
+ # and ensure to reset it afterwards to avoid impacting other tests
36
+ olduse = pd.get_option("compute.use_numexpr")
37
+
38
+ try:
39
+ pytest.importorskip("toolz")
40
+ dd = pytest.importorskip("dask.dataframe")
41
+
42
+ ddf = dd.from_pandas(df, npartitions=3)
43
+ assert ddf.A is not None
44
+ assert ddf.compute() is not None
45
+ finally:
46
+ pd.set_option("compute.use_numexpr", olduse)
47
+
48
+
49
+ def test_dask_ufunc():
50
+ # dask sets "compute.use_numexpr" to False, so catch the current value
51
+ # and ensure to reset it afterwards to avoid impacting other tests
52
+ olduse = pd.get_option("compute.use_numexpr")
53
+
54
+ try:
55
+ da = pytest.importorskip("dask.array")
56
+ dd = pytest.importorskip("dask.dataframe")
57
+
58
+ s = Series([1.5, 2.3, 3.7, 4.0])
59
+ ds = dd.from_pandas(s, npartitions=2)
60
+
61
+ result = da.fix(ds).compute()
62
+ expected = np.fix(s)
63
+ tm.assert_series_equal(result, expected)
64
+ finally:
65
+ pd.set_option("compute.use_numexpr", olduse)
66
+
67
+
68
+ def test_construct_dask_float_array_int_dtype_match_ndarray():
69
+ # GH#40110 make sure we treat a float-dtype dask array with the same
70
+ # rules we would for an ndarray
71
+ dd = pytest.importorskip("dask.dataframe")
72
+
73
+ arr = np.array([1, 2.5, 3])
74
+ darr = dd.from_array(arr)
75
+
76
+ res = Series(darr)
77
+ expected = Series(arr)
78
+ tm.assert_series_equal(res, expected)
79
+
80
+ # GH#49599 in 2.0 we raise instead of silently ignoring the dtype
81
+ msg = "Trying to coerce float values to integers"
82
+ with pytest.raises(ValueError, match=msg):
83
+ Series(darr, dtype="i8")
84
+
85
+ msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
86
+ arr[2] = np.nan
87
+ with pytest.raises(IntCastingNaNError, match=msg):
88
+ Series(darr, dtype="i8")
89
+ # which is the same as we get with a numpy input
90
+ with pytest.raises(IntCastingNaNError, match=msg):
91
+ Series(arr, dtype="i8")
92
+
93
+
94
+ def test_xarray(df):
95
+ pytest.importorskip("xarray")
96
+
97
+ assert df.to_xarray() is not None
98
+
99
+
100
+ def test_xarray_cftimeindex_nearest():
101
+ # https://github.com/pydata/xarray/issues/3751
102
+ cftime = pytest.importorskip("cftime")
103
+ xarray = pytest.importorskip("xarray")
104
+
105
+ times = xarray.cftime_range("0001", periods=2)
106
+ key = cftime.DatetimeGregorian(2000, 1, 1)
107
+ result = times.get_indexer([key], method="nearest")
108
+ expected = 1
109
+ assert result == expected
110
+
111
+
112
+ @pytest.mark.single_cpu
113
+ def test_oo_optimizable():
114
+ # GH 21071
115
+ subprocess.check_call([sys.executable, "-OO", "-c", "import pandas"])
116
+
117
+
118
+ @pytest.mark.single_cpu
119
+ def test_oo_optimized_datetime_index_unpickle():
120
+ # GH 42866
121
+ subprocess.check_call(
122
+ [
123
+ sys.executable,
124
+ "-OO",
125
+ "-c",
126
+ (
127
+ "import pandas as pd, pickle; "
128
+ "pickle.loads(pickle.dumps(pd.date_range('2021-01-01', periods=1)))"
129
+ ),
130
+ ]
131
+ )
132
+
133
+
134
+ def test_statsmodels():
135
+ smf = pytest.importorskip("statsmodels.formula.api")
136
+
137
+ df = DataFrame(
138
+ {"Lottery": range(5), "Literacy": range(5), "Pop1831": range(100, 105)}
139
+ )
140
+ smf.ols("Lottery ~ Literacy + np.log(Pop1831)", data=df).fit()
141
+
142
+
143
+ def test_scikit_learn():
144
+ pytest.importorskip("sklearn")
145
+ from sklearn import (
146
+ datasets,
147
+ svm,
148
+ )
149
+
150
+ digits = datasets.load_digits()
151
+ clf = svm.SVC(gamma=0.001, C=100.0)
152
+ clf.fit(digits.data[:-1], digits.target[:-1])
153
+ clf.predict(digits.data[-1:])
154
+
155
+
156
+ def test_seaborn():
157
+ seaborn = pytest.importorskip("seaborn")
158
+ tips = DataFrame(
159
+ {"day": pd.date_range("2023", freq="D", periods=5), "total_bill": range(5)}
160
+ )
161
+ seaborn.stripplot(x="day", y="total_bill", data=tips)
162
+
163
+
164
+ def test_pandas_datareader():
165
+ pytest.importorskip("pandas_datareader")
166
+
167
+
168
+ @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
169
+ def test_pyarrow(df):
170
+ pyarrow = pytest.importorskip("pyarrow")
171
+ table = pyarrow.Table.from_pandas(df)
172
+ result = table.to_pandas()
173
+ tm.assert_frame_equal(result, df)
174
+
175
+
176
+ def test_yaml_dump(df):
177
+ # GH#42748
178
+ yaml = pytest.importorskip("yaml")
179
+
180
+ dumped = yaml.dump(df)
181
+
182
+ loaded = yaml.load(dumped, Loader=yaml.Loader)
183
+ tm.assert_frame_equal(df, loaded)
184
+
185
+ loaded2 = yaml.load(dumped, Loader=yaml.UnsafeLoader)
186
+ tm.assert_frame_equal(df, loaded2)
187
+
188
+
189
+ @pytest.mark.single_cpu
190
+ def test_missing_required_dependency():
191
+ # GH 23868
192
+ # To ensure proper isolation, we pass these flags
193
+ # -S : disable site-packages
194
+ # -s : disable user site-packages
195
+ # -E : disable PYTHON* env vars, especially PYTHONPATH
196
+ # https://github.com/MacPython/pandas-wheels/pull/50
197
+
198
+ pyexe = sys.executable.replace("\\", "/")
199
+
200
+ # We skip this test if pandas is installed as a site package. We first
201
+ # import the package normally and check the path to the module before
202
+ # executing the test which imports pandas with site packages disabled.
203
+ call = [pyexe, "-c", "import pandas;print(pandas.__file__)"]
204
+ output = subprocess.check_output(call).decode()
205
+ if "site-packages" in output:
206
+ pytest.skip("pandas installed as site package")
207
+
208
+ # This test will fail if pandas is installed as a site package. The flags
209
+ # prevent pandas being imported and the test will report Failed: DID NOT
210
+ # RAISE <class 'subprocess.CalledProcessError'>
211
+ call = [pyexe, "-sSE", "-c", "import pandas"]
212
+
213
+ msg = (
214
+ rf"Command '\['{pyexe}', '-sSE', '-c', 'import pandas'\]' "
215
+ "returned non-zero exit status 1."
216
+ )
217
+
218
+ with pytest.raises(subprocess.CalledProcessError, match=msg) as exc:
219
+ subprocess.check_output(call, stderr=subprocess.STDOUT)
220
+
221
+ output = exc.value.stdout.decode()
222
+ for name in ["numpy", "pytz", "dateutil"]:
223
+ assert name in output
224
+
225
+
226
+ def test_frame_setitem_dask_array_into_new_col():
227
+ # GH#47128
228
+
229
+ # dask sets "compute.use_numexpr" to False, so catch the current value
230
+ # and ensure to reset it afterwards to avoid impacting other tests
231
+ olduse = pd.get_option("compute.use_numexpr")
232
+
233
+ try:
234
+ da = pytest.importorskip("dask.array")
235
+
236
+ dda = da.array([1, 2])
237
+ df = DataFrame({"a": ["a", "b"]})
238
+ df["b"] = dda
239
+ df["c"] = dda
240
+ df.loc[[False, True], "b"] = 100
241
+ result = df.loc[[1], :]
242
+ expected = DataFrame({"a": ["b"], "b": [100], "c": [2]}, index=[1])
243
+ tm.assert_frame_equal(result, expected)
244
+ finally:
245
+ pd.set_option("compute.use_numexpr", olduse)
246
+
247
+
248
+ def test_pandas_priority():
249
+ # GH#48347
250
+
251
+ class MyClass:
252
+ __pandas_priority__ = 5000
253
+
254
+ def __radd__(self, other):
255
+ return self
256
+
257
+ left = MyClass()
258
+ right = Series(range(3))
259
+
260
+ assert right.__add__(left) is NotImplemented
261
+ assert right + left is left
262
+
263
+
264
+ @pytest.fixture(
265
+ params=[
266
+ "memoryview",
267
+ "array",
268
+ pytest.param("dask", marks=td.skip_if_no("dask.array")),
269
+ pytest.param("xarray", marks=td.skip_if_no("xarray")),
270
+ ]
271
+ )
272
+ def array_likes(request):
273
+ """
274
+ Fixture giving a numpy array and a parametrized 'data' object, which can
275
+ be a memoryview, array, dask or xarray object created from the numpy array.
276
+ """
277
+ # GH#24539 recognize e.g xarray, dask, ...
278
+ arr = np.array([1, 2, 3], dtype=np.int64)
279
+
280
+ name = request.param
281
+ if name == "memoryview":
282
+ data = memoryview(arr)
283
+ elif name == "array":
284
+ data = array.array("i", arr)
285
+ elif name == "dask":
286
+ import dask.array
287
+
288
+ data = dask.array.array(arr)
289
+ elif name == "xarray":
290
+ import xarray as xr
291
+
292
+ data = xr.DataArray(arr)
293
+
294
+ return arr, data
295
+
296
+
297
+ @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
298
+ def test_from_obscure_array(dtype, array_likes):
299
+ # GH#24539 recognize e.g xarray, dask, ...
300
+ # Note: we dont do this for PeriodArray bc _from_sequence won't accept
301
+ # an array of integers
302
+ # TODO: could check with arraylike of Period objects
303
+ arr, data = array_likes
304
+
305
+ cls = {"M8[ns]": DatetimeArray, "m8[ns]": TimedeltaArray}[dtype]
306
+
307
+ depr_msg = f"{cls.__name__}.__init__ is deprecated"
308
+ with tm.assert_produces_warning(FutureWarning, match=depr_msg):
309
+ expected = cls(arr)
310
+ result = cls._from_sequence(data, dtype=dtype)
311
+ tm.assert_extension_array_equal(result, expected)
312
+
313
+ if not isinstance(data, memoryview):
314
+ # FIXME(GH#44431) these raise on memoryview and attempted fix
315
+ # fails on py3.10
316
+ func = {"M8[ns]": pd.to_datetime, "m8[ns]": pd.to_timedelta}[dtype]
317
+ result = func(arr).array
318
+ expected = func(data).array
319
+ tm.assert_equal(result, expected)
320
+
321
+ # Let's check the Indexes while we're here
322
+ idx_cls = {"M8[ns]": DatetimeIndex, "m8[ns]": TimedeltaIndex}[dtype]
323
+ result = idx_cls(arr)
324
+ expected = idx_cls(data)
325
+ tm.assert_index_equal(result, expected)
326
+
327
+
328
+ def test_dataframe_consortium() -> None:
329
+ """
330
+ Test some basic methods of the dataframe consortium standard.
331
+
332
+ Full testing is done at https://github.com/data-apis/dataframe-api-compat,
333
+ this is just to check that the entry point works as expected.
334
+ """
335
+ pytest.importorskip("dataframe_api_compat")
336
+ df_pd = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
337
+ df = df_pd.__dataframe_consortium_standard__()
338
+ result_1 = df.get_column_names()
339
+ expected_1 = ["a", "b"]
340
+ assert result_1 == expected_1
341
+
342
+ ser = Series([1, 2, 3], name="a")
343
+ col = ser.__column_consortium_standard__()
344
+ assert col.name == "a"
345
+
346
+
347
+ def test_xarray_coerce_unit():
348
+ # GH44053
349
+ xr = pytest.importorskip("xarray")
350
+
351
+ arr = xr.DataArray([1, 2, 3])
352
+ result = pd.to_datetime(arr, unit="ns")
353
+ expected = DatetimeIndex(
354
+ [
355
+ "1970-01-01 00:00:00.000000001",
356
+ "1970-01-01 00:00:00.000000002",
357
+ "1970-01-01 00:00:00.000000003",
358
+ ],
359
+ dtype="datetime64[ns]",
360
+ freq=None,
361
+ )
362
+ tm.assert_index_equal(result, expected)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_errors.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ from pandas.errors import (
4
+ AbstractMethodError,
5
+ UndefinedVariableError,
6
+ )
7
+
8
+ import pandas as pd
9
+
10
+
11
+ @pytest.mark.parametrize(
12
+ "exc",
13
+ [
14
+ "AttributeConflictWarning",
15
+ "CSSWarning",
16
+ "CategoricalConversionWarning",
17
+ "ClosedFileError",
18
+ "DataError",
19
+ "DatabaseError",
20
+ "DtypeWarning",
21
+ "EmptyDataError",
22
+ "IncompatibilityWarning",
23
+ "IndexingError",
24
+ "InvalidColumnName",
25
+ "InvalidComparison",
26
+ "InvalidVersion",
27
+ "LossySetitemError",
28
+ "MergeError",
29
+ "NoBufferPresent",
30
+ "NumExprClobberingError",
31
+ "NumbaUtilError",
32
+ "OptionError",
33
+ "OutOfBoundsDatetime",
34
+ "ParserError",
35
+ "ParserWarning",
36
+ "PerformanceWarning",
37
+ "PossibleDataLossError",
38
+ "PossiblePrecisionLoss",
39
+ "PyperclipException",
40
+ "SettingWithCopyError",
41
+ "SettingWithCopyWarning",
42
+ "SpecificationError",
43
+ "UnsortedIndexError",
44
+ "UnsupportedFunctionCall",
45
+ "ValueLabelTypeMismatch",
46
+ ],
47
+ )
48
+ def test_exception_importable(exc):
49
+ from pandas import errors
50
+
51
+ err = getattr(errors, exc)
52
+ assert err is not None
53
+
54
+ # check that we can raise on them
55
+
56
+ msg = "^$"
57
+
58
+ with pytest.raises(err, match=msg):
59
+ raise err()
60
+
61
+
62
+ def test_catch_oob():
63
+ from pandas import errors
64
+
65
+ msg = "Cannot cast 1500-01-01 00:00:00 to unit='ns' without overflow"
66
+ with pytest.raises(errors.OutOfBoundsDatetime, match=msg):
67
+ pd.Timestamp("15000101").as_unit("ns")
68
+
69
+
70
+ @pytest.mark.parametrize(
71
+ "is_local",
72
+ [
73
+ True,
74
+ False,
75
+ ],
76
+ )
77
+ def test_catch_undefined_variable_error(is_local):
78
+ variable_name = "x"
79
+ if is_local:
80
+ msg = f"local variable '{variable_name}' is not defined"
81
+ else:
82
+ msg = f"name '{variable_name}' is not defined"
83
+
84
+ with pytest.raises(UndefinedVariableError, match=msg):
85
+ raise UndefinedVariableError(variable_name, is_local)
86
+
87
+
88
+ class Foo:
89
+ @classmethod
90
+ def classmethod(cls):
91
+ raise AbstractMethodError(cls, methodtype="classmethod")
92
+
93
+ @property
94
+ def property(self):
95
+ raise AbstractMethodError(self, methodtype="property")
96
+
97
+ def method(self):
98
+ raise AbstractMethodError(self)
99
+
100
+
101
+ def test_AbstractMethodError_classmethod():
102
+ xpr = "This classmethod must be defined in the concrete class Foo"
103
+ with pytest.raises(AbstractMethodError, match=xpr):
104
+ Foo.classmethod()
105
+
106
+ xpr = "This property must be defined in the concrete class Foo"
107
+ with pytest.raises(AbstractMethodError, match=xpr):
108
+ Foo().property
109
+
110
+ xpr = "This method must be defined in the concrete class Foo"
111
+ with pytest.raises(AbstractMethodError, match=xpr):
112
+ Foo().method()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_expressions.py ADDED
@@ -0,0 +1,466 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import operator
2
+ import re
3
+
4
+ import numpy as np
5
+ import pytest
6
+
7
+ from pandas import option_context
8
+ import pandas._testing as tm
9
+ from pandas.core.api import (
10
+ DataFrame,
11
+ Index,
12
+ Series,
13
+ )
14
+ from pandas.core.computation import expressions as expr
15
+
16
+
17
+ @pytest.fixture
18
+ def _frame():
19
+ return DataFrame(
20
+ np.random.default_rng(2).standard_normal((10001, 4)),
21
+ columns=list("ABCD"),
22
+ dtype="float64",
23
+ )
24
+
25
+
26
+ @pytest.fixture
27
+ def _frame2():
28
+ return DataFrame(
29
+ np.random.default_rng(2).standard_normal((100, 4)),
30
+ columns=list("ABCD"),
31
+ dtype="float64",
32
+ )
33
+
34
+
35
+ @pytest.fixture
36
+ def _mixed(_frame):
37
+ return DataFrame(
38
+ {
39
+ "A": _frame["A"].copy(),
40
+ "B": _frame["B"].astype("float32"),
41
+ "C": _frame["C"].astype("int64"),
42
+ "D": _frame["D"].astype("int32"),
43
+ }
44
+ )
45
+
46
+
47
+ @pytest.fixture
48
+ def _mixed2(_frame2):
49
+ return DataFrame(
50
+ {
51
+ "A": _frame2["A"].copy(),
52
+ "B": _frame2["B"].astype("float32"),
53
+ "C": _frame2["C"].astype("int64"),
54
+ "D": _frame2["D"].astype("int32"),
55
+ }
56
+ )
57
+
58
+
59
+ @pytest.fixture
60
+ def _integer():
61
+ return DataFrame(
62
+ np.random.default_rng(2).integers(1, 100, size=(10001, 4)),
63
+ columns=list("ABCD"),
64
+ dtype="int64",
65
+ )
66
+
67
+
68
+ @pytest.fixture
69
+ def _integer_integers(_integer):
70
+ # integers to get a case with zeros
71
+ return _integer * np.random.default_rng(2).integers(0, 2, size=np.shape(_integer))
72
+
73
+
74
+ @pytest.fixture
75
+ def _integer2():
76
+ return DataFrame(
77
+ np.random.default_rng(2).integers(1, 100, size=(101, 4)),
78
+ columns=list("ABCD"),
79
+ dtype="int64",
80
+ )
81
+
82
+
83
+ @pytest.fixture
84
+ def _array(_frame):
85
+ return _frame["A"].values.copy()
86
+
87
+
88
+ @pytest.fixture
89
+ def _array2(_frame2):
90
+ return _frame2["A"].values.copy()
91
+
92
+
93
+ @pytest.fixture
94
+ def _array_mixed(_mixed):
95
+ return _mixed["D"].values.copy()
96
+
97
+
98
+ @pytest.fixture
99
+ def _array_mixed2(_mixed2):
100
+ return _mixed2["D"].values.copy()
101
+
102
+
103
+ @pytest.mark.skipif(not expr.USE_NUMEXPR, reason="not using numexpr")
104
+ class TestExpressions:
105
+ @staticmethod
106
+ def call_op(df, other, flex: bool, opname: str):
107
+ if flex:
108
+ op = lambda x, y: getattr(x, opname)(y)
109
+ op.__name__ = opname
110
+ else:
111
+ op = getattr(operator, opname)
112
+
113
+ with option_context("compute.use_numexpr", False):
114
+ expected = op(df, other)
115
+
116
+ expr.get_test_result()
117
+
118
+ result = op(df, other)
119
+ return result, expected
120
+
121
+ @pytest.mark.parametrize(
122
+ "fixture",
123
+ [
124
+ "_integer",
125
+ "_integer2",
126
+ "_integer_integers",
127
+ "_frame",
128
+ "_frame2",
129
+ "_mixed",
130
+ "_mixed2",
131
+ ],
132
+ )
133
+ @pytest.mark.parametrize("flex", [True, False])
134
+ @pytest.mark.parametrize(
135
+ "arith", ["add", "sub", "mul", "mod", "truediv", "floordiv"]
136
+ )
137
+ def test_run_arithmetic(self, request, fixture, flex, arith, monkeypatch):
138
+ df = request.getfixturevalue(fixture)
139
+ with monkeypatch.context() as m:
140
+ m.setattr(expr, "_MIN_ELEMENTS", 0)
141
+ result, expected = self.call_op(df, df, flex, arith)
142
+
143
+ if arith == "truediv":
144
+ assert all(x.kind == "f" for x in expected.dtypes.values)
145
+ tm.assert_equal(expected, result)
146
+
147
+ for i in range(len(df.columns)):
148
+ result, expected = self.call_op(
149
+ df.iloc[:, i], df.iloc[:, i], flex, arith
150
+ )
151
+ if arith == "truediv":
152
+ assert expected.dtype.kind == "f"
153
+ tm.assert_equal(expected, result)
154
+
155
+ @pytest.mark.parametrize(
156
+ "fixture",
157
+ [
158
+ "_integer",
159
+ "_integer2",
160
+ "_integer_integers",
161
+ "_frame",
162
+ "_frame2",
163
+ "_mixed",
164
+ "_mixed2",
165
+ ],
166
+ )
167
+ @pytest.mark.parametrize("flex", [True, False])
168
+ def test_run_binary(self, request, fixture, flex, comparison_op, monkeypatch):
169
+ """
170
+ tests solely that the result is the same whether or not numexpr is
171
+ enabled. Need to test whether the function does the correct thing
172
+ elsewhere.
173
+ """
174
+ df = request.getfixturevalue(fixture)
175
+ arith = comparison_op.__name__
176
+ with option_context("compute.use_numexpr", False):
177
+ other = df.copy() + 1
178
+
179
+ with monkeypatch.context() as m:
180
+ m.setattr(expr, "_MIN_ELEMENTS", 0)
181
+ expr.set_test_mode(True)
182
+
183
+ result, expected = self.call_op(df, other, flex, arith)
184
+
185
+ used_numexpr = expr.get_test_result()
186
+ assert used_numexpr, "Did not use numexpr as expected."
187
+ tm.assert_equal(expected, result)
188
+
189
+ for i in range(len(df.columns)):
190
+ binary_comp = other.iloc[:, i] + 1
191
+ self.call_op(df.iloc[:, i], binary_comp, flex, "add")
192
+
193
+ def test_invalid(self):
194
+ array = np.random.default_rng(2).standard_normal(1_000_001)
195
+ array2 = np.random.default_rng(2).standard_normal(100)
196
+
197
+ # no op
198
+ result = expr._can_use_numexpr(operator.add, None, array, array, "evaluate")
199
+ assert not result
200
+
201
+ # min elements
202
+ result = expr._can_use_numexpr(operator.add, "+", array2, array2, "evaluate")
203
+ assert not result
204
+
205
+ # ok, we only check on first part of expression
206
+ result = expr._can_use_numexpr(operator.add, "+", array, array2, "evaluate")
207
+ assert result
208
+
209
+ @pytest.mark.filterwarnings("ignore:invalid value encountered in:RuntimeWarning")
210
+ @pytest.mark.parametrize(
211
+ "opname,op_str",
212
+ [("add", "+"), ("sub", "-"), ("mul", "*"), ("truediv", "/"), ("pow", "**")],
213
+ )
214
+ @pytest.mark.parametrize(
215
+ "left_fix,right_fix", [("_array", "_array2"), ("_array_mixed", "_array_mixed2")]
216
+ )
217
+ def test_binary_ops(self, request, opname, op_str, left_fix, right_fix):
218
+ left = request.getfixturevalue(left_fix)
219
+ right = request.getfixturevalue(right_fix)
220
+
221
+ def testit(left, right, opname, op_str):
222
+ if opname == "pow":
223
+ left = np.abs(left)
224
+
225
+ op = getattr(operator, opname)
226
+
227
+ # array has 0s
228
+ result = expr.evaluate(op, left, left, use_numexpr=True)
229
+ expected = expr.evaluate(op, left, left, use_numexpr=False)
230
+ tm.assert_numpy_array_equal(result, expected)
231
+
232
+ result = expr._can_use_numexpr(op, op_str, right, right, "evaluate")
233
+ assert not result
234
+
235
+ with option_context("compute.use_numexpr", False):
236
+ testit(left, right, opname, op_str)
237
+
238
+ expr.set_numexpr_threads(1)
239
+ testit(left, right, opname, op_str)
240
+ expr.set_numexpr_threads()
241
+ testit(left, right, opname, op_str)
242
+
243
+ @pytest.mark.parametrize(
244
+ "left_fix,right_fix", [("_array", "_array2"), ("_array_mixed", "_array_mixed2")]
245
+ )
246
+ def test_comparison_ops(self, request, comparison_op, left_fix, right_fix):
247
+ left = request.getfixturevalue(left_fix)
248
+ right = request.getfixturevalue(right_fix)
249
+
250
+ def testit():
251
+ f12 = left + 1
252
+ f22 = right + 1
253
+
254
+ op = comparison_op
255
+
256
+ result = expr.evaluate(op, left, f12, use_numexpr=True)
257
+ expected = expr.evaluate(op, left, f12, use_numexpr=False)
258
+ tm.assert_numpy_array_equal(result, expected)
259
+
260
+ result = expr._can_use_numexpr(op, op, right, f22, "evaluate")
261
+ assert not result
262
+
263
+ with option_context("compute.use_numexpr", False):
264
+ testit()
265
+
266
+ expr.set_numexpr_threads(1)
267
+ testit()
268
+ expr.set_numexpr_threads()
269
+ testit()
270
+
271
+ @pytest.mark.parametrize("cond", [True, False])
272
+ @pytest.mark.parametrize("fixture", ["_frame", "_frame2", "_mixed", "_mixed2"])
273
+ def test_where(self, request, cond, fixture):
274
+ df = request.getfixturevalue(fixture)
275
+
276
+ def testit():
277
+ c = np.empty(df.shape, dtype=np.bool_)
278
+ c.fill(cond)
279
+ result = expr.where(c, df.values, df.values + 1)
280
+ expected = np.where(c, df.values, df.values + 1)
281
+ tm.assert_numpy_array_equal(result, expected)
282
+
283
+ with option_context("compute.use_numexpr", False):
284
+ testit()
285
+
286
+ expr.set_numexpr_threads(1)
287
+ testit()
288
+ expr.set_numexpr_threads()
289
+ testit()
290
+
291
+ @pytest.mark.parametrize(
292
+ "op_str,opname", [("/", "truediv"), ("//", "floordiv"), ("**", "pow")]
293
+ )
294
+ def test_bool_ops_raise_on_arithmetic(self, op_str, opname):
295
+ df = DataFrame(
296
+ {
297
+ "a": np.random.default_rng(2).random(10) > 0.5,
298
+ "b": np.random.default_rng(2).random(10) > 0.5,
299
+ }
300
+ )
301
+
302
+ msg = f"operator '{opname}' not implemented for bool dtypes"
303
+ f = getattr(operator, opname)
304
+ err_msg = re.escape(msg)
305
+
306
+ with pytest.raises(NotImplementedError, match=err_msg):
307
+ f(df, df)
308
+
309
+ with pytest.raises(NotImplementedError, match=err_msg):
310
+ f(df.a, df.b)
311
+
312
+ with pytest.raises(NotImplementedError, match=err_msg):
313
+ f(df.a, True)
314
+
315
+ with pytest.raises(NotImplementedError, match=err_msg):
316
+ f(False, df.a)
317
+
318
+ with pytest.raises(NotImplementedError, match=err_msg):
319
+ f(False, df)
320
+
321
+ with pytest.raises(NotImplementedError, match=err_msg):
322
+ f(df, True)
323
+
324
+ @pytest.mark.parametrize(
325
+ "op_str,opname", [("+", "add"), ("*", "mul"), ("-", "sub")]
326
+ )
327
+ def test_bool_ops_warn_on_arithmetic(self, op_str, opname):
328
+ n = 10
329
+ df = DataFrame(
330
+ {
331
+ "a": np.random.default_rng(2).random(n) > 0.5,
332
+ "b": np.random.default_rng(2).random(n) > 0.5,
333
+ }
334
+ )
335
+
336
+ subs = {"+": "|", "*": "&", "-": "^"}
337
+ sub_funcs = {"|": "or_", "&": "and_", "^": "xor"}
338
+
339
+ f = getattr(operator, opname)
340
+ fe = getattr(operator, sub_funcs[subs[op_str]])
341
+
342
+ if op_str == "-":
343
+ # raises TypeError
344
+ return
345
+
346
+ with tm.use_numexpr(True, min_elements=5):
347
+ with tm.assert_produces_warning():
348
+ r = f(df, df)
349
+ e = fe(df, df)
350
+ tm.assert_frame_equal(r, e)
351
+
352
+ with tm.assert_produces_warning():
353
+ r = f(df.a, df.b)
354
+ e = fe(df.a, df.b)
355
+ tm.assert_series_equal(r, e)
356
+
357
+ with tm.assert_produces_warning():
358
+ r = f(df.a, True)
359
+ e = fe(df.a, True)
360
+ tm.assert_series_equal(r, e)
361
+
362
+ with tm.assert_produces_warning():
363
+ r = f(False, df.a)
364
+ e = fe(False, df.a)
365
+ tm.assert_series_equal(r, e)
366
+
367
+ with tm.assert_produces_warning():
368
+ r = f(False, df)
369
+ e = fe(False, df)
370
+ tm.assert_frame_equal(r, e)
371
+
372
+ with tm.assert_produces_warning():
373
+ r = f(df, True)
374
+ e = fe(df, True)
375
+ tm.assert_frame_equal(r, e)
376
+
377
+ @pytest.mark.parametrize(
378
+ "test_input,expected",
379
+ [
380
+ (
381
+ DataFrame(
382
+ [[0, 1, 2, "aa"], [0, 1, 2, "aa"]], columns=["a", "b", "c", "dtype"]
383
+ ),
384
+ DataFrame([[False, False], [False, False]], columns=["a", "dtype"]),
385
+ ),
386
+ (
387
+ DataFrame(
388
+ [[0, 3, 2, "aa"], [0, 4, 2, "aa"], [0, 1, 1, "bb"]],
389
+ columns=["a", "b", "c", "dtype"],
390
+ ),
391
+ DataFrame(
392
+ [[False, False], [False, False], [False, False]],
393
+ columns=["a", "dtype"],
394
+ ),
395
+ ),
396
+ ],
397
+ )
398
+ def test_bool_ops_column_name_dtype(self, test_input, expected):
399
+ # GH 22383 - .ne fails if columns containing column name 'dtype'
400
+ result = test_input.loc[:, ["a", "dtype"]].ne(test_input.loc[:, ["a", "dtype"]])
401
+ tm.assert_frame_equal(result, expected)
402
+
403
+ @pytest.mark.parametrize(
404
+ "arith", ("add", "sub", "mul", "mod", "truediv", "floordiv")
405
+ )
406
+ @pytest.mark.parametrize("axis", (0, 1))
407
+ def test_frame_series_axis(self, axis, arith, _frame, monkeypatch):
408
+ # GH#26736 Dataframe.floordiv(Series, axis=1) fails
409
+
410
+ df = _frame
411
+ if axis == 1:
412
+ other = df.iloc[0, :]
413
+ else:
414
+ other = df.iloc[:, 0]
415
+
416
+ with monkeypatch.context() as m:
417
+ m.setattr(expr, "_MIN_ELEMENTS", 0)
418
+
419
+ op_func = getattr(df, arith)
420
+
421
+ with option_context("compute.use_numexpr", False):
422
+ expected = op_func(other, axis=axis)
423
+
424
+ result = op_func(other, axis=axis)
425
+ tm.assert_frame_equal(expected, result)
426
+
427
+ @pytest.mark.parametrize(
428
+ "op",
429
+ [
430
+ "__mod__",
431
+ "__rmod__",
432
+ "__floordiv__",
433
+ "__rfloordiv__",
434
+ ],
435
+ )
436
+ @pytest.mark.parametrize("box", [DataFrame, Series, Index])
437
+ @pytest.mark.parametrize("scalar", [-5, 5])
438
+ def test_python_semantics_with_numexpr_installed(
439
+ self, op, box, scalar, monkeypatch
440
+ ):
441
+ # https://github.com/pandas-dev/pandas/issues/36047
442
+ with monkeypatch.context() as m:
443
+ m.setattr(expr, "_MIN_ELEMENTS", 0)
444
+ data = np.arange(-50, 50)
445
+ obj = box(data)
446
+ method = getattr(obj, op)
447
+ result = method(scalar)
448
+
449
+ # compare result with numpy
450
+ with option_context("compute.use_numexpr", False):
451
+ expected = method(scalar)
452
+
453
+ tm.assert_equal(result, expected)
454
+
455
+ # compare result element-wise with Python
456
+ for i, elem in enumerate(data):
457
+ if box == DataFrame:
458
+ scalar_result = result.iloc[i, 0]
459
+ else:
460
+ scalar_result = result[i]
461
+ try:
462
+ expected = getattr(int(elem), op)(scalar)
463
+ except ZeroDivisionError:
464
+ pass
465
+ else:
466
+ assert scalar_result == expected
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_flags.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ import pandas as pd
4
+
5
+
6
+ class TestFlags:
7
+ def test_equality(self):
8
+ a = pd.DataFrame().set_flags(allows_duplicate_labels=True).flags
9
+ b = pd.DataFrame().set_flags(allows_duplicate_labels=False).flags
10
+
11
+ assert a == a
12
+ assert b == b
13
+ assert a != b
14
+ assert a != 2
15
+
16
+ def test_set(self):
17
+ df = pd.DataFrame().set_flags(allows_duplicate_labels=True)
18
+ a = df.flags
19
+ a.allows_duplicate_labels = False
20
+ assert a.allows_duplicate_labels is False
21
+ a["allows_duplicate_labels"] = True
22
+ assert a.allows_duplicate_labels is True
23
+
24
+ def test_repr(self):
25
+ a = repr(pd.DataFrame({"A"}).set_flags(allows_duplicate_labels=True).flags)
26
+ assert a == "<Flags(allows_duplicate_labels=True)>"
27
+ a = repr(pd.DataFrame({"A"}).set_flags(allows_duplicate_labels=False).flags)
28
+ assert a == "<Flags(allows_duplicate_labels=False)>"
29
+
30
+ def test_obj_ref(self):
31
+ df = pd.DataFrame()
32
+ flags = df.flags
33
+ del df
34
+ with pytest.raises(ValueError, match="object has been deleted"):
35
+ flags.allows_duplicate_labels = True
36
+
37
+ def test_getitem(self):
38
+ df = pd.DataFrame()
39
+ flags = df.flags
40
+ assert flags["allows_duplicate_labels"] is True
41
+ flags["allows_duplicate_labels"] = False
42
+ assert flags["allows_duplicate_labels"] is False
43
+
44
+ with pytest.raises(KeyError, match="a"):
45
+ flags["a"]
46
+
47
+ with pytest.raises(ValueError, match="a"):
48
+ flags["a"] = 10
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_multilevel.py ADDED
@@ -0,0 +1,355 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+ import pandas as pd
7
+ from pandas import (
8
+ DataFrame,
9
+ MultiIndex,
10
+ Series,
11
+ )
12
+ import pandas._testing as tm
13
+
14
+
15
+ class TestMultiLevel:
16
+ def test_reindex_level(self, multiindex_year_month_day_dataframe_random_data):
17
+ # axis=0
18
+ ymd = multiindex_year_month_day_dataframe_random_data
19
+
20
+ month_sums = ymd.groupby("month").sum()
21
+ result = month_sums.reindex(ymd.index, level=1)
22
+ expected = ymd.groupby(level="month").transform("sum")
23
+
24
+ tm.assert_frame_equal(result, expected)
25
+
26
+ # Series
27
+ result = month_sums["A"].reindex(ymd.index, level=1)
28
+ expected = ymd["A"].groupby(level="month").transform("sum")
29
+ tm.assert_series_equal(result, expected, check_names=False)
30
+
31
+ # axis=1
32
+ msg = "DataFrame.groupby with axis=1 is deprecated"
33
+ with tm.assert_produces_warning(FutureWarning, match=msg):
34
+ gb = ymd.T.groupby("month", axis=1)
35
+
36
+ month_sums = gb.sum()
37
+ result = month_sums.reindex(columns=ymd.index, level=1)
38
+ expected = ymd.groupby(level="month").transform("sum").T
39
+ tm.assert_frame_equal(result, expected)
40
+
41
+ def test_reindex(self, multiindex_dataframe_random_data):
42
+ frame = multiindex_dataframe_random_data
43
+
44
+ expected = frame.iloc[[0, 3]]
45
+ reindexed = frame.loc[[("foo", "one"), ("bar", "one")]]
46
+ tm.assert_frame_equal(reindexed, expected)
47
+
48
+ def test_reindex_preserve_levels(
49
+ self, multiindex_year_month_day_dataframe_random_data, using_copy_on_write
50
+ ):
51
+ ymd = multiindex_year_month_day_dataframe_random_data
52
+
53
+ new_index = ymd.index[::10]
54
+ chunk = ymd.reindex(new_index)
55
+ if using_copy_on_write:
56
+ assert chunk.index.is_(new_index)
57
+ else:
58
+ assert chunk.index is new_index
59
+
60
+ chunk = ymd.loc[new_index]
61
+ assert chunk.index.equals(new_index)
62
+
63
+ ymdT = ymd.T
64
+ chunk = ymdT.reindex(columns=new_index)
65
+ if using_copy_on_write:
66
+ assert chunk.columns.is_(new_index)
67
+ else:
68
+ assert chunk.columns is new_index
69
+
70
+ chunk = ymdT.loc[:, new_index]
71
+ assert chunk.columns.equals(new_index)
72
+
73
+ def test_groupby_transform(self, multiindex_dataframe_random_data):
74
+ frame = multiindex_dataframe_random_data
75
+
76
+ s = frame["A"]
77
+ grouper = s.index.get_level_values(0)
78
+
79
+ grouped = s.groupby(grouper, group_keys=False)
80
+
81
+ applied = grouped.apply(lambda x: x * 2)
82
+ expected = grouped.transform(lambda x: x * 2)
83
+ result = applied.reindex(expected.index)
84
+ tm.assert_series_equal(result, expected, check_names=False)
85
+
86
+ def test_groupby_corner(self):
87
+ midx = MultiIndex(
88
+ levels=[["foo"], ["bar"], ["baz"]],
89
+ codes=[[0], [0], [0]],
90
+ names=["one", "two", "three"],
91
+ )
92
+ df = DataFrame(
93
+ [np.random.default_rng(2).random(4)],
94
+ columns=["a", "b", "c", "d"],
95
+ index=midx,
96
+ )
97
+ # should work
98
+ df.groupby(level="three")
99
+
100
+ def test_groupby_level_no_obs(self):
101
+ # #1697
102
+ midx = MultiIndex.from_tuples(
103
+ [
104
+ ("f1", "s1"),
105
+ ("f1", "s2"),
106
+ ("f2", "s1"),
107
+ ("f2", "s2"),
108
+ ("f3", "s1"),
109
+ ("f3", "s2"),
110
+ ]
111
+ )
112
+ df = DataFrame([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], columns=midx)
113
+ df1 = df.loc(axis=1)[df.columns.map(lambda u: u[0] in ["f2", "f3"])]
114
+
115
+ msg = "DataFrame.groupby with axis=1 is deprecated"
116
+ with tm.assert_produces_warning(FutureWarning, match=msg):
117
+ grouped = df1.groupby(axis=1, level=0)
118
+ result = grouped.sum()
119
+ assert (result.columns == ["f2", "f3"]).all()
120
+
121
+ def test_setitem_with_expansion_multiindex_columns(
122
+ self, multiindex_year_month_day_dataframe_random_data
123
+ ):
124
+ ymd = multiindex_year_month_day_dataframe_random_data
125
+
126
+ df = ymd[:5].T
127
+ df[2000, 1, 10] = df[2000, 1, 7]
128
+ assert isinstance(df.columns, MultiIndex)
129
+ assert (df[2000, 1, 10] == df[2000, 1, 7]).all()
130
+
131
+ def test_alignment(self):
132
+ x = Series(
133
+ data=[1, 2, 3], index=MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3)])
134
+ )
135
+
136
+ y = Series(
137
+ data=[4, 5, 6], index=MultiIndex.from_tuples([("Z", 1), ("Z", 2), ("B", 3)])
138
+ )
139
+
140
+ res = x - y
141
+ exp_index = x.index.union(y.index)
142
+ exp = x.reindex(exp_index) - y.reindex(exp_index)
143
+ tm.assert_series_equal(res, exp)
144
+
145
+ # hit non-monotonic code path
146
+ res = x[::-1] - y[::-1]
147
+ exp_index = x.index.union(y.index)
148
+ exp = x.reindex(exp_index) - y.reindex(exp_index)
149
+ tm.assert_series_equal(res, exp)
150
+
151
+ def test_groupby_multilevel(self, multiindex_year_month_day_dataframe_random_data):
152
+ ymd = multiindex_year_month_day_dataframe_random_data
153
+
154
+ result = ymd.groupby(level=[0, 1]).mean()
155
+
156
+ k1 = ymd.index.get_level_values(0)
157
+ k2 = ymd.index.get_level_values(1)
158
+
159
+ expected = ymd.groupby([k1, k2]).mean()
160
+
161
+ # TODO groupby with level_values drops names
162
+ tm.assert_frame_equal(result, expected, check_names=False)
163
+ assert result.index.names == ymd.index.names[:2]
164
+
165
+ result2 = ymd.groupby(level=ymd.index.names[:2]).mean()
166
+ tm.assert_frame_equal(result, result2)
167
+
168
+ def test_multilevel_consolidate(self):
169
+ index = MultiIndex.from_tuples(
170
+ [("foo", "one"), ("foo", "two"), ("bar", "one"), ("bar", "two")]
171
+ )
172
+ df = DataFrame(
173
+ np.random.default_rng(2).standard_normal((4, 4)), index=index, columns=index
174
+ )
175
+ df["Totals", ""] = df.sum(1)
176
+ df = df._consolidate()
177
+
178
+ def test_level_with_tuples(self):
179
+ index = MultiIndex(
180
+ levels=[[("foo", "bar", 0), ("foo", "baz", 0), ("foo", "qux", 0)], [0, 1]],
181
+ codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
182
+ )
183
+
184
+ series = Series(np.random.default_rng(2).standard_normal(6), index=index)
185
+ frame = DataFrame(np.random.default_rng(2).standard_normal((6, 4)), index=index)
186
+
187
+ result = series[("foo", "bar", 0)]
188
+ result2 = series.loc[("foo", "bar", 0)]
189
+ expected = series[:2]
190
+ expected.index = expected.index.droplevel(0)
191
+ tm.assert_series_equal(result, expected)
192
+ tm.assert_series_equal(result2, expected)
193
+
194
+ with pytest.raises(KeyError, match=r"^\(\('foo', 'bar', 0\), 2\)$"):
195
+ series[("foo", "bar", 0), 2]
196
+
197
+ result = frame.loc[("foo", "bar", 0)]
198
+ result2 = frame.xs(("foo", "bar", 0))
199
+ expected = frame[:2]
200
+ expected.index = expected.index.droplevel(0)
201
+ tm.assert_frame_equal(result, expected)
202
+ tm.assert_frame_equal(result2, expected)
203
+
204
+ index = MultiIndex(
205
+ levels=[[("foo", "bar"), ("foo", "baz"), ("foo", "qux")], [0, 1]],
206
+ codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
207
+ )
208
+
209
+ series = Series(np.random.default_rng(2).standard_normal(6), index=index)
210
+ frame = DataFrame(np.random.default_rng(2).standard_normal((6, 4)), index=index)
211
+
212
+ result = series[("foo", "bar")]
213
+ result2 = series.loc[("foo", "bar")]
214
+ expected = series[:2]
215
+ expected.index = expected.index.droplevel(0)
216
+ tm.assert_series_equal(result, expected)
217
+ tm.assert_series_equal(result2, expected)
218
+
219
+ result = frame.loc[("foo", "bar")]
220
+ result2 = frame.xs(("foo", "bar"))
221
+ expected = frame[:2]
222
+ expected.index = expected.index.droplevel(0)
223
+ tm.assert_frame_equal(result, expected)
224
+ tm.assert_frame_equal(result2, expected)
225
+
226
+ def test_reindex_level_partial_selection(self, multiindex_dataframe_random_data):
227
+ frame = multiindex_dataframe_random_data
228
+
229
+ result = frame.reindex(["foo", "qux"], level=0)
230
+ expected = frame.iloc[[0, 1, 2, 7, 8, 9]]
231
+ tm.assert_frame_equal(result, expected)
232
+
233
+ result = frame.T.reindex(["foo", "qux"], axis=1, level=0)
234
+ tm.assert_frame_equal(result, expected.T)
235
+
236
+ result = frame.loc[["foo", "qux"]]
237
+ tm.assert_frame_equal(result, expected)
238
+
239
+ result = frame["A"].loc[["foo", "qux"]]
240
+ tm.assert_series_equal(result, expected["A"])
241
+
242
+ result = frame.T.loc[:, ["foo", "qux"]]
243
+ tm.assert_frame_equal(result, expected.T)
244
+
245
+ @pytest.mark.parametrize("d", [4, "d"])
246
+ def test_empty_frame_groupby_dtypes_consistency(self, d):
247
+ # GH 20888
248
+ group_keys = ["a", "b", "c"]
249
+ df = DataFrame({"a": [1], "b": [2], "c": [3], "d": [d]})
250
+
251
+ g = df[df.a == 2].groupby(group_keys)
252
+ result = g.first().index
253
+ expected = MultiIndex(
254
+ levels=[[1], [2], [3]], codes=[[], [], []], names=["a", "b", "c"]
255
+ )
256
+
257
+ tm.assert_index_equal(result, expected)
258
+
259
+ def test_duplicate_groupby_issues(self):
260
+ idx_tp = [
261
+ ("600809", "20061231"),
262
+ ("600809", "20070331"),
263
+ ("600809", "20070630"),
264
+ ("600809", "20070331"),
265
+ ]
266
+ dt = ["demo", "demo", "demo", "demo"]
267
+
268
+ idx = MultiIndex.from_tuples(idx_tp, names=["STK_ID", "RPT_Date"])
269
+ s = Series(dt, index=idx)
270
+
271
+ result = s.groupby(s.index).first()
272
+ assert len(result) == 3
273
+
274
+ def test_subsets_multiindex_dtype(self):
275
+ # GH 20757
276
+ data = [["x", 1]]
277
+ columns = [("a", "b", np.nan), ("a", "c", 0.0)]
278
+ df = DataFrame(data, columns=MultiIndex.from_tuples(columns))
279
+ expected = df.dtypes.a.b
280
+ result = df.a.b.dtypes
281
+ tm.assert_series_equal(result, expected)
282
+
283
+ def test_datetime_object_multiindex(self):
284
+ data_dic = {
285
+ (0, datetime.date(2018, 3, 3)): {"A": 1, "B": 10},
286
+ (0, datetime.date(2018, 3, 4)): {"A": 2, "B": 11},
287
+ (1, datetime.date(2018, 3, 3)): {"A": 3, "B": 12},
288
+ (1, datetime.date(2018, 3, 4)): {"A": 4, "B": 13},
289
+ }
290
+ result = DataFrame.from_dict(data_dic, orient="index")
291
+ data = {"A": [1, 2, 3, 4], "B": [10, 11, 12, 13]}
292
+ index = [
293
+ [0, 0, 1, 1],
294
+ [
295
+ datetime.date(2018, 3, 3),
296
+ datetime.date(2018, 3, 4),
297
+ datetime.date(2018, 3, 3),
298
+ datetime.date(2018, 3, 4),
299
+ ],
300
+ ]
301
+ expected = DataFrame(data=data, index=index)
302
+
303
+ tm.assert_frame_equal(result, expected)
304
+
305
+ def test_multiindex_with_na(self):
306
+ df = DataFrame(
307
+ [
308
+ ["A", np.nan, 1.23, 4.56],
309
+ ["A", "G", 1.23, 4.56],
310
+ ["A", "D", 9.87, 10.54],
311
+ ],
312
+ columns=["pivot_0", "pivot_1", "col_1", "col_2"],
313
+ ).set_index(["pivot_0", "pivot_1"])
314
+
315
+ df.at[("A", "F"), "col_2"] = 0.0
316
+
317
+ expected = DataFrame(
318
+ [
319
+ ["A", np.nan, 1.23, 4.56],
320
+ ["A", "G", 1.23, 4.56],
321
+ ["A", "D", 9.87, 10.54],
322
+ ["A", "F", np.nan, 0.0],
323
+ ],
324
+ columns=["pivot_0", "pivot_1", "col_1", "col_2"],
325
+ ).set_index(["pivot_0", "pivot_1"])
326
+
327
+ tm.assert_frame_equal(df, expected)
328
+
329
+
330
class TestSorted:
    """everything you wanted to test about sorting"""

    def test_sort_non_lexsorted(self):
        """Sorting a non-lexsorted MultiIndex frame allows label slicing."""
        # degenerate case where we sort but don't
        # have a satisfying result :<
        # GH 15797
        idx = MultiIndex(
            [["A", "B", "C"], ["c", "b", "a"]], [[0, 1, 2, 0, 1, 2], [0, 2, 1, 1, 0, 2]]
        )

        df = DataFrame({"col": range(len(idx))}, index=idx, dtype="int64")
        assert df.index.is_monotonic_increasing is False

        # Named ``sorted_df`` so the builtin ``sorted`` is not shadowed.
        sorted_df = df.sort_index()
        assert sorted_df.index.is_monotonic_increasing is True

        expected = DataFrame(
            {"col": [1, 4, 5, 2]},
            index=MultiIndex.from_tuples(
                [("B", "a"), ("B", "c"), ("C", "a"), ("C", "b")]
            ),
            dtype="int64",
        )
        result = sorted_df.loc[pd.IndexSlice["B":"C", "a":"c"], :]
        tm.assert_frame_equal(result, expected)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_nanops.py ADDED
@@ -0,0 +1,1274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import partial
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+ import pandas.util._test_decorators as td
7
+
8
+ from pandas.core.dtypes.common import is_integer_dtype
9
+
10
+ import pandas as pd
11
+ from pandas import (
12
+ Series,
13
+ isna,
14
+ )
15
+ import pandas._testing as tm
16
+ from pandas.core import nanops
17
+
18
# Snapshot of the bottleneck switch taken when this module is imported.
use_bn = nanops._USE_BOTTLENECK
19
+
20
+
21
@pytest.fixture
def disable_bottleneck(monkeypatch):
    """Run the requesting test with ``nanops._USE_BOTTLENECK`` set to False."""
    with monkeypatch.context() as patcher:
        patcher.setattr(nanops, "_USE_BOTTLENECK", False)
        yield
26
+
27
+
28
@pytest.fixture
def arr_shape():
    """Shape ``(11, 7)`` shared by the array fixtures that request it."""
    n_rows, n_cols = 11, 7
    return n_rows, n_cols
31
+
32
+
33
@pytest.fixture
def arr_float(arr_shape):
    """Standard-normal samples of shape ``arr_shape`` from a generator seeded with 2."""
    rng = np.random.default_rng(2)
    return rng.standard_normal(arr_shape)
36
+
37
+
38
@pytest.fixture
def arr_complex(arr_float):
    """Complex array whose real and imaginary parts are both ``arr_float``."""
    imaginary_part = arr_float * 1j
    return arr_float + imaginary_part
41
+
42
+
43
@pytest.fixture
def arr_int(arr_shape):
    """Integers drawn with ``integers(-10, 10, arr_shape)`` from a generator seeded with 2."""
    rng = np.random.default_rng(2)
    return rng.integers(-10, 10, arr_shape)
46
+
47
+
48
@pytest.fixture
def arr_bool(arr_shape):
    """Boolean array: True where a seeded ``integers(0, 2, arr_shape)`` draw is 0."""
    draws = np.random.default_rng(2).integers(0, 2, arr_shape)
    return draws == 0
51
+
52
+
53
@pytest.fixture
def arr_str(arr_float):
    """Absolute values of ``arr_float`` converted with ``astype("S")``."""
    magnitudes = np.abs(arr_float)
    return magnitudes.astype("S")
56
+
57
+
58
@pytest.fixture
def arr_utf(arr_float):
    """Absolute values of ``arr_float`` converted with ``astype("U")``."""
    magnitudes = np.abs(arr_float)
    return magnitudes.astype("U")
61
+
62
+
63
@pytest.fixture
def arr_date(arr_shape):
    """Seeded ``integers(0, 20000, arr_shape)`` draws cast to ``M8[ns]``."""
    raw = np.random.default_rng(2).integers(0, 20000, arr_shape)
    return raw.astype("M8[ns]")
66
+
67
+
68
@pytest.fixture
def arr_tdelta(arr_shape):
    """Seeded ``integers(0, 20000, arr_shape)`` draws cast to ``m8[ns]``."""
    raw = np.random.default_rng(2).integers(0, 20000, arr_shape)
    return raw.astype("m8[ns]")
71
+
72
+
73
@pytest.fixture
def arr_nan(arr_shape):
    """Array of shape ``arr_shape`` built by tiling ``np.nan``."""
    filler = np.nan
    return np.tile(filler, arr_shape)
76
+
77
+
78
@pytest.fixture
def arr_float_nan(arr_float, arr_nan):
    """``arr_float`` stacked vertically on top of ``arr_nan``."""
    stacked = np.vstack((arr_float, arr_nan))
    return stacked
81
+
82
+
83
@pytest.fixture
def arr_nan_float1(arr_nan, arr_float):
    """``arr_nan`` stacked vertically on top of ``arr_float``."""
    stacked = np.vstack((arr_nan, arr_float))
    return stacked
86
+
87
+
88
@pytest.fixture
def arr_nan_nan(arr_nan):
    """Two copies of ``arr_nan`` stacked vertically."""
    stacked = np.vstack((arr_nan, arr_nan))
    return stacked
91
+
92
+
93
@pytest.fixture
def arr_inf(arr_float):
    """``arr_float`` multiplied elementwise by ``np.inf``."""
    scaled = arr_float * np.inf
    return scaled
96
+
97
+
98
@pytest.fixture
def arr_float_inf(arr_float, arr_inf):
    """``arr_float`` stacked vertically on top of ``arr_inf``."""
    stacked = np.vstack((arr_float, arr_inf))
    return stacked
101
+
102
+
103
@pytest.fixture
def arr_nan_inf(arr_nan, arr_inf):
    """``arr_nan`` stacked vertically on top of ``arr_inf``."""
    stacked = np.vstack((arr_nan, arr_inf))
    return stacked
106
+
107
+
108
@pytest.fixture
def arr_float_nan_inf(arr_float, arr_nan, arr_inf):
    """``arr_float``, ``arr_nan`` and ``arr_inf`` stacked vertically, in that order."""
    layers = (arr_float, arr_nan, arr_inf)
    return np.vstack(layers)
111
+
112
+
113
@pytest.fixture
def arr_nan_nan_inf(arr_nan, arr_inf):
    """``arr_nan`` twice, then ``arr_inf``, stacked vertically."""
    layers = (arr_nan, arr_nan, arr_inf)
    return np.vstack(layers)
116
+
117
+
118
@pytest.fixture
def arr_obj(
    arr_float, arr_int, arr_bool, arr_complex, arr_str, arr_utf, arr_date, arr_tdelta
):
    """Every typed sample array cast to object dtype and stacked vertically."""
    typed_arrays = (
        arr_float,
        arr_int,
        arr_bool,
        arr_complex,
        arr_str,
        arr_utf,
        arr_date,
        arr_tdelta,
    )
    return np.vstack([typed.astype("O") for typed in typed_arrays])
134
+
135
+
136
@pytest.fixture
def arr_nan_nanj(arr_nan):
    """Complex array with ``arr_nan`` as both real and imaginary part."""
    # Floating-point "invalid" warnings are silenced for this arithmetic.
    with np.errstate(invalid="ignore"):
        imaginary_part = arr_nan * 1j
        return arr_nan + imaginary_part
140
+
141
+
142
@pytest.fixture
def arr_complex_nan(arr_complex, arr_nan_nanj):
    """``arr_complex`` stacked vertically on top of ``arr_nan_nanj``."""
    layers = (arr_complex, arr_nan_nanj)
    with np.errstate(invalid="ignore"):
        return np.vstack(layers)
146
+
147
+
148
@pytest.fixture
def arr_nan_infj(arr_inf):
    """``arr_inf`` multiplied by ``1j``, with "invalid" warnings silenced."""
    with np.errstate(invalid="ignore"):
        rotated = arr_inf * 1j
    return rotated
152
+
153
+
154
@pytest.fixture
def arr_complex_nan_infj(arr_complex, arr_nan_infj):
    """``arr_complex`` stacked vertically on top of ``arr_nan_infj``."""
    layers = (arr_complex, arr_nan_infj)
    with np.errstate(invalid="ignore"):
        return np.vstack(layers)
158
+
159
+
160
@pytest.fixture
def arr_float_1d(arr_float):
    """First column of ``arr_float``."""
    first_column = arr_float[:, 0]
    return first_column
163
+
164
+
165
@pytest.fixture
def arr_nan_1d(arr_nan):
    """First column of ``arr_nan``."""
    first_column = arr_nan[:, 0]
    return first_column
168
+
169
+
170
@pytest.fixture
def arr_float_nan_1d(arr_float_nan):
    """First column of ``arr_float_nan``."""
    first_column = arr_float_nan[:, 0]
    return first_column
173
+
174
+
175
@pytest.fixture
def arr_float1_nan_1d(arr_float1_nan):
    """First column of ``arr_float1_nan``."""
    # NOTE(review): no ``arr_float1_nan`` fixture is defined in the visible part
    # of this module - confirm it is provided elsewhere before requesting this.
    first_column = arr_float1_nan[:, 0]
    return first_column
178
+
179
+
180
@pytest.fixture
def arr_nan_float1_1d(arr_nan_float1):
    """First column of ``arr_nan_float1``."""
    first_column = arr_nan_float1[:, 0]
    return first_column
183
+
184
+
185
+ class TestnanopsDataFrame:
186
def setup_method(self):
    """Switch bottleneck off and build the sample arrays used by the checks.

    Every random array comes from a fresh ``np.random.default_rng(2)``, so
    ``arr_float`` and ``arr_float1`` hold the same values.
    """
    nanops._USE_BOTTLENECK = False

    shape = (11, 7)

    def seeded():
        return np.random.default_rng(2)

    # Base arrays, one per dtype.
    floats = seeded().standard_normal(shape)
    floats1 = seeded().standard_normal(shape)
    self.arr_float = floats
    self.arr_float1 = floats1
    self.arr_complex = floats + floats1 * 1j
    self.arr_int = seeded().integers(-10, 10, shape)
    self.arr_bool = seeded().integers(0, 2, shape) == 0
    self.arr_str = np.abs(floats).astype("S")
    self.arr_utf = np.abs(floats).astype("U")
    self.arr_date = seeded().integers(0, 20000, shape).astype("M8[ns]")
    self.arr_tdelta = seeded().integers(0, 20000, shape).astype("m8[ns]")

    # NaN-padded combinations.
    nans = np.tile(np.nan, shape)
    self.arr_nan = nans
    self.arr_float_nan = np.vstack([floats, nans])
    self.arr_float1_nan = np.vstack([floats1, nans])
    self.arr_nan_float1 = np.vstack([nans, floats1])
    self.arr_nan_nan = np.vstack([nans, nans])

    # Infinity-padded combinations.
    infs = floats * np.inf
    self.arr_inf = infs
    self.arr_float_inf = np.vstack([floats, infs])
    self.arr_nan_inf = np.vstack([nans, infs])
    self.arr_float_nan_inf = np.vstack([floats, nans, infs])
    self.arr_nan_nan_inf = np.vstack([nans, nans, infs])

    # Object-dtype stack of every typed array.
    typed_names = (
        "arr_float",
        "arr_int",
        "arr_bool",
        "arr_complex",
        "arr_str",
        "arr_utf",
        "arr_date",
        "arr_tdelta",
    )
    self.arr_obj = np.vstack([getattr(self, name).astype("O") for name in typed_names])

    # Complex NaN/inf combinations; "invalid" warnings are silenced here.
    with np.errstate(invalid="ignore"):
        self.arr_nan_nanj = nans + nans * 1j
        self.arr_complex_nan = np.vstack([self.arr_complex, self.arr_nan_nanj])

        self.arr_nan_infj = infs * 1j
        self.arr_complex_nan_infj = np.vstack([self.arr_complex, self.arr_nan_infj])

    # ``*_2d`` names alias the arrays; ``*_1d`` names hold their first column.
    base_names = (
        "arr_float",
        "arr_float1",
        "arr_nan",
        "arr_float_nan",
        "arr_float1_nan",
        "arr_nan_float1",
    )
    for name in base_names:
        array = getattr(self, name)
        setattr(self, f"{name}_2d", array)
        setattr(self, f"{name}_1d", array[:, 0])
252
+
253
def teardown_method(self):
    """Put ``nanops._USE_BOTTLENECK`` back to the module-level ``use_bn`` value."""
    saved_setting = use_bn
    nanops._USE_BOTTLENECK = saved_setting
255
+
256
def check_results(self, targ, res, axis, check_dtype=True):
    """Assert ``res`` is almost equal to ``targ``, tolerating complex/object noise.

    ``res`` is first replaced by its ``asm8`` attribute when it has one. When
    ``axis`` is not 0 and the shapes differ, only the leading
    ``targ.shape[0]`` rows of ``res`` are compared. If the direct comparison
    fails for a complex or object ``res``, real and imaginary parts are
    compared separately; any other failure is re-raised.
    """
    res = getattr(res, "asm8", res)

    shapes_disagree = (
        axis != 0
        and hasattr(targ, "shape")
        and targ.ndim
        and targ.shape != res.shape
    )
    if shapes_disagree:
        leading, *_ = np.split(res, [targ.shape[0]], axis=0)
        res = leading

    try:
        tm.assert_almost_equal(targ, res, check_dtype=check_dtype)
    except AssertionError:
        # Timedelta targets never get a second chance.
        targ_is_timedelta = hasattr(targ, "dtype") and targ.dtype == "m8[ns]"
        # Rounding errors are only tolerated for complex and object results.
        res_kind = res.dtype.kind if hasattr(res, "dtype") else None
        if targ_is_timedelta or res_kind not in ("c", "O"):
            raise

        if res_kind == "O":
            # Cast object results to something that can be split into
            # real and imaginary parts.
            if targ.dtype.kind != "O":
                res = res.astype(targ.dtype)
            else:
                cast_dtype = "c16" if hasattr(np, "complex128") else "f8"
                res = res.astype(cast_dtype)
                targ = targ.astype(cast_dtype)
        elif targ.dtype.kind == "O":
            # numpy returning object where nanops does not is always an error.
            raise

        for component in (np.real, np.imag):
            tm.assert_almost_equal(
                component(targ), component(res), check_dtype=check_dtype
            )
294
+
295
def check_fun_data(
    self,
    testfunc,
    targfunc,
    testarval,
    targarval,
    skipna,
    check_dtype=True,
    empty_targfunc=None,
    **kwargs,
):
    """Compare ``testfunc`` with ``targfunc`` over every axis and call form.

    For each axis of ``targarval`` plus ``axis=None`` the reference value is
    computed from ``targarval`` (``skipna`` true) or ``testarval`` (``skipna``
    false). ``testfunc`` is then called with and without explicit ``axis`` /
    ``skipna`` keywords and each result goes through ``check_results``.
    Afterwards the check recurses on index 0 of the last axis until the data
    is 1-D.
    """
    reference_input = targarval if skipna else testarval
    coerce_to_bool = reference_input.dtype == object and (
        targfunc is np.any or targfunc is np.all
    )

    for axis in [*range(targarval.ndim), None]:
        if skipna and empty_targfunc and isna(reference_input).all():
            targ = empty_targfunc(reference_input, axis=axis, **kwargs)
        else:
            targ = targfunc(reference_input, axis=axis, **kwargs)

        if coerce_to_bool:
            # GH#12863 the numpy functions will retain e.g. floatiness
            targ = targ.astype(bool) if isinstance(targ, np.ndarray) else bool(targ)

        res = testfunc(testarval, axis=axis, skipna=skipna, **kwargs)

        both_nan_mixed_types = (
            isinstance(targ, np.complex128)
            and isinstance(res, float)
            and np.isnan(targ)
            and np.isnan(res)
        )
        if both_nan_mixed_types:
            # GH#18463
            targ = res

        self.check_results(targ, res, axis, check_dtype=check_dtype)

        # Repeat with the keywords that may be left at their defaults.
        shorter_calls = []
        if skipna:
            shorter_calls.append({"axis": axis})
        if axis is None:
            shorter_calls.append({"skipna": skipna})
        if skipna and axis is None:
            shorter_calls.append({})
        for call_kwargs in shorter_calls:
            res = testfunc(testarval, **call_kwargs, **kwargs)
            self.check_results(targ, res, axis, check_dtype=check_dtype)

    if testarval.ndim <= 1:
        return

    # Recurse on lower-dimension
    self.check_fun_data(
        testfunc,
        targfunc,
        np.take(testarval, 0, axis=-1),
        np.take(targarval, 0, axis=-1),
        skipna=skipna,
        check_dtype=check_dtype,
        empty_targfunc=empty_targfunc,
        **kwargs,
    )
360
+
361
def check_fun(
    self, testfunc, targfunc, testar, skipna, empty_targfunc=None, **kwargs
):
    """Look up the named arrays on ``self`` and hand them to ``check_fun_data``.

    ``testar`` is an attribute name. If it ends in ``"_nan"`` and the name
    without that suffix is also an attribute, that attribute is used as the
    reference array; otherwise ``testar`` serves as both.
    """
    suffix = "_nan"
    without_suffix = testar[: -len(suffix)]
    if testar.endswith(suffix) and hasattr(self, without_suffix):
        targar = without_suffix
    else:
        targar = testar

    self.check_fun_data(
        testfunc,
        targfunc,
        getattr(self, testar),
        getattr(self, targar),
        skipna=skipna,
        empty_targfunc=empty_targfunc,
        **kwargs,
    )
379
+
380
def check_funs(
    self,
    testfunc,
    targfunc,
    skipna,
    allow_complex=True,
    allow_all_nan=True,
    allow_date=True,
    allow_tdelta=True,
    allow_obj=True,
    **kwargs,
):
    """Run ``check_fun`` over the sample arrays selected by the ``allow_*`` flags.

    Float, float-with-NaN, int and bool arrays are always checked. The other
    dtypes are checked only when their flag is truthy. When ``allow_obj`` is
    truthy, ``self.arr_obj`` is rebuilt from the object-cast arrays that were
    checked and is checked last; ``allow_obj == "convert"`` wraps ``targfunc``
    in ``_badobj_wrap`` for that final check.
    """

    def run(array_name):
        self.check_fun(testfunc, targfunc, array_name, skipna, **kwargs)

    for array_name in ("arr_float", "arr_float_nan", "arr_int", "arr_bool"):
        run(array_name)
    object_parts = [
        self.arr_float.astype("O"),
        self.arr_int.astype("O"),
        self.arr_bool.astype("O"),
    ]

    if allow_all_nan:
        run("arr_nan")

    if allow_complex:
        run("arr_complex")
        run("arr_complex_nan")
        if allow_all_nan:
            run("arr_nan_nanj")
        object_parts.append(self.arr_complex.astype("O"))

    if allow_date:
        # The reference function must accept datetimes; an error propagates.
        targfunc(self.arr_date)
        run("arr_date")
        object_parts.append(self.arr_date.astype("O"))

    if allow_tdelta:
        # Timedeltas are skipped when the reference function rejects them.
        try:
            targfunc(self.arr_tdelta)
        except TypeError:
            pass
        else:
            run("arr_tdelta")
            object_parts.append(self.arr_tdelta.astype("O"))

    if not allow_obj:
        return

    self.arr_obj = np.vstack(object_parts)
    # some nanops handle object dtypes better than their numpy
    # counterparts, so the numpy functions need to be given something
    # else
    if allow_obj == "convert":
        targfunc = partial(
            self._badobj_wrap, func=targfunc, allow_complex=allow_complex
        )
    self.check_fun(testfunc, targfunc, "arr_obj", skipna, **kwargs)
436
+
437
+ def _badobj_wrap(self, value, func, allow_complex=True, **kwargs):
438
+ if value.dtype.kind == "O":
439
+ if allow_complex:
440
+ value = value.astype("c16")
441
+ else:
442
+ value = value.astype("f8")
443
+ return func(value, **kwargs)
444
+
445
+ @pytest.mark.parametrize(
446
+ "nan_op,np_op", [(nanops.nanany, np.any), (nanops.nanall, np.all)]
447
+ )
448
+ def test_nan_funcs(self, nan_op, np_op, skipna):
449
+ self.check_funs(nan_op, np_op, skipna, allow_all_nan=False, allow_date=False)
450
+
451
+ def test_nansum(self, skipna):
452
+ self.check_funs(
453
+ nanops.nansum,
454
+ np.sum,
455
+ skipna,
456
+ allow_date=False,
457
+ check_dtype=False,
458
+ empty_targfunc=np.nansum,
459
+ )
460
+
461
+ def test_nanmean(self, skipna):
462
+ self.check_funs(
463
+ nanops.nanmean, np.mean, skipna, allow_obj=False, allow_date=False
464
+ )
465
+
466
+ @pytest.mark.filterwarnings("ignore::RuntimeWarning")
467
+ def test_nanmedian(self, skipna):
468
+ self.check_funs(
469
+ nanops.nanmedian,
470
+ np.median,
471
+ skipna,
472
+ allow_complex=False,
473
+ allow_date=False,
474
+ allow_obj="convert",
475
+ )
476
+
477
+ @pytest.mark.parametrize("ddof", range(3))
478
+ def test_nanvar(self, ddof, skipna):
479
+ self.check_funs(
480
+ nanops.nanvar,
481
+ np.var,
482
+ skipna,
483
+ allow_complex=False,
484
+ allow_date=False,
485
+ allow_obj="convert",
486
+ ddof=ddof,
487
+ )
488
+
489
+ @pytest.mark.parametrize("ddof", range(3))
490
+ def test_nanstd(self, ddof, skipna):
491
+ self.check_funs(
492
+ nanops.nanstd,
493
+ np.std,
494
+ skipna,
495
+ allow_complex=False,
496
+ allow_date=False,
497
+ allow_obj="convert",
498
+ ddof=ddof,
499
+ )
500
+
501
+ @pytest.mark.parametrize("ddof", range(3))
502
+ def test_nansem(self, ddof, skipna):
503
+ sp_stats = pytest.importorskip("scipy.stats")
504
+
505
+ with np.errstate(invalid="ignore"):
506
+ self.check_funs(
507
+ nanops.nansem,
508
+ sp_stats.sem,
509
+ skipna,
510
+ allow_complex=False,
511
+ allow_date=False,
512
+ allow_tdelta=False,
513
+ allow_obj="convert",
514
+ ddof=ddof,
515
+ )
516
+
517
+ @pytest.mark.filterwarnings("ignore::RuntimeWarning")
518
+ @pytest.mark.parametrize(
519
+ "nan_op,np_op", [(nanops.nanmin, np.min), (nanops.nanmax, np.max)]
520
+ )
521
+ def test_nanops_with_warnings(self, nan_op, np_op, skipna):
522
+ self.check_funs(nan_op, np_op, skipna, allow_obj=False)
523
+
524
+ def _argminmax_wrap(self, value, axis=None, func=None):
525
+ res = func(value, axis)
526
+ nans = np.min(value, axis)
527
+ nullnan = isna(nans)
528
+ if res.ndim:
529
+ res[nullnan] = -1
530
+ elif (
531
+ hasattr(nullnan, "all")
532
+ and nullnan.all()
533
+ or not hasattr(nullnan, "all")
534
+ and nullnan
535
+ ):
536
+ res = -1
537
+ return res
538
+
539
+ @pytest.mark.filterwarnings("ignore::RuntimeWarning")
540
+ def test_nanargmax(self, skipna):
541
+ func = partial(self._argminmax_wrap, func=np.argmax)
542
+ self.check_funs(nanops.nanargmax, func, skipna, allow_obj=False)
543
+
544
+ @pytest.mark.filterwarnings("ignore::RuntimeWarning")
545
+ def test_nanargmin(self, skipna):
546
+ func = partial(self._argminmax_wrap, func=np.argmin)
547
+ self.check_funs(nanops.nanargmin, func, skipna, allow_obj=False)
548
+
549
+ def _skew_kurt_wrap(self, values, axis=None, func=None):
550
+ if not isinstance(values.dtype.type, np.floating):
551
+ values = values.astype("f8")
552
+ result = func(values, axis=axis, bias=False)
553
+ # fix for handling cases where all elements in an axis are the same
554
+ if isinstance(result, np.ndarray):
555
+ result[np.max(values, axis=axis) == np.min(values, axis=axis)] = 0
556
+ return result
557
+ elif np.max(values) == np.min(values):
558
+ return 0.0
559
+ return result
560
+
561
+ def test_nanskew(self, skipna):
562
+ sp_stats = pytest.importorskip("scipy.stats")
563
+
564
+ func = partial(self._skew_kurt_wrap, func=sp_stats.skew)
565
+ with np.errstate(invalid="ignore"):
566
+ self.check_funs(
567
+ nanops.nanskew,
568
+ func,
569
+ skipna,
570
+ allow_complex=False,
571
+ allow_date=False,
572
+ allow_tdelta=False,
573
+ )
574
+
575
+ def test_nankurt(self, skipna):
576
+ sp_stats = pytest.importorskip("scipy.stats")
577
+
578
+ func1 = partial(sp_stats.kurtosis, fisher=True)
579
+ func = partial(self._skew_kurt_wrap, func=func1)
580
+ with np.errstate(invalid="ignore"):
581
+ self.check_funs(
582
+ nanops.nankurt,
583
+ func,
584
+ skipna,
585
+ allow_complex=False,
586
+ allow_date=False,
587
+ allow_tdelta=False,
588
+ )
589
+
590
+ def test_nanprod(self, skipna):
591
+ self.check_funs(
592
+ nanops.nanprod,
593
+ np.prod,
594
+ skipna,
595
+ allow_date=False,
596
+ allow_tdelta=False,
597
+ empty_targfunc=np.nanprod,
598
+ )
599
+
600
+ def check_nancorr_nancov_2d(self, checkfun, targ0, targ1, **kwargs):
601
+ res00 = checkfun(self.arr_float_2d, self.arr_float1_2d, **kwargs)
602
+ res01 = checkfun(
603
+ self.arr_float_2d,
604
+ self.arr_float1_2d,
605
+ min_periods=len(self.arr_float_2d) - 1,
606
+ **kwargs,
607
+ )
608
+ tm.assert_almost_equal(targ0, res00)
609
+ tm.assert_almost_equal(targ0, res01)
610
+
611
+ res10 = checkfun(self.arr_float_nan_2d, self.arr_float1_nan_2d, **kwargs)
612
+ res11 = checkfun(
613
+ self.arr_float_nan_2d,
614
+ self.arr_float1_nan_2d,
615
+ min_periods=len(self.arr_float_2d) - 1,
616
+ **kwargs,
617
+ )
618
+ tm.assert_almost_equal(targ1, res10)
619
+ tm.assert_almost_equal(targ1, res11)
620
+
621
+ targ2 = np.nan
622
+ res20 = checkfun(self.arr_nan_2d, self.arr_float1_2d, **kwargs)
623
+ res21 = checkfun(self.arr_float_2d, self.arr_nan_2d, **kwargs)
624
+ res22 = checkfun(self.arr_nan_2d, self.arr_nan_2d, **kwargs)
625
+ res23 = checkfun(self.arr_float_nan_2d, self.arr_nan_float1_2d, **kwargs)
626
+ res24 = checkfun(
627
+ self.arr_float_nan_2d,
628
+ self.arr_nan_float1_2d,
629
+ min_periods=len(self.arr_float_2d) - 1,
630
+ **kwargs,
631
+ )
632
+ res25 = checkfun(
633
+ self.arr_float_2d,
634
+ self.arr_float1_2d,
635
+ min_periods=len(self.arr_float_2d) + 1,
636
+ **kwargs,
637
+ )
638
+ tm.assert_almost_equal(targ2, res20)
639
+ tm.assert_almost_equal(targ2, res21)
640
+ tm.assert_almost_equal(targ2, res22)
641
+ tm.assert_almost_equal(targ2, res23)
642
+ tm.assert_almost_equal(targ2, res24)
643
+ tm.assert_almost_equal(targ2, res25)
644
+
645
+ def check_nancorr_nancov_1d(self, checkfun, targ0, targ1, **kwargs):
646
+ res00 = checkfun(self.arr_float_1d, self.arr_float1_1d, **kwargs)
647
+ res01 = checkfun(
648
+ self.arr_float_1d,
649
+ self.arr_float1_1d,
650
+ min_periods=len(self.arr_float_1d) - 1,
651
+ **kwargs,
652
+ )
653
+ tm.assert_almost_equal(targ0, res00)
654
+ tm.assert_almost_equal(targ0, res01)
655
+
656
+ res10 = checkfun(self.arr_float_nan_1d, self.arr_float1_nan_1d, **kwargs)
657
+ res11 = checkfun(
658
+ self.arr_float_nan_1d,
659
+ self.arr_float1_nan_1d,
660
+ min_periods=len(self.arr_float_1d) - 1,
661
+ **kwargs,
662
+ )
663
+ tm.assert_almost_equal(targ1, res10)
664
+ tm.assert_almost_equal(targ1, res11)
665
+
666
+ targ2 = np.nan
667
+ res20 = checkfun(self.arr_nan_1d, self.arr_float1_1d, **kwargs)
668
+ res21 = checkfun(self.arr_float_1d, self.arr_nan_1d, **kwargs)
669
+ res22 = checkfun(self.arr_nan_1d, self.arr_nan_1d, **kwargs)
670
+ res23 = checkfun(self.arr_float_nan_1d, self.arr_nan_float1_1d, **kwargs)
671
+ res24 = checkfun(
672
+ self.arr_float_nan_1d,
673
+ self.arr_nan_float1_1d,
674
+ min_periods=len(self.arr_float_1d) - 1,
675
+ **kwargs,
676
+ )
677
+ res25 = checkfun(
678
+ self.arr_float_1d,
679
+ self.arr_float1_1d,
680
+ min_periods=len(self.arr_float_1d) + 1,
681
+ **kwargs,
682
+ )
683
+ tm.assert_almost_equal(targ2, res20)
684
+ tm.assert_almost_equal(targ2, res21)
685
+ tm.assert_almost_equal(targ2, res22)
686
+ tm.assert_almost_equal(targ2, res23)
687
+ tm.assert_almost_equal(targ2, res24)
688
+ tm.assert_almost_equal(targ2, res25)
689
+
690
+ def test_nancorr(self):
691
+ targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
692
+ targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
693
+ self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1)
694
+ targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1]
695
+ targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
696
+ self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="pearson")
697
+
698
+ def test_nancorr_pearson(self):
699
+ targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
700
+ targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
701
+ self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="pearson")
702
+ targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1]
703
+ targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
704
+ self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="pearson")
705
+
706
+ def test_nancorr_kendall(self):
707
+ sp_stats = pytest.importorskip("scipy.stats")
708
+
709
+ targ0 = sp_stats.kendalltau(self.arr_float_2d, self.arr_float1_2d)[0]
710
+ targ1 = sp_stats.kendalltau(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0]
711
+ self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="kendall")
712
+ targ0 = sp_stats.kendalltau(self.arr_float_1d, self.arr_float1_1d)[0]
713
+ targ1 = sp_stats.kendalltau(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0]
714
+ self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="kendall")
715
+
716
+ def test_nancorr_spearman(self):
717
+ sp_stats = pytest.importorskip("scipy.stats")
718
+
719
+ targ0 = sp_stats.spearmanr(self.arr_float_2d, self.arr_float1_2d)[0]
720
+ targ1 = sp_stats.spearmanr(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0]
721
+ self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="spearman")
722
+ targ0 = sp_stats.spearmanr(self.arr_float_1d, self.arr_float1_1d)[0]
723
+ targ1 = sp_stats.spearmanr(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0]
724
+ self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="spearman")
725
+
726
+ def test_invalid_method(self):
727
+ pytest.importorskip("scipy")
728
+ targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
729
+ targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
730
+ msg = "Unknown method 'foo', expected one of 'kendall', 'spearman'"
731
+ with pytest.raises(ValueError, match=msg):
732
+ self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="foo")
733
+
734
+ def test_nancov(self):
735
+ targ0 = np.cov(self.arr_float_2d, self.arr_float1_2d)[0, 1]
736
+ targ1 = np.cov(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
737
+ self.check_nancorr_nancov_2d(nanops.nancov, targ0, targ1)
738
+ targ0 = np.cov(self.arr_float_1d, self.arr_float1_1d)[0, 1]
739
+ targ1 = np.cov(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
740
+ self.check_nancorr_nancov_1d(nanops.nancov, targ0, targ1)
741
+
742
+
743
+ @pytest.mark.parametrize(
744
+ "arr, correct",
745
+ [
746
+ ("arr_complex", False),
747
+ ("arr_int", False),
748
+ ("arr_bool", False),
749
+ ("arr_str", False),
750
+ ("arr_utf", False),
751
+ ("arr_complex", False),
752
+ ("arr_complex_nan", False),
753
+ ("arr_nan_nanj", False),
754
+ ("arr_nan_infj", True),
755
+ ("arr_complex_nan_infj", True),
756
+ ],
757
+ )
758
+ def test_has_infs_non_float(request, arr, correct, disable_bottleneck):
759
+ val = request.getfixturevalue(arr)
760
+ while getattr(val, "ndim", True):
761
+ res0 = nanops._has_infs(val)
762
+ if correct:
763
+ assert res0
764
+ else:
765
+ assert not res0
766
+
767
+ if not hasattr(val, "ndim"):
768
+ break
769
+
770
+ # Reduce dimension for next step in the loop
771
+ val = np.take(val, 0, axis=-1)
772
+
773
+
774
+ @pytest.mark.parametrize(
775
+ "arr, correct",
776
+ [
777
+ ("arr_float", False),
778
+ ("arr_nan", False),
779
+ ("arr_float_nan", False),
780
+ ("arr_nan_nan", False),
781
+ ("arr_float_inf", True),
782
+ ("arr_inf", True),
783
+ ("arr_nan_inf", True),
784
+ ("arr_float_nan_inf", True),
785
+ ("arr_nan_nan_inf", True),
786
+ ],
787
+ )
788
+ @pytest.mark.parametrize("astype", [None, "f4", "f2"])
789
+ def test_has_infs_floats(request, arr, correct, astype, disable_bottleneck):
790
+ val = request.getfixturevalue(arr)
791
+ if astype is not None:
792
+ val = val.astype(astype)
793
+ while getattr(val, "ndim", True):
794
+ res0 = nanops._has_infs(val)
795
+ if correct:
796
+ assert res0
797
+ else:
798
+ assert not res0
799
+
800
+ if not hasattr(val, "ndim"):
801
+ break
802
+
803
+ # Reduce dimension for next step in the loop
804
+ val = np.take(val, 0, axis=-1)
805
+
806
+
807
+ @pytest.mark.parametrize(
808
+ "fixture", ["arr_float", "arr_complex", "arr_int", "arr_bool", "arr_str", "arr_utf"]
809
+ )
810
+ def test_bn_ok_dtype(fixture, request, disable_bottleneck):
811
+ obj = request.getfixturevalue(fixture)
812
+ assert nanops._bn_ok_dtype(obj.dtype, "test")
813
+
814
+
815
+ @pytest.mark.parametrize(
816
+ "fixture",
817
+ [
818
+ "arr_date",
819
+ "arr_tdelta",
820
+ "arr_obj",
821
+ ],
822
+ )
823
+ def test_bn_not_ok_dtype(fixture, request, disable_bottleneck):
824
+ obj = request.getfixturevalue(fixture)
825
+ assert not nanops._bn_ok_dtype(obj.dtype, "test")
826
+
827
+
828
+ class TestEnsureNumeric:
829
+ def test_numeric_values(self):
830
+ # Test integer
831
+ assert nanops._ensure_numeric(1) == 1
832
+
833
+ # Test float
834
+ assert nanops._ensure_numeric(1.1) == 1.1
835
+
836
+ # Test complex
837
+ assert nanops._ensure_numeric(1 + 2j) == 1 + 2j
838
+
839
+ def test_ndarray(self):
840
+ # Test numeric ndarray
841
+ values = np.array([1, 2, 3])
842
+ assert np.allclose(nanops._ensure_numeric(values), values)
843
+
844
+ # Test object ndarray
845
+ o_values = values.astype(object)
846
+ assert np.allclose(nanops._ensure_numeric(o_values), values)
847
+
848
+ # Test convertible string ndarray
849
+ s_values = np.array(["1", "2", "3"], dtype=object)
850
+ msg = r"Could not convert \['1' '2' '3'\] to numeric"
851
+ with pytest.raises(TypeError, match=msg):
852
+ nanops._ensure_numeric(s_values)
853
+
854
+ # Test non-convertible string ndarray
855
+ s_values = np.array(["foo", "bar", "baz"], dtype=object)
856
+ msg = r"Could not convert .* to numeric"
857
+ with pytest.raises(TypeError, match=msg):
858
+ nanops._ensure_numeric(s_values)
859
+
860
+ def test_convertable_values(self):
861
+ with pytest.raises(TypeError, match="Could not convert string '1' to numeric"):
862
+ nanops._ensure_numeric("1")
863
+ with pytest.raises(
864
+ TypeError, match="Could not convert string '1.1' to numeric"
865
+ ):
866
+ nanops._ensure_numeric("1.1")
867
+ with pytest.raises(
868
+ TypeError, match=r"Could not convert string '1\+1j' to numeric"
869
+ ):
870
+ nanops._ensure_numeric("1+1j")
871
+
872
+ def test_non_convertable_values(self):
873
+ msg = "Could not convert string 'foo' to numeric"
874
+ with pytest.raises(TypeError, match=msg):
875
+ nanops._ensure_numeric("foo")
876
+
877
+ # with the wrong type, python raises TypeError for us
878
+ msg = "argument must be a string or a number"
879
+ with pytest.raises(TypeError, match=msg):
880
+ nanops._ensure_numeric({})
881
+ with pytest.raises(TypeError, match=msg):
882
+ nanops._ensure_numeric([])
883
+
884
+
885
+ class TestNanvarFixedValues:
886
+ # xref GH10242
887
+ # Samples from a normal distribution.
888
+ @pytest.fixture
889
+ def variance(self):
890
+ return 3.0
891
+
892
+ @pytest.fixture
893
+ def samples(self, variance):
894
+ return self.prng.normal(scale=variance**0.5, size=100000)
895
+
896
+ def test_nanvar_all_finite(self, samples, variance):
897
+ actual_variance = nanops.nanvar(samples)
898
+ tm.assert_almost_equal(actual_variance, variance, rtol=1e-2)
899
+
900
+ def test_nanvar_nans(self, samples, variance):
901
+ samples_test = np.nan * np.ones(2 * samples.shape[0])
902
+ samples_test[::2] = samples
903
+
904
+ actual_variance = nanops.nanvar(samples_test, skipna=True)
905
+ tm.assert_almost_equal(actual_variance, variance, rtol=1e-2)
906
+
907
+ actual_variance = nanops.nanvar(samples_test, skipna=False)
908
+ tm.assert_almost_equal(actual_variance, np.nan, rtol=1e-2)
909
+
910
+ def test_nanstd_nans(self, samples, variance):
911
+ samples_test = np.nan * np.ones(2 * samples.shape[0])
912
+ samples_test[::2] = samples
913
+
914
+ actual_std = nanops.nanstd(samples_test, skipna=True)
915
+ tm.assert_almost_equal(actual_std, variance**0.5, rtol=1e-2)
916
+
917
+ actual_std = nanops.nanvar(samples_test, skipna=False)
918
+ tm.assert_almost_equal(actual_std, np.nan, rtol=1e-2)
919
+
920
+ def test_nanvar_axis(self, samples, variance):
921
+ # Generate some sample data.
922
+ samples_unif = self.prng.uniform(size=samples.shape[0])
923
+ samples = np.vstack([samples, samples_unif])
924
+
925
+ actual_variance = nanops.nanvar(samples, axis=1)
926
+ tm.assert_almost_equal(
927
+ actual_variance, np.array([variance, 1.0 / 12]), rtol=1e-2
928
+ )
929
+
930
+ def test_nanvar_ddof(self):
931
+ n = 5
932
+ samples = self.prng.uniform(size=(10000, n + 1))
933
+ samples[:, -1] = np.nan # Force use of our own algorithm.
934
+
935
+ variance_0 = nanops.nanvar(samples, axis=1, skipna=True, ddof=0).mean()
936
+ variance_1 = nanops.nanvar(samples, axis=1, skipna=True, ddof=1).mean()
937
+ variance_2 = nanops.nanvar(samples, axis=1, skipna=True, ddof=2).mean()
938
+
939
+ # The unbiased estimate.
940
+ var = 1.0 / 12
941
+ tm.assert_almost_equal(variance_1, var, rtol=1e-2)
942
+
943
+ # The underestimated variance.
944
+ tm.assert_almost_equal(variance_0, (n - 1.0) / n * var, rtol=1e-2)
945
+
946
+ # The overestimated variance.
947
+ tm.assert_almost_equal(variance_2, (n - 1.0) / (n - 2.0) * var, rtol=1e-2)
948
+
949
+ @pytest.mark.parametrize("axis", range(2))
950
+ @pytest.mark.parametrize("ddof", range(3))
951
+ def test_ground_truth(self, axis, ddof):
952
+ # Test against values that were precomputed with Numpy.
953
+ samples = np.empty((4, 4))
954
+ samples[:3, :3] = np.array(
955
+ [
956
+ [0.97303362, 0.21869576, 0.55560287],
957
+ [0.72980153, 0.03109364, 0.99155171],
958
+ [0.09317602, 0.60078248, 0.15871292],
959
+ ]
960
+ )
961
+ samples[3] = samples[:, 3] = np.nan
962
+
963
+ # Actual variances along axis=0, 1 for ddof=0, 1, 2
964
+ variance = np.array(
965
+ [
966
+ [
967
+ [0.13762259, 0.05619224, 0.11568816],
968
+ [0.20643388, 0.08428837, 0.17353224],
969
+ [0.41286776, 0.16857673, 0.34706449],
970
+ ],
971
+ [
972
+ [0.09519783, 0.16435395, 0.05082054],
973
+ [0.14279674, 0.24653093, 0.07623082],
974
+ [0.28559348, 0.49306186, 0.15246163],
975
+ ],
976
+ ]
977
+ )
978
+
979
+ # Test nanvar.
980
+ var = nanops.nanvar(samples, skipna=True, axis=axis, ddof=ddof)
981
+ tm.assert_almost_equal(var[:3], variance[axis, ddof])
982
+ assert np.isnan(var[3])
983
+
984
+ # Test nanstd.
985
+ std = nanops.nanstd(samples, skipna=True, axis=axis, ddof=ddof)
986
+ tm.assert_almost_equal(std[:3], variance[axis, ddof] ** 0.5)
987
+ assert np.isnan(std[3])
988
+
989
+ @pytest.mark.parametrize("ddof", range(3))
990
+ def test_nanstd_roundoff(self, ddof):
991
+ # Regression test for GH 10242 (test data taken from GH 10489). Ensure
992
+ # that variance is stable.
993
+ data = Series(766897346 * np.ones(10))
994
+ result = data.std(ddof=ddof)
995
+ assert result == 0.0
996
+
997
+ @property
998
+ def prng(self):
999
+ return np.random.default_rng(2)
1000
+
1001
+
1002
+ class TestNanskewFixedValues:
1003
+ # xref GH 11974
1004
+ # Test data + skewness value (computed with scipy.stats.skew)
1005
+ @pytest.fixture
1006
+ def samples(self):
1007
+ return np.sin(np.linspace(0, 1, 200))
1008
+
1009
+ @pytest.fixture
1010
+ def actual_skew(self):
1011
+ return -0.1875895205961754
1012
+
1013
+ @pytest.mark.parametrize("val", [3075.2, 3075.3, 3075.5])
1014
+ def test_constant_series(self, val):
1015
+ # xref GH 11974
1016
+ data = val * np.ones(300)
1017
+ skew = nanops.nanskew(data)
1018
+ assert skew == 0.0
1019
+
1020
+ def test_all_finite(self):
1021
+ alpha, beta = 0.3, 0.1
1022
+ left_tailed = self.prng.beta(alpha, beta, size=100)
1023
+ assert nanops.nanskew(left_tailed) < 0
1024
+
1025
+ alpha, beta = 0.1, 0.3
1026
+ right_tailed = self.prng.beta(alpha, beta, size=100)
1027
+ assert nanops.nanskew(right_tailed) > 0
1028
+
1029
+ def test_ground_truth(self, samples, actual_skew):
1030
+ skew = nanops.nanskew(samples)
1031
+ tm.assert_almost_equal(skew, actual_skew)
1032
+
1033
+ def test_axis(self, samples, actual_skew):
1034
+ samples = np.vstack([samples, np.nan * np.ones(len(samples))])
1035
+ skew = nanops.nanskew(samples, axis=1)
1036
+ tm.assert_almost_equal(skew, np.array([actual_skew, np.nan]))
1037
+
1038
+ def test_nans(self, samples):
1039
+ samples = np.hstack([samples, np.nan])
1040
+ skew = nanops.nanskew(samples, skipna=False)
1041
+ assert np.isnan(skew)
1042
+
1043
+ def test_nans_skipna(self, samples, actual_skew):
1044
+ samples = np.hstack([samples, np.nan])
1045
+ skew = nanops.nanskew(samples, skipna=True)
1046
+ tm.assert_almost_equal(skew, actual_skew)
1047
+
1048
+ @property
1049
+ def prng(self):
1050
+ return np.random.default_rng(2)
1051
+
1052
+
1053
+ class TestNankurtFixedValues:
1054
+ # xref GH 11974
1055
+ # Test data + kurtosis value (computed with scipy.stats.kurtosis)
1056
+ @pytest.fixture
1057
+ def samples(self):
1058
+ return np.sin(np.linspace(0, 1, 200))
1059
+
1060
+ @pytest.fixture
1061
+ def actual_kurt(self):
1062
+ return -1.2058303433799713
1063
+
1064
+ @pytest.mark.parametrize("val", [3075.2, 3075.3, 3075.5])
1065
+ def test_constant_series(self, val):
1066
+ # xref GH 11974
1067
+ data = val * np.ones(300)
1068
+ kurt = nanops.nankurt(data)
1069
+ assert kurt == 0.0
1070
+
1071
+ def test_all_finite(self):
1072
+ alpha, beta = 0.3, 0.1
1073
+ left_tailed = self.prng.beta(alpha, beta, size=100)
1074
+ assert nanops.nankurt(left_tailed) < 2
1075
+
1076
+ alpha, beta = 0.1, 0.3
1077
+ right_tailed = self.prng.beta(alpha, beta, size=100)
1078
+ assert nanops.nankurt(right_tailed) < 0
1079
+
1080
+ def test_ground_truth(self, samples, actual_kurt):
1081
+ kurt = nanops.nankurt(samples)
1082
+ tm.assert_almost_equal(kurt, actual_kurt)
1083
+
1084
+ def test_axis(self, samples, actual_kurt):
1085
+ samples = np.vstack([samples, np.nan * np.ones(len(samples))])
1086
+ kurt = nanops.nankurt(samples, axis=1)
1087
+ tm.assert_almost_equal(kurt, np.array([actual_kurt, np.nan]))
1088
+
1089
+ def test_nans(self, samples):
1090
+ samples = np.hstack([samples, np.nan])
1091
+ kurt = nanops.nankurt(samples, skipna=False)
1092
+ assert np.isnan(kurt)
1093
+
1094
+ def test_nans_skipna(self, samples, actual_kurt):
1095
+ samples = np.hstack([samples, np.nan])
1096
+ kurt = nanops.nankurt(samples, skipna=True)
1097
+ tm.assert_almost_equal(kurt, actual_kurt)
1098
+
1099
+ @property
1100
+ def prng(self):
1101
+ return np.random.default_rng(2)
1102
+
1103
+
1104
+ class TestDatetime64NaNOps:
1105
+ @pytest.fixture(params=["s", "ms", "us", "ns"])
1106
+ def unit(self, request):
1107
+ return request.param
1108
+
1109
+ # Enabling mean changes the behavior of DataFrame.mean
1110
+ # See https://github.com/pandas-dev/pandas/issues/24752
1111
+ def test_nanmean(self, unit):
1112
+ dti = pd.date_range("2016-01-01", periods=3).as_unit(unit)
1113
+ expected = dti[1]
1114
+
1115
+ for obj in [dti, dti._data]:
1116
+ result = nanops.nanmean(obj)
1117
+ assert result == expected
1118
+
1119
+ dti2 = dti.insert(1, pd.NaT)
1120
+
1121
+ for obj in [dti2, dti2._data]:
1122
+ result = nanops.nanmean(obj)
1123
+ assert result == expected
1124
+
1125
+ @pytest.mark.parametrize("constructor", ["M8", "m8"])
1126
+ def test_nanmean_skipna_false(self, constructor, unit):
1127
+ dtype = f"{constructor}[{unit}]"
1128
+ arr = np.arange(12).astype(np.int64).view(dtype).reshape(4, 3)
1129
+
1130
+ arr[-1, -1] = "NaT"
1131
+
1132
+ result = nanops.nanmean(arr, skipna=False)
1133
+ assert np.isnat(result)
1134
+ assert result.dtype == dtype
1135
+
1136
+ result = nanops.nanmean(arr, axis=0, skipna=False)
1137
+ expected = np.array([4, 5, "NaT"], dtype=arr.dtype)
1138
+ tm.assert_numpy_array_equal(result, expected)
1139
+
1140
+ result = nanops.nanmean(arr, axis=1, skipna=False)
1141
+ expected = np.array([arr[0, 1], arr[1, 1], arr[2, 1], arr[-1, -1]])
1142
+ tm.assert_numpy_array_equal(result, expected)
1143
+
1144
+
1145
+ def test_use_bottleneck():
1146
+ if nanops._BOTTLENECK_INSTALLED:
1147
+ with pd.option_context("use_bottleneck", True):
1148
+ assert pd.get_option("use_bottleneck")
1149
+
1150
+ with pd.option_context("use_bottleneck", False):
1151
+ assert not pd.get_option("use_bottleneck")
1152
+
1153
+
1154
+ @pytest.mark.parametrize(
1155
+ "numpy_op, expected",
1156
+ [
1157
+ (np.sum, 10),
1158
+ (np.nansum, 10),
1159
+ (np.mean, 2.5),
1160
+ (np.nanmean, 2.5),
1161
+ (np.median, 2.5),
1162
+ (np.nanmedian, 2.5),
1163
+ (np.min, 1),
1164
+ (np.max, 4),
1165
+ (np.nanmin, 1),
1166
+ (np.nanmax, 4),
1167
+ ],
1168
+ )
1169
+ def test_numpy_ops(numpy_op, expected):
1170
+ # GH8383
1171
+ result = numpy_op(Series([1, 2, 3, 4]))
1172
+ assert result == expected
1173
+
1174
+
1175
+ @pytest.mark.parametrize(
1176
+ "operation",
1177
+ [
1178
+ nanops.nanany,
1179
+ nanops.nanall,
1180
+ nanops.nansum,
1181
+ nanops.nanmean,
1182
+ nanops.nanmedian,
1183
+ nanops.nanstd,
1184
+ nanops.nanvar,
1185
+ nanops.nansem,
1186
+ nanops.nanargmax,
1187
+ nanops.nanargmin,
1188
+ nanops.nanmax,
1189
+ nanops.nanmin,
1190
+ nanops.nanskew,
1191
+ nanops.nankurt,
1192
+ nanops.nanprod,
1193
+ ],
1194
+ )
1195
+ def test_nanops_independent_of_mask_param(operation):
1196
+ # GH22764
1197
+ ser = Series([1, 2, np.nan, 3, np.nan, 4])
1198
+ mask = ser.isna()
1199
+ median_expected = operation(ser._values)
1200
+ median_result = operation(ser._values, mask=mask)
1201
+ assert median_expected == median_result
1202
+
1203
+
1204
+ @pytest.mark.parametrize("min_count", [-1, 0])
1205
+ def test_check_below_min_count_negative_or_zero_min_count(min_count):
1206
+ # GH35227
1207
+ result = nanops.check_below_min_count((21, 37), None, min_count)
1208
+ expected_result = False
1209
+ assert result == expected_result
1210
+
1211
+
1212
+ @pytest.mark.parametrize(
1213
+ "mask", [None, np.array([False, False, True]), np.array([True] + 9 * [False])]
1214
+ )
1215
+ @pytest.mark.parametrize("min_count, expected_result", [(1, False), (101, True)])
1216
+ def test_check_below_min_count_positive_min_count(mask, min_count, expected_result):
1217
+ # GH35227
1218
+ shape = (10, 10)
1219
+ result = nanops.check_below_min_count(shape, mask, min_count)
1220
+ assert result == expected_result
1221
+
1222
+
1223
+ @td.skip_if_windows
1224
+ @td.skip_if_32bit
1225
+ @pytest.mark.parametrize("min_count, expected_result", [(1, False), (2812191852, True)])
1226
+ def test_check_below_min_count_large_shape(min_count, expected_result):
1227
+ # GH35227 large shape used to show that the issue is fixed
1228
+ shape = (2244367, 1253)
1229
+ result = nanops.check_below_min_count(shape, mask=None, min_count=min_count)
1230
+ assert result == expected_result
1231
+
1232
+
1233
+ @pytest.mark.parametrize("func", ["nanmean", "nansum"])
1234
+ def test_check_bottleneck_disallow(any_real_numpy_dtype, func):
1235
+ # GH 42878 bottleneck sometimes produces unreliable results for mean and sum
1236
+ assert not nanops._bn_ok_dtype(np.dtype(any_real_numpy_dtype).type, func)
1237
+
1238
+
1239
+ @pytest.mark.parametrize("val", [2**55, -(2**55), 20150515061816532])
1240
+ def test_nanmean_overflow(disable_bottleneck, val):
1241
+ # GH 10155
1242
+ # In the previous implementation mean can overflow for int dtypes, it
1243
+ # is now consistent with numpy
1244
+
1245
+ ser = Series(val, index=range(500), dtype=np.int64)
1246
+ result = ser.mean()
1247
+ np_result = ser.values.mean()
1248
+ assert result == val
1249
+ assert result == np_result
1250
+ assert result.dtype == np.float64
1251
+
1252
+
1253
+ @pytest.mark.parametrize(
1254
+ "dtype",
1255
+ [
1256
+ np.int16,
1257
+ np.int32,
1258
+ np.int64,
1259
+ np.float32,
1260
+ np.float64,
1261
+ getattr(np, "float128", None),
1262
+ ],
1263
+ )
1264
+ @pytest.mark.parametrize("method", ["mean", "std", "var", "skew", "kurt", "min", "max"])
1265
+ def test_returned_dtype(disable_bottleneck, dtype, method):
1266
+ if dtype is None:
1267
+ pytest.skip("np.float128 not available")
1268
+
1269
+ ser = Series(range(10), dtype=dtype)
1270
+ result = getattr(ser, method)()
1271
+ if is_integer_dtype(dtype) and method not in ["min", "max"]:
1272
+ assert result.dtype == np.float64
1273
+ else:
1274
+ assert result.dtype == dtype
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_optional_dependency.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import types
3
+
4
+ import pytest
5
+
6
+ from pandas.compat._optional import (
7
+ VERSIONS,
8
+ import_optional_dependency,
9
+ )
10
+
11
+ import pandas._testing as tm
12
+
13
+
14
+ def test_import_optional():
15
+ match = "Missing .*notapackage.* pip .* conda .* notapackage"
16
+ with pytest.raises(ImportError, match=match) as exc_info:
17
+ import_optional_dependency("notapackage")
18
+ # The original exception should be there as context:
19
+ assert isinstance(exc_info.value.__context__, ImportError)
20
+
21
+ result = import_optional_dependency("notapackage", errors="ignore")
22
+ assert result is None
23
+
24
+
25
+ def test_xlrd_version_fallback():
26
+ pytest.importorskip("xlrd")
27
+ import_optional_dependency("xlrd")
28
+
29
+
30
+ def test_bad_version(monkeypatch):
31
+ name = "fakemodule"
32
+ module = types.ModuleType(name)
33
+ module.__version__ = "0.9.0"
34
+ sys.modules[name] = module
35
+ monkeypatch.setitem(VERSIONS, name, "1.0.0")
36
+
37
+ match = "Pandas requires .*1.0.0.* of .fakemodule.*'0.9.0'"
38
+ with pytest.raises(ImportError, match=match):
39
+ import_optional_dependency("fakemodule")
40
+
41
+ # Test min_version parameter
42
+ result = import_optional_dependency("fakemodule", min_version="0.8")
43
+ assert result is module
44
+
45
+ with tm.assert_produces_warning(UserWarning):
46
+ result = import_optional_dependency("fakemodule", errors="warn")
47
+ assert result is None
48
+
49
+ module.__version__ = "1.0.0" # exact match is OK
50
+ result = import_optional_dependency("fakemodule")
51
+ assert result is module
52
+
53
+ with pytest.raises(ImportError, match="Pandas requires version '1.1.0'"):
54
+ import_optional_dependency("fakemodule", min_version="1.1.0")
55
+
56
+ with tm.assert_produces_warning(UserWarning):
57
+ result = import_optional_dependency(
58
+ "fakemodule", errors="warn", min_version="1.1.0"
59
+ )
60
+ assert result is None
61
+
62
+ result = import_optional_dependency(
63
+ "fakemodule", errors="ignore", min_version="1.1.0"
64
+ )
65
+ assert result is None
66
+
67
+
68
+ def test_submodule(monkeypatch):
69
+ # Create a fake module with a submodule
70
+ name = "fakemodule"
71
+ module = types.ModuleType(name)
72
+ module.__version__ = "0.9.0"
73
+ sys.modules[name] = module
74
+ sub_name = "submodule"
75
+ submodule = types.ModuleType(sub_name)
76
+ setattr(module, sub_name, submodule)
77
+ sys.modules[f"{name}.{sub_name}"] = submodule
78
+ monkeypatch.setitem(VERSIONS, name, "1.0.0")
79
+
80
+ match = "Pandas requires .*1.0.0.* of .fakemodule.*'0.9.0'"
81
+ with pytest.raises(ImportError, match=match):
82
+ import_optional_dependency("fakemodule.submodule")
83
+
84
+ with tm.assert_produces_warning(UserWarning):
85
+ result = import_optional_dependency("fakemodule.submodule", errors="warn")
86
+ assert result is None
87
+
88
+ module.__version__ = "1.0.0" # exact match is OK
89
+ result = import_optional_dependency("fakemodule.submodule")
90
+ assert result is submodule
91
+
92
+
93
+ def test_no_version_raises(monkeypatch):
94
+ name = "fakemodule"
95
+ module = types.ModuleType(name)
96
+ sys.modules[name] = module
97
+ monkeypatch.setitem(VERSIONS, name, "1.0.0")
98
+
99
+ with pytest.raises(ImportError, match="Can't determine .* fakemodule"):
100
+ import_optional_dependency(name)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_register_accessor.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections.abc import Generator
2
+ import contextlib
3
+
4
+ import pytest
5
+
6
+ import pandas as pd
7
+ import pandas._testing as tm
8
+ from pandas.core import accessor
9
+
10
+
11
+ def test_dirname_mixin() -> None:
12
+ # GH37173
13
+
14
+ class X(accessor.DirNamesMixin):
15
+ x = 1
16
+ y: int
17
+
18
+ def __init__(self) -> None:
19
+ self.z = 3
20
+
21
+ result = [attr_name for attr_name in dir(X()) if not attr_name.startswith("_")]
22
+
23
+ assert result == ["x", "z"]
24
+
25
+
26
+ @contextlib.contextmanager
27
+ def ensure_removed(obj, attr) -> Generator[None, None, None]:
28
+ """Ensure that an attribute added to 'obj' during the test is
29
+ removed when we're done
30
+ """
31
+ try:
32
+ yield
33
+ finally:
34
+ try:
35
+ delattr(obj, attr)
36
+ except AttributeError:
37
+ pass
38
+ obj._accessors.discard(attr)
39
+
40
+
41
+ class MyAccessor:
42
+ def __init__(self, obj) -> None:
43
+ self.obj = obj
44
+ self.item = "item"
45
+
46
+ @property
47
+ def prop(self):
48
+ return self.item
49
+
50
+ def method(self):
51
+ return self.item
52
+
53
+
54
+ @pytest.mark.parametrize(
55
+ "obj, registrar",
56
+ [
57
+ (pd.Series, pd.api.extensions.register_series_accessor),
58
+ (pd.DataFrame, pd.api.extensions.register_dataframe_accessor),
59
+ (pd.Index, pd.api.extensions.register_index_accessor),
60
+ ],
61
+ )
62
+ def test_register(obj, registrar):
63
+ with ensure_removed(obj, "mine"):
64
+ before = set(dir(obj))
65
+ registrar("mine")(MyAccessor)
66
+ o = obj([]) if obj is not pd.Series else obj([], dtype=object)
67
+ assert o.mine.prop == "item"
68
+ after = set(dir(obj))
69
+ assert (before ^ after) == {"mine"}
70
+ assert "mine" in obj._accessors
71
+
72
+
73
+ def test_accessor_works():
74
+ with ensure_removed(pd.Series, "mine"):
75
+ pd.api.extensions.register_series_accessor("mine")(MyAccessor)
76
+
77
+ s = pd.Series([1, 2])
78
+ assert s.mine.obj is s
79
+
80
+ assert s.mine.prop == "item"
81
+ assert s.mine.method() == "item"
82
+
83
+
84
+ def test_overwrite_warns():
85
+ match = r".*MyAccessor.*fake.*Series.*"
86
+ with tm.assert_produces_warning(UserWarning, match=match):
87
+ with ensure_removed(pd.Series, "fake"):
88
+ setattr(pd.Series, "fake", 123)
89
+ pd.api.extensions.register_series_accessor("fake")(MyAccessor)
90
+ s = pd.Series([1, 2])
91
+ assert s.fake.prop == "item"
92
+
93
+
94
+ def test_raises_attribute_error():
95
+ with ensure_removed(pd.Series, "bad"):
96
+
97
+ @pd.api.extensions.register_series_accessor("bad")
98
+ class Bad:
99
+ def __init__(self, data) -> None:
100
+ raise AttributeError("whoops")
101
+
102
+ with pytest.raises(AttributeError, match="whoops"):
103
+ pd.Series([], dtype=object).bad
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_sorting.py ADDED
@@ -0,0 +1,487 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import defaultdict
2
+ from datetime import datetime
3
+ from itertools import product
4
+
5
+ import numpy as np
6
+ import pytest
7
+
8
+ from pandas import (
9
+ NA,
10
+ DataFrame,
11
+ MultiIndex,
12
+ Series,
13
+ array,
14
+ concat,
15
+ merge,
16
+ )
17
+ import pandas._testing as tm
18
+ from pandas.core.algorithms import safe_sort
19
+ import pandas.core.common as com
20
+ from pandas.core.sorting import (
21
+ _decons_group_index,
22
+ get_group_index,
23
+ is_int64_overflow_possible,
24
+ lexsort_indexer,
25
+ nargsort,
26
+ )
27
+
28
+
29
+ @pytest.fixture
30
+ def left_right():
31
+ low, high, n = -1 << 10, 1 << 10, 1 << 20
32
+ left = DataFrame(
33
+ np.random.default_rng(2).integers(low, high, (n, 7)), columns=list("ABCDEFG")
34
+ )
35
+ left["left"] = left.sum(axis=1)
36
+
37
+ # one-2-one match
38
+ i = np.random.default_rng(2).permutation(len(left))
39
+ right = left.iloc[i].copy()
40
+ right.columns = right.columns[:-1].tolist() + ["right"]
41
+ right.index = np.arange(len(right))
42
+ right["right"] *= -1
43
+ return left, right
44
+
45
+
46
+ class TestSorting:
47
+ @pytest.mark.slow
48
+ def test_int64_overflow(self):
49
+ B = np.concatenate((np.arange(1000), np.arange(1000), np.arange(500)))
50
+ A = np.arange(2500)
51
+ df = DataFrame(
52
+ {
53
+ "A": A,
54
+ "B": B,
55
+ "C": A,
56
+ "D": B,
57
+ "E": A,
58
+ "F": B,
59
+ "G": A,
60
+ "H": B,
61
+ "values": np.random.default_rng(2).standard_normal(2500),
62
+ }
63
+ )
64
+
65
+ lg = df.groupby(["A", "B", "C", "D", "E", "F", "G", "H"])
66
+ rg = df.groupby(["H", "G", "F", "E", "D", "C", "B", "A"])
67
+
68
+ left = lg.sum()["values"]
69
+ right = rg.sum()["values"]
70
+
71
+ exp_index, _ = left.index.sortlevel()
72
+ tm.assert_index_equal(left.index, exp_index)
73
+
74
+ exp_index, _ = right.index.sortlevel(0)
75
+ tm.assert_index_equal(right.index, exp_index)
76
+
77
+ tups = list(map(tuple, df[["A", "B", "C", "D", "E", "F", "G", "H"]].values))
78
+ tups = com.asarray_tuplesafe(tups)
79
+
80
+ expected = df.groupby(tups).sum()["values"]
81
+
82
+ for k, v in expected.items():
83
+ assert left[k] == right[k[::-1]]
84
+ assert left[k] == v
85
+ assert len(left) == len(right)
86
+
87
+ def test_int64_overflow_groupby_large_range(self):
88
+ # GH9096
89
+ values = range(55109)
90
+ data = DataFrame.from_dict({"a": values, "b": values, "c": values, "d": values})
91
+ grouped = data.groupby(["a", "b", "c", "d"])
92
+ assert len(grouped) == len(values)
93
+
94
+ @pytest.mark.parametrize("agg", ["mean", "median"])
95
+ def test_int64_overflow_groupby_large_df_shuffled(self, agg):
96
+ rs = np.random.default_rng(2)
97
+ arr = rs.integers(-1 << 12, 1 << 12, (1 << 15, 5))
98
+ i = rs.choice(len(arr), len(arr) * 4)
99
+ arr = np.vstack((arr, arr[i])) # add some duplicate rows
100
+
101
+ i = rs.permutation(len(arr))
102
+ arr = arr[i] # shuffle rows
103
+
104
+ df = DataFrame(arr, columns=list("abcde"))
105
+ df["jim"], df["joe"] = np.zeros((2, len(df)))
106
+ gr = df.groupby(list("abcde"))
107
+
108
+ # verify this is testing what it is supposed to test!
109
+ assert is_int64_overflow_possible(gr._grouper.shape)
110
+
111
+ mi = MultiIndex.from_arrays(
112
+ [ar.ravel() for ar in np.array_split(np.unique(arr, axis=0), 5, axis=1)],
113
+ names=list("abcde"),
114
+ )
115
+
116
+ res = DataFrame(
117
+ np.zeros((len(mi), 2)), columns=["jim", "joe"], index=mi
118
+ ).sort_index()
119
+
120
+ tm.assert_frame_equal(getattr(gr, agg)(), res)
121
+
122
+ @pytest.mark.parametrize(
123
+ "order, na_position, exp",
124
+ [
125
+ [
126
+ True,
127
+ "last",
128
+ list(range(5, 105)) + list(range(5)) + list(range(105, 110)),
129
+ ],
130
+ [
131
+ True,
132
+ "first",
133
+ list(range(5)) + list(range(105, 110)) + list(range(5, 105)),
134
+ ],
135
+ [
136
+ False,
137
+ "last",
138
+ list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)),
139
+ ],
140
+ [
141
+ False,
142
+ "first",
143
+ list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)),
144
+ ],
145
+ ],
146
+ )
147
+ def test_lexsort_indexer(self, order, na_position, exp):
148
+ keys = [[np.nan] * 5 + list(range(100)) + [np.nan] * 5]
149
+ result = lexsort_indexer(keys, orders=order, na_position=na_position)
150
+ tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp))
151
+
152
+ @pytest.mark.parametrize(
153
+ "ascending, na_position, exp",
154
+ [
155
+ [
156
+ True,
157
+ "last",
158
+ list(range(5, 105)) + list(range(5)) + list(range(105, 110)),
159
+ ],
160
+ [
161
+ True,
162
+ "first",
163
+ list(range(5)) + list(range(105, 110)) + list(range(5, 105)),
164
+ ],
165
+ [
166
+ False,
167
+ "last",
168
+ list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)),
169
+ ],
170
+ [
171
+ False,
172
+ "first",
173
+ list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)),
174
+ ],
175
+ ],
176
+ )
177
+ def test_nargsort(self, ascending, na_position, exp):
178
+ # list places NaNs last, np.array(..., dtype="O") may not place NaNs first
179
+ items = np.array([np.nan] * 5 + list(range(100)) + [np.nan] * 5, dtype="O")
180
+
181
+ # mergesort is the most difficult to get right because we want it to be
182
+ # stable.
183
+
184
+ # According to numpy/core/tests/test_multiarray, """The number of
185
+ # sorted items must be greater than ~50 to check the actual algorithm
186
+ # because quick and merge sort fall over to insertion sort for small
187
+ # arrays."""
188
+
189
+ result = nargsort(
190
+ items, kind="mergesort", ascending=ascending, na_position=na_position
191
+ )
192
+ tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False)
193
+
194
+
195
+ class TestMerge:
196
+ def test_int64_overflow_outer_merge(self):
197
+ # #2690, combinatorial explosion
198
+ df1 = DataFrame(
199
+ np.random.default_rng(2).standard_normal((1000, 7)),
200
+ columns=list("ABCDEF") + ["G1"],
201
+ )
202
+ df2 = DataFrame(
203
+ np.random.default_rng(3).standard_normal((1000, 7)),
204
+ columns=list("ABCDEF") + ["G2"],
205
+ )
206
+ result = merge(df1, df2, how="outer")
207
+ assert len(result) == 2000
208
+
209
+ @pytest.mark.slow
210
+ def test_int64_overflow_check_sum_col(self, left_right):
211
+ left, right = left_right
212
+
213
+ out = merge(left, right, how="outer")
214
+ assert len(out) == len(left)
215
+ tm.assert_series_equal(out["left"], -out["right"], check_names=False)
216
+ result = out.iloc[:, :-2].sum(axis=1)
217
+ tm.assert_series_equal(out["left"], result, check_names=False)
218
+ assert result.name is None
219
+
220
+ @pytest.mark.slow
221
+ @pytest.mark.parametrize("how", ["left", "right", "outer", "inner"])
222
+ def test_int64_overflow_how_merge(self, left_right, how):
223
+ left, right = left_right
224
+
225
+ out = merge(left, right, how="outer")
226
+ out.sort_values(out.columns.tolist(), inplace=True)
227
+ out.index = np.arange(len(out))
228
+ tm.assert_frame_equal(out, merge(left, right, how=how, sort=True))
229
+
230
+ @pytest.mark.slow
231
+ def test_int64_overflow_sort_false_order(self, left_right):
232
+ left, right = left_right
233
+
234
+ # check that left merge w/ sort=False maintains left frame order
235
+ out = merge(left, right, how="left", sort=False)
236
+ tm.assert_frame_equal(left, out[left.columns.tolist()])
237
+
238
+ out = merge(right, left, how="left", sort=False)
239
+ tm.assert_frame_equal(right, out[right.columns.tolist()])
240
+
241
+ @pytest.mark.slow
242
+ @pytest.mark.parametrize("how", ["left", "right", "outer", "inner"])
243
+ @pytest.mark.parametrize("sort", [True, False])
244
+ def test_int64_overflow_one_to_many_none_match(self, how, sort):
245
+ # one-2-many/none match
246
+ low, high, n = -1 << 10, 1 << 10, 1 << 11
247
+ left = DataFrame(
248
+ np.random.default_rng(2).integers(low, high, (n, 7)).astype("int64"),
249
+ columns=list("ABCDEFG"),
250
+ )
251
+
252
+ # confirm that this is checking what it is supposed to check
253
+ shape = left.apply(Series.nunique).values
254
+ assert is_int64_overflow_possible(shape)
255
+
256
+ # add duplicates to left frame
257
+ left = concat([left, left], ignore_index=True)
258
+
259
+ right = DataFrame(
260
+ np.random.default_rng(3).integers(low, high, (n // 2, 7)).astype("int64"),
261
+ columns=list("ABCDEFG"),
262
+ )
263
+
264
+ # add duplicates & overlap with left to the right frame
265
+ i = np.random.default_rng(4).choice(len(left), n)
266
+ right = concat([right, right, left.iloc[i]], ignore_index=True)
267
+
268
+ left["left"] = np.random.default_rng(2).standard_normal(len(left))
269
+ right["right"] = np.random.default_rng(2).standard_normal(len(right))
270
+
271
+ # shuffle left & right frames
272
+ i = np.random.default_rng(5).permutation(len(left))
273
+ left = left.iloc[i].copy()
274
+ left.index = np.arange(len(left))
275
+
276
+ i = np.random.default_rng(6).permutation(len(right))
277
+ right = right.iloc[i].copy()
278
+ right.index = np.arange(len(right))
279
+
280
+ # manually compute outer merge
281
+ ldict, rdict = defaultdict(list), defaultdict(list)
282
+
283
+ for idx, row in left.set_index(list("ABCDEFG")).iterrows():
284
+ ldict[idx].append(row["left"])
285
+
286
+ for idx, row in right.set_index(list("ABCDEFG")).iterrows():
287
+ rdict[idx].append(row["right"])
288
+
289
+ vals = []
290
+ for k, lval in ldict.items():
291
+ rval = rdict.get(k, [np.nan])
292
+ for lv, rv in product(lval, rval):
293
+ vals.append(
294
+ k
295
+ + (
296
+ lv,
297
+ rv,
298
+ )
299
+ )
300
+
301
+ for k, rval in rdict.items():
302
+ if k not in ldict:
303
+ vals.extend(
304
+ k
305
+ + (
306
+ np.nan,
307
+ rv,
308
+ )
309
+ for rv in rval
310
+ )
311
+
312
+ def align(df):
313
+ df = df.sort_values(df.columns.tolist())
314
+ df.index = np.arange(len(df))
315
+ return df
316
+
317
+ out = DataFrame(vals, columns=list("ABCDEFG") + ["left", "right"])
318
+ out = align(out)
319
+
320
+ jmask = {
321
+ "left": out["left"].notna(),
322
+ "right": out["right"].notna(),
323
+ "inner": out["left"].notna() & out["right"].notna(),
324
+ "outer": np.ones(len(out), dtype="bool"),
325
+ }
326
+
327
+ mask = jmask[how]
328
+ frame = align(out[mask].copy())
329
+ assert mask.all() ^ mask.any() or how == "outer"
330
+
331
+ res = merge(left, right, how=how, sort=sort)
332
+ if sort:
333
+ kcols = list("ABCDEFG")
334
+ tm.assert_frame_equal(
335
+ res[kcols].copy(), res[kcols].sort_values(kcols, kind="mergesort")
336
+ )
337
+
338
+ # as in GH9092 dtypes break with outer/right join
339
+ # 2021-12-18: dtype does not break anymore
340
+ tm.assert_frame_equal(frame, align(res))
341
+
342
+
343
+ @pytest.mark.parametrize(
344
+ "codes_list, shape",
345
+ [
346
+ [
347
+ [
348
+ np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100).astype(np.int64),
349
+ np.tile([0, 2, 4, 3, 0, 1, 2, 3], 100).astype(np.int64),
350
+ np.tile([5, 1, 0, 2, 3, 0, 5, 4], 100).astype(np.int64),
351
+ ],
352
+ (4, 5, 6),
353
+ ],
354
+ [
355
+ [
356
+ np.tile(np.arange(10000, dtype=np.int64), 5),
357
+ np.tile(np.arange(10000, dtype=np.int64), 5),
358
+ ],
359
+ (10000, 10000),
360
+ ],
361
+ ],
362
+ )
363
+ def test_decons(codes_list, shape):
364
+ group_index = get_group_index(codes_list, shape, sort=True, xnull=True)
365
+ codes_list2 = _decons_group_index(group_index, shape)
366
+
367
+ for a, b in zip(codes_list, codes_list2):
368
+ tm.assert_numpy_array_equal(a, b)
369
+
370
+
371
+ class TestSafeSort:
372
+ @pytest.mark.parametrize(
373
+ "arg, exp",
374
+ [
375
+ [[3, 1, 2, 0, 4], [0, 1, 2, 3, 4]],
376
+ [
377
+ np.array(list("baaacb"), dtype=object),
378
+ np.array(list("aaabbc"), dtype=object),
379
+ ],
380
+ [[], []],
381
+ ],
382
+ )
383
+ def test_basic_sort(self, arg, exp):
384
+ result = safe_sort(np.array(arg))
385
+ expected = np.array(exp)
386
+ tm.assert_numpy_array_equal(result, expected)
387
+
388
+ @pytest.mark.parametrize("verify", [True, False])
389
+ @pytest.mark.parametrize(
390
+ "codes, exp_codes",
391
+ [
392
+ [[0, 1, 1, 2, 3, 0, -1, 4], [3, 1, 1, 2, 0, 3, -1, 4]],
393
+ [[], []],
394
+ ],
395
+ )
396
+ def test_codes(self, verify, codes, exp_codes):
397
+ values = np.array([3, 1, 2, 0, 4])
398
+ expected = np.array([0, 1, 2, 3, 4])
399
+
400
+ result, result_codes = safe_sort(
401
+ values, codes, use_na_sentinel=True, verify=verify
402
+ )
403
+ expected_codes = np.array(exp_codes, dtype=np.intp)
404
+ tm.assert_numpy_array_equal(result, expected)
405
+ tm.assert_numpy_array_equal(result_codes, expected_codes)
406
+
407
+ def test_codes_out_of_bound(self):
408
+ values = np.array([3, 1, 2, 0, 4])
409
+ expected = np.array([0, 1, 2, 3, 4])
410
+
411
+ # out of bound indices
412
+ codes = [0, 101, 102, 2, 3, 0, 99, 4]
413
+ result, result_codes = safe_sort(values, codes, use_na_sentinel=True)
414
+ expected_codes = np.array([3, -1, -1, 2, 0, 3, -1, 4], dtype=np.intp)
415
+ tm.assert_numpy_array_equal(result, expected)
416
+ tm.assert_numpy_array_equal(result_codes, expected_codes)
417
+
418
+ def test_mixed_integer(self):
419
+ values = np.array(["b", 1, 0, "a", 0, "b"], dtype=object)
420
+ result = safe_sort(values)
421
+ expected = np.array([0, 0, 1, "a", "b", "b"], dtype=object)
422
+ tm.assert_numpy_array_equal(result, expected)
423
+
424
+ def test_mixed_integer_with_codes(self):
425
+ values = np.array(["b", 1, 0, "a"], dtype=object)
426
+ codes = [0, 1, 2, 3, 0, -1, 1]
427
+ result, result_codes = safe_sort(values, codes)
428
+ expected = np.array([0, 1, "a", "b"], dtype=object)
429
+ expected_codes = np.array([3, 1, 0, 2, 3, -1, 1], dtype=np.intp)
430
+ tm.assert_numpy_array_equal(result, expected)
431
+ tm.assert_numpy_array_equal(result_codes, expected_codes)
432
+
433
+ def test_unsortable(self):
434
+ # GH 13714
435
+ arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object)
436
+ msg = "'[<>]' not supported between instances of .*"
437
+ with pytest.raises(TypeError, match=msg):
438
+ safe_sort(arr)
439
+
440
+ @pytest.mark.parametrize(
441
+ "arg, codes, err, msg",
442
+ [
443
+ [1, None, TypeError, "Only np.ndarray, ExtensionArray, and Index"],
444
+ [np.array([0, 1, 2]), 1, TypeError, "Only list-like objects or None"],
445
+ [np.array([0, 1, 2, 1]), [0, 1], ValueError, "values should be unique"],
446
+ ],
447
+ )
448
+ def test_exceptions(self, arg, codes, err, msg):
449
+ with pytest.raises(err, match=msg):
450
+ safe_sort(values=arg, codes=codes)
451
+
452
+ @pytest.mark.parametrize(
453
+ "arg, exp", [[[1, 3, 2], [1, 2, 3]], [[1, 3, np.nan, 2], [1, 2, 3, np.nan]]]
454
+ )
455
+ def test_extension_array(self, arg, exp):
456
+ a = array(arg, dtype="Int64")
457
+ result = safe_sort(a)
458
+ expected = array(exp, dtype="Int64")
459
+ tm.assert_extension_array_equal(result, expected)
460
+
461
+ @pytest.mark.parametrize("verify", [True, False])
462
+ def test_extension_array_codes(self, verify):
463
+ a = array([1, 3, 2], dtype="Int64")
464
+ result, codes = safe_sort(a, [0, 1, -1, 2], use_na_sentinel=True, verify=verify)
465
+ expected_values = array([1, 2, 3], dtype="Int64")
466
+ expected_codes = np.array([0, 2, -1, 1], dtype=np.intp)
467
+ tm.assert_extension_array_equal(result, expected_values)
468
+ tm.assert_numpy_array_equal(codes, expected_codes)
469
+
470
+
471
+ def test_mixed_str_null(nulls_fixture):
472
+ values = np.array(["b", nulls_fixture, "a", "b"], dtype=object)
473
+ result = safe_sort(values)
474
+ expected = np.array(["a", "b", "b", nulls_fixture], dtype=object)
475
+ tm.assert_numpy_array_equal(result, expected)
476
+
477
+
478
+ def test_safe_sort_multiindex():
479
+ # GH#48412
480
+ arr1 = Series([2, 1, NA, NA], dtype="Int64")
481
+ arr2 = [2, 1, 3, 3]
482
+ midx = MultiIndex.from_arrays([arr1, arr2])
483
+ result = safe_sort(midx)
484
+ expected = MultiIndex.from_arrays(
485
+ [Series([1, 2, NA, NA], dtype="Int64"), [1, 2, 3, 3]]
486
+ )
487
+ tm.assert_index_equal(result, expected)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/tests/test_take.py ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+ from pandas._libs import iNaT
7
+
8
+ import pandas._testing as tm
9
+ import pandas.core.algorithms as algos
10
+
11
+
12
+ @pytest.fixture(
13
+ params=[
14
+ (np.int8, np.int16(127), np.int8),
15
+ (np.int8, np.int16(128), np.int16),
16
+ (np.int32, 1, np.int32),
17
+ (np.int32, 2.0, np.float64),
18
+ (np.int32, 3.0 + 4.0j, np.complex128),
19
+ (np.int32, True, np.object_),
20
+ (np.int32, "", np.object_),
21
+ (np.float64, 1, np.float64),
22
+ (np.float64, 2.0, np.float64),
23
+ (np.float64, 3.0 + 4.0j, np.complex128),
24
+ (np.float64, True, np.object_),
25
+ (np.float64, "", np.object_),
26
+ (np.complex128, 1, np.complex128),
27
+ (np.complex128, 2.0, np.complex128),
28
+ (np.complex128, 3.0 + 4.0j, np.complex128),
29
+ (np.complex128, True, np.object_),
30
+ (np.complex128, "", np.object_),
31
+ (np.bool_, 1, np.object_),
32
+ (np.bool_, 2.0, np.object_),
33
+ (np.bool_, 3.0 + 4.0j, np.object_),
34
+ (np.bool_, True, np.bool_),
35
+ (np.bool_, "", np.object_),
36
+ ]
37
+ )
38
+ def dtype_fill_out_dtype(request):
39
+ return request.param
40
+
41
+
42
+ class TestTake:
43
+ def test_1d_fill_nonna(self, dtype_fill_out_dtype):
44
+ dtype, fill_value, out_dtype = dtype_fill_out_dtype
45
+ data = np.random.default_rng(2).integers(0, 2, 4).astype(dtype)
46
+ indexer = [2, 1, 0, -1]
47
+
48
+ result = algos.take_nd(data, indexer, fill_value=fill_value)
49
+ assert (result[[0, 1, 2]] == data[[2, 1, 0]]).all()
50
+ assert result[3] == fill_value
51
+ assert result.dtype == out_dtype
52
+
53
+ indexer = [2, 1, 0, 1]
54
+
55
+ result = algos.take_nd(data, indexer, fill_value=fill_value)
56
+ assert (result[[0, 1, 2, 3]] == data[indexer]).all()
57
+ assert result.dtype == dtype
58
+
59
+ def test_2d_fill_nonna(self, dtype_fill_out_dtype):
60
+ dtype, fill_value, out_dtype = dtype_fill_out_dtype
61
+ data = np.random.default_rng(2).integers(0, 2, (5, 3)).astype(dtype)
62
+ indexer = [2, 1, 0, -1]
63
+
64
+ result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value)
65
+ assert (result[[0, 1, 2], :] == data[[2, 1, 0], :]).all()
66
+ assert (result[3, :] == fill_value).all()
67
+ assert result.dtype == out_dtype
68
+
69
+ result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value)
70
+ assert (result[:, [0, 1, 2]] == data[:, [2, 1, 0]]).all()
71
+ assert (result[:, 3] == fill_value).all()
72
+ assert result.dtype == out_dtype
73
+
74
+ indexer = [2, 1, 0, 1]
75
+ result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value)
76
+ assert (result[[0, 1, 2, 3], :] == data[indexer, :]).all()
77
+ assert result.dtype == dtype
78
+
79
+ result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value)
80
+ assert (result[:, [0, 1, 2, 3]] == data[:, indexer]).all()
81
+ assert result.dtype == dtype
82
+
83
+ def test_3d_fill_nonna(self, dtype_fill_out_dtype):
84
+ dtype, fill_value, out_dtype = dtype_fill_out_dtype
85
+
86
+ data = np.random.default_rng(2).integers(0, 2, (5, 4, 3)).astype(dtype)
87
+ indexer = [2, 1, 0, -1]
88
+
89
+ result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value)
90
+ assert (result[[0, 1, 2], :, :] == data[[2, 1, 0], :, :]).all()
91
+ assert (result[3, :, :] == fill_value).all()
92
+ assert result.dtype == out_dtype
93
+
94
+ result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value)
95
+ assert (result[:, [0, 1, 2], :] == data[:, [2, 1, 0], :]).all()
96
+ assert (result[:, 3, :] == fill_value).all()
97
+ assert result.dtype == out_dtype
98
+
99
+ result = algos.take_nd(data, indexer, axis=2, fill_value=fill_value)
100
+ assert (result[:, :, [0, 1, 2]] == data[:, :, [2, 1, 0]]).all()
101
+ assert (result[:, :, 3] == fill_value).all()
102
+ assert result.dtype == out_dtype
103
+
104
+ indexer = [2, 1, 0, 1]
105
+ result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value)
106
+ assert (result[[0, 1, 2, 3], :, :] == data[indexer, :, :]).all()
107
+ assert result.dtype == dtype
108
+
109
+ result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value)
110
+ assert (result[:, [0, 1, 2, 3], :] == data[:, indexer, :]).all()
111
+ assert result.dtype == dtype
112
+
113
+ result = algos.take_nd(data, indexer, axis=2, fill_value=fill_value)
114
+ assert (result[:, :, [0, 1, 2, 3]] == data[:, :, indexer]).all()
115
+ assert result.dtype == dtype
116
+
117
+ def test_1d_other_dtypes(self):
118
+ arr = np.random.default_rng(2).standard_normal(10).astype(np.float32)
119
+
120
+ indexer = [1, 2, 3, -1]
121
+ result = algos.take_nd(arr, indexer)
122
+ expected = arr.take(indexer)
123
+ expected[-1] = np.nan
124
+ tm.assert_almost_equal(result, expected)
125
+
126
+ def test_2d_other_dtypes(self):
127
+ arr = np.random.default_rng(2).standard_normal((10, 5)).astype(np.float32)
128
+
129
+ indexer = [1, 2, 3, -1]
130
+
131
+ # axis=0
132
+ result = algos.take_nd(arr, indexer, axis=0)
133
+ expected = arr.take(indexer, axis=0)
134
+ expected[-1] = np.nan
135
+ tm.assert_almost_equal(result, expected)
136
+
137
+ # axis=1
138
+ result = algos.take_nd(arr, indexer, axis=1)
139
+ expected = arr.take(indexer, axis=1)
140
+ expected[:, -1] = np.nan
141
+ tm.assert_almost_equal(result, expected)
142
+
143
+ def test_1d_bool(self):
144
+ arr = np.array([0, 1, 0], dtype=bool)
145
+
146
+ result = algos.take_nd(arr, [0, 2, 2, 1])
147
+ expected = arr.take([0, 2, 2, 1])
148
+ tm.assert_numpy_array_equal(result, expected)
149
+
150
+ result = algos.take_nd(arr, [0, 2, -1])
151
+ assert result.dtype == np.object_
152
+
153
+ def test_2d_bool(self):
154
+ arr = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 1]], dtype=bool)
155
+
156
+ result = algos.take_nd(arr, [0, 2, 2, 1])
157
+ expected = arr.take([0, 2, 2, 1], axis=0)
158
+ tm.assert_numpy_array_equal(result, expected)
159
+
160
+ result = algos.take_nd(arr, [0, 2, 2, 1], axis=1)
161
+ expected = arr.take([0, 2, 2, 1], axis=1)
162
+ tm.assert_numpy_array_equal(result, expected)
163
+
164
+ result = algos.take_nd(arr, [0, 2, -1])
165
+ assert result.dtype == np.object_
166
+
167
+ def test_2d_float32(self):
168
+ arr = np.random.default_rng(2).standard_normal((4, 3)).astype(np.float32)
169
+ indexer = [0, 2, -1, 1, -1]
170
+
171
+ # axis=0
172
+ result = algos.take_nd(arr, indexer, axis=0)
173
+
174
+ expected = arr.take(indexer, axis=0)
175
+ expected[[2, 4], :] = np.nan
176
+ tm.assert_almost_equal(result, expected)
177
+
178
+ # axis=1
179
+ result = algos.take_nd(arr, indexer, axis=1)
180
+ expected = arr.take(indexer, axis=1)
181
+ expected[:, [2, 4]] = np.nan
182
+ tm.assert_almost_equal(result, expected)
183
+
184
+ def test_2d_datetime64(self):
185
+ # 2005/01/01 - 2006/01/01
186
+ arr = (
187
+ np.random.default_rng(2).integers(11_045_376, 11_360_736, (5, 3))
188
+ * 100_000_000_000
189
+ )
190
+ arr = arr.view(dtype="datetime64[ns]")
191
+ indexer = [0, 2, -1, 1, -1]
192
+
193
+ # axis=0
194
+ result = algos.take_nd(arr, indexer, axis=0)
195
+ expected = arr.take(indexer, axis=0)
196
+ expected.view(np.int64)[[2, 4], :] = iNaT
197
+ tm.assert_almost_equal(result, expected)
198
+
199
+ result = algos.take_nd(arr, indexer, axis=0, fill_value=datetime(2007, 1, 1))
200
+ expected = arr.take(indexer, axis=0)
201
+ expected[[2, 4], :] = datetime(2007, 1, 1)
202
+ tm.assert_almost_equal(result, expected)
203
+
204
+ # axis=1
205
+ result = algos.take_nd(arr, indexer, axis=1)
206
+ expected = arr.take(indexer, axis=1)
207
+ expected.view(np.int64)[:, [2, 4]] = iNaT
208
+ tm.assert_almost_equal(result, expected)
209
+
210
+ result = algos.take_nd(arr, indexer, axis=1, fill_value=datetime(2007, 1, 1))
211
+ expected = arr.take(indexer, axis=1)
212
+ expected[:, [2, 4]] = datetime(2007, 1, 1)
213
+ tm.assert_almost_equal(result, expected)
214
+
215
+ def test_take_axis_0(self):
216
+ arr = np.arange(12).reshape(4, 3)
217
+ result = algos.take(arr, [0, -1])
218
+ expected = np.array([[0, 1, 2], [9, 10, 11]])
219
+ tm.assert_numpy_array_equal(result, expected)
220
+
221
+ # allow_fill=True
222
+ result = algos.take(arr, [0, -1], allow_fill=True, fill_value=0)
223
+ expected = np.array([[0, 1, 2], [0, 0, 0]])
224
+ tm.assert_numpy_array_equal(result, expected)
225
+
226
+ def test_take_axis_1(self):
227
+ arr = np.arange(12).reshape(4, 3)
228
+ result = algos.take(arr, [0, -1], axis=1)
229
+ expected = np.array([[0, 2], [3, 5], [6, 8], [9, 11]])
230
+ tm.assert_numpy_array_equal(result, expected)
231
+
232
+ # allow_fill=True
233
+ result = algos.take(arr, [0, -1], axis=1, allow_fill=True, fill_value=0)
234
+ expected = np.array([[0, 0], [3, 0], [6, 0], [9, 0]])
235
+ tm.assert_numpy_array_equal(result, expected)
236
+
237
+ # GH#26976 make sure we validate along the correct axis
238
+ with pytest.raises(IndexError, match="indices are out-of-bounds"):
239
+ algos.take(arr, [0, 3], axis=1, allow_fill=True, fill_value=0)
240
+
241
+ def test_take_non_hashable_fill_value(self):
242
+ arr = np.array([1, 2, 3])
243
+ indexer = np.array([1, -1])
244
+ with pytest.raises(ValueError, match="fill_value must be a scalar"):
245
+ algos.take(arr, indexer, allow_fill=True, fill_value=[1])
246
+
247
+ # with object dtype it is allowed
248
+ arr = np.array([1, 2, 3], dtype=object)
249
+ result = algos.take(arr, indexer, allow_fill=True, fill_value=[1])
250
+ expected = np.array([2, [1]], dtype=object)
251
+ tm.assert_numpy_array_equal(result, expected)
252
+
253
+
254
+ class TestExtensionTake:
255
+ # The take method found in pd.api.extensions
256
+
257
+ def test_bounds_check_large(self):
258
+ arr = np.array([1, 2])
259
+
260
+ msg = "indices are out-of-bounds"
261
+ with pytest.raises(IndexError, match=msg):
262
+ algos.take(arr, [2, 3], allow_fill=True)
263
+
264
+ msg = "index 2 is out of bounds for( axis 0 with)? size 2"
265
+ with pytest.raises(IndexError, match=msg):
266
+ algos.take(arr, [2, 3], allow_fill=False)
267
+
268
+ def test_bounds_check_small(self):
269
+ arr = np.array([1, 2, 3], dtype=np.int64)
270
+ indexer = [0, -1, -2]
271
+
272
+ msg = r"'indices' contains values less than allowed \(-2 < -1\)"
273
+ with pytest.raises(ValueError, match=msg):
274
+ algos.take(arr, indexer, allow_fill=True)
275
+
276
+ result = algos.take(arr, indexer)
277
+ expected = np.array([1, 3, 2], dtype=np.int64)
278
+ tm.assert_numpy_array_equal(result, expected)
279
+
280
+ @pytest.mark.parametrize("allow_fill", [True, False])
281
+ def test_take_empty(self, allow_fill):
282
+ arr = np.array([], dtype=np.int64)
283
+ # empty take is ok
284
+ result = algos.take(arr, [], allow_fill=allow_fill)
285
+ tm.assert_numpy_array_equal(arr, result)
286
+
287
+ msg = "|".join(
288
+ [
289
+ "cannot do a non-empty take from an empty axes.",
290
+ "indices are out-of-bounds",
291
+ ]
292
+ )
293
+ with pytest.raises(IndexError, match=msg):
294
+ algos.take(arr, [0], allow_fill=allow_fill)
295
+
296
+ def test_take_na_empty(self):
297
+ result = algos.take(np.array([]), [-1, -1], allow_fill=True, fill_value=0.0)
298
+ expected = np.array([0.0, 0.0])
299
+ tm.assert_numpy_array_equal(result, expected)
300
+
301
+ def test_take_coerces_list(self):
302
+ arr = [1, 2, 3]
303
+ msg = "take accepting non-standard inputs is deprecated"
304
+ with tm.assert_produces_warning(FutureWarning, match=msg):
305
+ result = algos.take(arr, [0, 0])
306
+ expected = np.array([1, 1])
307
+ tm.assert_numpy_array_equal(result, expected)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/__init__.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def __getattr__(key: str):
2
+ # These imports need to be lazy to avoid circular import errors
3
+ if key == "hash_array":
4
+ from pandas.core.util.hashing import hash_array
5
+
6
+ return hash_array
7
+ if key == "hash_pandas_object":
8
+ from pandas.core.util.hashing import hash_pandas_object
9
+
10
+ return hash_pandas_object
11
+ if key == "Appender":
12
+ from pandas.util._decorators import Appender
13
+
14
+ return Appender
15
+ if key == "Substitution":
16
+ from pandas.util._decorators import Substitution
17
+
18
+ return Substitution
19
+
20
+ if key == "cache_readonly":
21
+ from pandas.util._decorators import cache_readonly
22
+
23
+ return cache_readonly
24
+
25
+ raise AttributeError(f"module 'pandas.util' has no attribute '{key}'")
26
+
27
+
28
+ def capitalize_first_letter(s):
29
+ return s[:1].upper() + s[1:]
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_decorators.py ADDED
@@ -0,0 +1,508 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from functools import wraps
4
+ import inspect
5
+ from textwrap import dedent
6
+ from typing import (
7
+ TYPE_CHECKING,
8
+ Any,
9
+ Callable,
10
+ cast,
11
+ )
12
+ import warnings
13
+
14
+ from pandas._libs.properties import cache_readonly
15
+ from pandas._typing import (
16
+ F,
17
+ T,
18
+ )
19
+ from pandas.util._exceptions import find_stack_level
20
+
21
+ if TYPE_CHECKING:
22
+ from collections.abc import Mapping
23
+
24
+
25
def deprecate(
    name: str,
    alternative: Callable[..., Any],
    version: str,
    alt_name: str | None = None,
    klass: type[Warning] | None = None,
    stacklevel: int = 2,
    msg: str | None = None,
) -> Callable[[F], F]:
    """
    Return a new function that emits a deprecation warning on use.

    To use this method for a deprecated function, another function
    `alternative` with the same signature must exist. The deprecated
    function will emit a deprecation warning, and in the docstring
    it will contain the deprecation directive with the provided version
    so it can be detected for future removal.

    Parameters
    ----------
    name : str
        Name of function to deprecate.
    alternative : func
        Function to use instead.
    version : str
        Version of pandas in which the method has been deprecated.
    alt_name : str, optional
        Name to use in preference of alternative.__name__.
    klass : Warning, default FutureWarning
        Category of the emitted warning.
    stacklevel : int, default 2
        ``stacklevel`` forwarded to ``warnings.warn``.
    msg : str, optional
        The message to display in the warning.
        Default is '{name} is deprecated, use {alt_name} instead.'

    Returns
    -------
    callable
        Wrapper that warns and then delegates to ``alternative``.

    Raises
    ------
    AssertionError
        If ``alternative`` has a non-empty docstring that does not consist
        of an empty first line, a one-line summary and a blank line
        followed by the remaining text.
    """
    alt_name = alt_name or alternative.__name__
    klass = klass or FutureWarning
    warning_msg = msg or f"{name} is deprecated, use {alt_name} instead."

    @wraps(alternative)
    def wrapper(*args, **kwargs) -> Any:
        warnings.warn(warning_msg, klass, stacklevel=stacklevel)
        return alternative(*args, **kwargs)

    # adding deprecated directive to the docstring
    msg = msg or f"Use `{alt_name}` instead."
    doc_error_msg = (
        "deprecate needs a correctly formatted docstring in "
        "the target function (should have a one liner short "
        "summary, and opening quotes should be in their own "
        f"line). Found:\n{alternative.__doc__}"
    )

    # when python is running in optimized mode (i.e. `-OO`), docstrings are
    # removed, so we check that a docstring with correct formatting is used
    # but we allow empty docstrings
    if alternative.__doc__:
        if alternative.__doc__.count("\n") < 3:
            raise AssertionError(doc_error_msg)
        empty1, summary, empty2, doc_string = alternative.__doc__.split("\n", 3)
        # All three conditions are independent failures.  The previous
        # ``empty1 or empty2 and not summary`` only rejected a non-blank
        # second line when the summary was *also* missing.
        if empty1 or empty2.strip() or not summary.strip():
            raise AssertionError(doc_error_msg)
        # Assemble the text explicitly so the layout does not depend on the
        # indentation of a template literal.
        wrapper.__doc__ = (
            f"\n{summary.strip()}\n\n"
            f".. deprecated:: {version}\n"
            f"    {msg}\n\n"
            f"{dedent(doc_string)}"
        )
    # error: Incompatible return value type (got "Callable[[VarArg(Any), KwArg(Any)],
    # Callable[...,Any]]", expected "Callable[[F], F]")
    return wrapper  # type: ignore[return-value]
98
+
99
+
100
def deprecate_kwarg(
    old_arg_name: str,
    new_arg_name: str | None,
    mapping: Mapping[Any, Any] | Callable[[Any], Any] | None = None,
    stacklevel: int = 2,
) -> Callable[[F], F]:
    """
    Decorator to deprecate a keyword argument of a function.

    Parameters
    ----------
    old_arg_name : str
        Name of argument in function to deprecate
    new_arg_name : str or None
        Name of preferred argument in function. Use None to raise warning that
        ``old_arg_name`` keyword is deprecated.
    mapping : dict or callable
        If mapping is present, use it to translate old arguments to
        new arguments. A callable must do its own value checking;
        values not found in a dict will be forwarded unchanged.
    stacklevel : int, default 2
        ``stacklevel`` forwarded to ``warnings.warn``.

    Raises
    ------
    TypeError
        At decoration time if ``mapping`` is neither dict-like nor callable;
        at call time if both the old and the new keyword are supplied.

    Examples
    --------
    The following deprecates 'cols', using 'columns' instead

    >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name='columns')
    ... def f(columns=''):
    ...     print(columns)
    ...
    >>> f(columns='should work ok')
    should work ok

    >>> f(cols='should raise warning')  # doctest: +SKIP
    FutureWarning: the 'cols' keyword is deprecated, use 'columns' instead.
      warnings.warn(msg, FutureWarning)
    should raise warning

    >>> f(cols='should error', columns="can't pass both")  # doctest: +SKIP
    TypeError: Can only specify 'cols' or 'columns', not both.

    >>> @deprecate_kwarg('old', 'new', {'yes': True, 'no': False})
    ... def f(new=False):
    ...     print('yes!' if new else 'no!')
    ...
    >>> f(old='yes')  # doctest: +SKIP
    FutureWarning: the old='yes' keyword is deprecated, use new=True instead.
      warnings.warn(msg, FutureWarning)
    yes!

    To raise a warning that a keyword will be removed entirely in the future

    >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name=None)
    ... def f(cols='', another_param=''):
    ...     print(cols)
    ...
    >>> f(cols='should raise warning')  # doctest: +SKIP
    FutureWarning: the 'cols' keyword is deprecated and will be removed in a
    future version. Please take steps to stop the use of 'cols'
    should raise warning
    >>> f(another_param='should not raise warning')  # doctest: +SKIP
    should not raise warning

    >>> f(cols='should raise warning', another_param='')  # doctest: +SKIP
    FutureWarning: the 'cols' keyword is deprecated and will be removed in a
    future version. Please take steps to stop the use of 'cols'
    should raise warning
    """
    if mapping is not None and not hasattr(mapping, "get") and not callable(mapping):
        raise TypeError(
            "mapping from old to new argument values must be dict or callable!"
        )

    def _deprecate_kwarg(func: F) -> F:
        @wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            # NOTE: an explicit ``None`` for the old keyword is treated the
            # same as the keyword not being passed at all.
            old_arg_value = kwargs.pop(old_arg_name, None)
            if old_arg_value is None:
                return func(*args, **kwargs)

            if new_arg_name is None:
                # Keyword is going away without replacement: warn, then
                # forward it unchanged under its original name.
                msg = (
                    f"the {repr(old_arg_name)} keyword is deprecated and "
                    "will be removed in a future version. Please take "
                    f"steps to stop the use of {repr(old_arg_name)}"
                )
                warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
                kwargs[old_arg_name] = old_arg_value
                return func(*args, **kwargs)

            if mapping is not None:
                if callable(mapping):
                    new_arg_value = mapping(old_arg_value)
                else:
                    new_arg_value = mapping.get(old_arg_value, old_arg_value)
                msg = (
                    f"the {old_arg_name}={repr(old_arg_value)} keyword is "
                    "deprecated, use "
                    f"{new_arg_name}={repr(new_arg_value)} instead."
                )
            else:
                new_arg_value = old_arg_value
                msg = (
                    f"the {repr(old_arg_name)} keyword is deprecated, "
                    f"use {repr(new_arg_name)} instead."
                )

            warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
            if kwargs.get(new_arg_name) is not None:
                msg = (
                    f"Can only specify {repr(old_arg_name)} "
                    f"or {repr(new_arg_name)}, not both."
                )
                raise TypeError(msg)
            kwargs[new_arg_name] = new_arg_value
            return func(*args, **kwargs)

        return cast("F", wrapper)

    return _deprecate_kwarg
218
+
219
+
220
+ def _format_argument_list(allow_args: list[str]) -> str:
221
+ """
222
+ Convert the allow_args argument (either string or integer) of
223
+ `deprecate_nonkeyword_arguments` function to a string describing
224
+ it to be inserted into warning message.
225
+
226
+ Parameters
227
+ ----------
228
+ allowed_args : list, tuple or int
229
+ The `allowed_args` argument for `deprecate_nonkeyword_arguments`,
230
+ but None value is not allowed.
231
+
232
+ Returns
233
+ -------
234
+ str
235
+ The substring describing the argument list in best way to be
236
+ inserted to the warning message.
237
+
238
+ Examples
239
+ --------
240
+ `format_argument_list([])` -> ''
241
+ `format_argument_list(['a'])` -> "except for the arguments 'a'"
242
+ `format_argument_list(['a', 'b'])` -> "except for the arguments 'a' and 'b'"
243
+ `format_argument_list(['a', 'b', 'c'])` ->
244
+ "except for the arguments 'a', 'b' and 'c'"
245
+ """
246
+ if "self" in allow_args:
247
+ allow_args.remove("self")
248
+ if not allow_args:
249
+ return ""
250
+ elif len(allow_args) == 1:
251
+ return f" except for the argument '{allow_args[0]}'"
252
+ else:
253
+ last = allow_args[-1]
254
+ args = ", ".join(["'" + x + "'" for x in allow_args[:-1]])
255
+ return f" except for the arguments {args} and '{last}'"
256
+
257
+
258
def future_version_msg(version: str | None) -> str:
    """
    Specify which version of pandas the deprecation will take place in.

    Parameters
    ----------
    version : str or None
        Version to name in the message; ``None`` yields a generic phrase.

    Returns
    -------
    str
        Sentence opener for a deprecation message.
    """
    if version is None:
        return "In a future version of pandas"
    return f"Starting with pandas version {version}"
264
+
265
+
266
def deprecate_nonkeyword_arguments(
    version: str | None,
    allowed_args: list[str] | None = None,
    name: str | None = None,
) -> Callable[[F], F]:
    """
    Decorator to deprecate a use of non-keyword arguments of a function.

    Parameters
    ----------
    version : str, optional
        The version in which positional arguments will become
        keyword-only. If None, then the warning message won't
        specify any particular version.

    allowed_args : list, optional
        In case of list, it must be the list of names of some
        first arguments of the decorated functions that are
        OK to be given as positional arguments. In case of None value,
        defaults to list of all arguments not having the
        default value.

    name : str, optional
        The specific name of the function to show in the warning
        message. If None, then the Qualified name of the function
        is used.

    Returns
    -------
    callable
        Decorator. The decorated function emits a ``FutureWarning`` when
        called with more positional arguments than are allowed, and exposes
        a ``__signature__`` in which the other parameters are keyword-only.
    """

    def decorate(func):
        old_sig = inspect.signature(func)

        if allowed_args is not None:
            # Copy: the decorator must not alias (and thereby risk
            # mutating) the list object owned by the caller.
            allow_args = list(allowed_args)
        else:
            allow_args = [
                p.name
                for p in old_sig.parameters.values()
                if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
                and p.default is p.empty
            ]

        # Signature advertised to introspection tools: everything that is
        # not explicitly allowed becomes keyword-only.
        new_params = [
            p.replace(kind=p.KEYWORD_ONLY)
            if (
                p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
                and p.name not in allow_args
            )
            else p
            for p in old_sig.parameters.values()
        ]
        # Signature requires parameters ordered by kind.
        new_params.sort(key=lambda p: p.kind)
        new_sig = old_sig.replace(parameters=new_params)

        num_allow_args = len(allow_args)
        msg = (
            f"{future_version_msg(version)} all arguments of "
            f"{name or func.__qualname__}{{arguments}} will be keyword-only."
        )

        @wraps(func)
        def wrapper(*args, **kwargs):
            if len(args) > num_allow_args:
                warnings.warn(
                    # Hand over a copy so message formatting can never
                    # alter ``allow_args`` between calls.
                    msg.format(arguments=_format_argument_list(list(allow_args))),
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            return func(*args, **kwargs)

        # error: "Callable[[VarArg(Any), KwArg(Any)], Any]" has no
        # attribute "__signature__"
        wrapper.__signature__ = new_sig  # type: ignore[attr-defined]
        return wrapper

    return decorate
341
+
342
+
343
def doc(*docstrings: None | str | Callable, **params) -> Callable[[F], F]:
    """
    A decorator to take docstring templates, concatenate them and perform string
    substitution on them.

    This decorator will add a variable "_docstring_components" to the wrapped
    callable to keep track of the original docstring template for potential
    reuse. A component that should be treated as a template is saved as a
    string. Otherwise it is saved as a callable, and its ``__doc__`` is
    dedented later to obtain the docstring.

    Parameters
    ----------
    *docstrings : None, str, or callable
        The string / docstring / docstring template to be appended in order
        after default docstring under callable.
    **params
        The string which would be used to format docstring template.

    Returns
    -------
    callable
        Decorator that rewrites ``__doc__`` of its argument in place and
        returns the same object.
    """

    def decorator(decorated: F) -> F:
        # collecting docstring and docstring templates
        docstring_components: list[str | Callable] = []
        if decorated.__doc__:
            docstring_components.append(dedent(decorated.__doc__))

        for docstring in docstrings:
            if docstring is None:
                continue
            if hasattr(docstring, "_docstring_components"):
                # Reuse the unformatted templates of an already decorated
                # callable so they can be formatted with *our* params.
                docstring_components.extend(
                    docstring._docstring_components  # pyright: ignore[reportGeneralTypeIssues]
                )
            elif isinstance(docstring, str) or docstring.__doc__:
                docstring_components.append(docstring)

        # Only string templates are formatted; callables contribute their
        # docstring verbatim (after dedent).
        params_applied = [
            component.format(**params)
            if isinstance(component, str) and len(params) > 0
            else component
            for component in docstring_components
        ]

        decorated.__doc__ = "".join(
            [
                component
                if isinstance(component, str)
                else dedent(component.__doc__ or "")
                for component in params_applied
            ]
        )

        # error: "F" has no attribute "_docstring_components"
        decorated._docstring_components = (  # type: ignore[attr-defined]
            docstring_components
        )
        return decorated

    return decorator
402
+
403
+
404
+ # Substitution and Appender are derived from matplotlib.docstring (1.1.0)
405
+ # module https://matplotlib.org/users/license.html
406
+
407
+
408
class Substitution:
    """
    A decorator to take a function's docstring and perform string
    substitution on it.

    This decorator should be robust even if func.__doc__ is None
    (for example, if -OO was passed to the interpreter)

    Usage: construct a docstring.Substitution with a sequence or
    dictionary suitable for performing substitution; then
    decorate a suitable function with the constructed object. e.g.

    sub_author_name = Substitution(author='Jason')

    @sub_author_name
    def some_function(x):
        "%(author)s wrote this function"

    # note that some_function.__doc__ is now "Jason wrote this function"

    One can also use positional arguments.

    sub_first_last_names = Substitution('Edgar Allen', 'Poe')

    @sub_first_last_names
    def some_function(x):
        "%s %s wrote the Raven"
    """

    def __init__(self, *args, **kwargs) -> None:
        """
        Store the substitution values.

        Raises
        ------
        AssertionError
            If positional and keyword values are mixed.
        """
        if args and kwargs:
            raise AssertionError("Only positional or keyword args are allowed")

        # A tuple for positional use, a dict for keyword use (an empty dict
        # when nothing was given).
        self.params = args or kwargs

    def __call__(self, func: F) -> F:
        """
        Apply ``%``-substitution to ``func.__doc__`` and return ``func``.
        """
        # Nothing to substitute when the docstring is missing or empty; in
        # that case ``__doc__`` is left untouched.
        if func.__doc__:
            func.__doc__ = func.__doc__ % self.params
        return func

    def update(self, *args, **kwargs) -> None:
        """
        Update self.params with supplied args.

        Only has an effect when the parameters are stored as a dict.
        """
        if isinstance(self.params, dict):
            self.params.update(*args, **kwargs)
453
+
454
+
455
class Appender:
    """
    A function decorator that will append an addendum to the docstring
    of the target function.

    This decorator should be robust even if func.__doc__ is None
    (for example, if -OO was passed to the interpreter).

    Usage: construct a docstring.Appender with a string to be joined to
    the original docstring. An optional 'join' parameter may be supplied
    which will be used to join the docstring and addendum. e.g.

    add_copyright = Appender("Copyright (c) 2009", join='\\n')

    @add_copyright
    def my_dog(has='fleas'):
        "This docstring will have a copyright below"
        pass
    """

    # Text appended to the decorated docstring (possibly pre-indented).
    addendum: str | None

    def __init__(self, addendum: str | None, join: str = "", indents: int = 0) -> None:
        """
        Parameters
        ----------
        addendum : str or None
            Text to append; ``None`` appends nothing.
        join : str, default ""
            Separator placed between the original docstring and the addendum.
        indents : int, default 0
            When positive, the addendum is passed through ``indent`` with
            this many levels before being stored.
        """
        if indents > 0:
            self.addendum = indent(addendum, indents=indents)
        else:
            self.addendum = addendum
        self.join = join

    def __call__(self, func: T) -> T:
        """
        Append the addendum to ``func.__doc__`` (dedented) and return ``func``.
        """
        # Missing pieces count as empty strings.  Local variables are used so
        # that decorating a function does not rewrite ``self.addendum``.
        original = func.__doc__ if func.__doc__ else ""
        addendum = self.addendum if self.addendum else ""
        func.__doc__ = dedent(self.join.join([original, addendum]))
        return func
490
+
491
+
492
def indent(text: str | None, indents: int = 1) -> str:
    """
    Indent every line of ``text`` after the first one.

    Parameters
    ----------
    text : str or None
        Text to indent. ``None``, an empty string or a non-string yields ``""``.
    indents : int, default 1
        Number of indentation levels (four spaces each) inserted after every
        newline.

    Returns
    -------
    str
        ``text`` with the indentation inserted after each newline; the first
        line is returned unchanged.
    """
    if not text or not isinstance(text, str):
        return ""
    # NOTE(review): the rendered source showed a single-space literal because
    # runs of whitespace were collapsed; four spaces per level is the
    # upstream pandas value - confirm against the installed package.
    jointext = "".join(["\n"] + ["    "] * indents)
    return jointext.join(text.split("\n"))
497
+
498
+
499
+ __all__ = [
500
+ "Appender",
501
+ "cache_readonly",
502
+ "deprecate",
503
+ "deprecate_kwarg",
504
+ "deprecate_nonkeyword_arguments",
505
+ "doc",
506
+ "future_version_msg",
507
+ "Substitution",
508
+ ]
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_doctools.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ import numpy as np
6
+
7
+ import pandas as pd
8
+
9
+ if TYPE_CHECKING:
10
+ from collections.abc import Iterable
11
+
12
+
13
+ class TablePlotter:
14
+ """
15
+ Layout some DataFrames in vertical/horizontal layout for explanation.
16
+ Used in merging.rst
17
+ """
18
+
19
+ def __init__(
20
+ self,
21
+ cell_width: float = 0.37,
22
+ cell_height: float = 0.25,
23
+ font_size: float = 7.5,
24
+ ) -> None:
25
+ self.cell_width = cell_width
26
+ self.cell_height = cell_height
27
+ self.font_size = font_size
28
+
29
+ def _shape(self, df: pd.DataFrame) -> tuple[int, int]:
30
+ """
31
+ Calculate table shape considering index levels.
32
+ """
33
+ row, col = df.shape
34
+ return row + df.columns.nlevels, col + df.index.nlevels
35
+
36
+ def _get_cells(self, left, right, vertical) -> tuple[int, int]:
37
+ """
38
+ Calculate appropriate figure size based on left and right data.
39
+ """
40
+ if vertical:
41
+ # calculate required number of cells
42
+ vcells = max(sum(self._shape(df)[0] for df in left), self._shape(right)[0])
43
+ hcells = max(self._shape(df)[1] for df in left) + self._shape(right)[1]
44
+ else:
45
+ vcells = max([self._shape(df)[0] for df in left] + [self._shape(right)[0]])
46
+ hcells = sum([self._shape(df)[1] for df in left] + [self._shape(right)[1]])
47
+ return hcells, vcells
48
+
49
+ def plot(self, left, right, labels: Iterable[str] = (), vertical: bool = True):
50
+ """
51
+ Plot left / right DataFrames in specified layout.
52
+
53
+ Parameters
54
+ ----------
55
+ left : list of DataFrames before operation is applied
56
+ right : DataFrame of operation result
57
+ labels : list of str to be drawn as titles of left DataFrames
58
+ vertical : bool, default True
59
+ If True, use vertical layout. If False, use horizontal layout.
60
+ """
61
+ from matplotlib import gridspec
62
+ import matplotlib.pyplot as plt
63
+
64
+ if not isinstance(left, list):
65
+ left = [left]
66
+ left = [self._conv(df) for df in left]
67
+ right = self._conv(right)
68
+
69
+ hcells, vcells = self._get_cells(left, right, vertical)
70
+
71
+ if vertical:
72
+ figsize = self.cell_width * hcells, self.cell_height * vcells
73
+ else:
74
+ # include margin for titles
75
+ figsize = self.cell_width * hcells, self.cell_height * vcells
76
+ fig = plt.figure(figsize=figsize)
77
+
78
+ if vertical:
79
+ gs = gridspec.GridSpec(len(left), hcells)
80
+ # left
81
+ max_left_cols = max(self._shape(df)[1] for df in left)
82
+ max_left_rows = max(self._shape(df)[0] for df in left)
83
+ for i, (_left, _label) in enumerate(zip(left, labels)):
84
+ ax = fig.add_subplot(gs[i, 0:max_left_cols])
85
+ self._make_table(ax, _left, title=_label, height=1.0 / max_left_rows)
86
+ # right
87
+ ax = plt.subplot(gs[:, max_left_cols:])
88
+ self._make_table(ax, right, title="Result", height=1.05 / vcells)
89
+ fig.subplots_adjust(top=0.9, bottom=0.05, left=0.05, right=0.95)
90
+ else:
91
+ max_rows = max(self._shape(df)[0] for df in left + [right])
92
+ height = 1.0 / np.max(max_rows)
93
+ gs = gridspec.GridSpec(1, hcells)
94
+ # left
95
+ i = 0
96
+ for df, _label in zip(left, labels):
97
+ sp = self._shape(df)
98
+ ax = fig.add_subplot(gs[0, i : i + sp[1]])
99
+ self._make_table(ax, df, title=_label, height=height)
100
+ i += sp[1]
101
+ # right
102
+ ax = plt.subplot(gs[0, i:])
103
+ self._make_table(ax, right, title="Result", height=height)
104
+ fig.subplots_adjust(top=0.85, bottom=0.05, left=0.05, right=0.95)
105
+
106
+ return fig
107
+
108
+ def _conv(self, data):
109
+ """
110
+ Convert each input to appropriate for table outplot.
111
+ """
112
+ if isinstance(data, pd.Series):
113
+ if data.name is None:
114
+ data = data.to_frame(name="")
115
+ else:
116
+ data = data.to_frame()
117
+ data = data.fillna("NaN")
118
+ return data
119
+
120
+ def _insert_index(self, data):
121
+ # insert is destructive
122
+ data = data.copy()
123
+ idx_nlevels = data.index.nlevels
124
+ if idx_nlevels == 1:
125
+ data.insert(0, "Index", data.index)
126
+ else:
127
+ for i in range(idx_nlevels):
128
+ data.insert(i, f"Index{i}", data.index._get_level_values(i))
129
+
130
+ col_nlevels = data.columns.nlevels
131
+ if col_nlevels > 1:
132
+ col = data.columns._get_level_values(0)
133
+ values = [
134
+ data.columns._get_level_values(i)._values for i in range(1, col_nlevels)
135
+ ]
136
+ col_df = pd.DataFrame(values)
137
+ data.columns = col_df.columns
138
+ data = pd.concat([col_df, data])
139
+ data.columns = col
140
+ return data
141
+
142
+ def _make_table(self, ax, df, title: str, height: float | None = None) -> None:
143
+ if df is None:
144
+ ax.set_visible(False)
145
+ return
146
+
147
+ from pandas import plotting
148
+
149
+ idx_nlevels = df.index.nlevels
150
+ col_nlevels = df.columns.nlevels
151
+ # must be convert here to get index levels for colorization
152
+ df = self._insert_index(df)
153
+ tb = plotting.table(ax, df, loc=9)
154
+ tb.set_fontsize(self.font_size)
155
+
156
+ if height is None:
157
+ height = 1.0 / (len(df) + 1)
158
+
159
+ props = tb.properties()
160
+ for (r, c), cell in props["celld"].items():
161
+ if c == -1:
162
+ cell.set_visible(False)
163
+ elif r < col_nlevels and c < idx_nlevels:
164
+ cell.set_visible(False)
165
+ elif r < col_nlevels or c < idx_nlevels:
166
+ cell.set_facecolor("#AAAAAA")
167
+ cell.set_height(height)
168
+
169
+ ax.set_title(title, size=self.font_size)
170
+ ax.axis("off")
171
+
172
+
173
def main() -> None:
    """
    Show three example layouts: vertical concat, horizontal concat and a
    frame with MultiIndex rows and columns.
    """
    import matplotlib.pyplot as plt

    p = TablePlotter()

    df1 = pd.DataFrame({"A": [10, 11, 12], "B": [20, 21, 22], "C": [30, 31, 32]})
    df2 = pd.DataFrame({"A": [10, 12], "C": [30, 32]})

    p.plot([df1, df2], pd.concat([df1, df2]), labels=["df1", "df2"], vertical=True)
    plt.show()

    df3 = pd.DataFrame({"X": [10, 12], "Z": [30, 32]})

    # The second frame shown here is df3, so label it accordingly.
    p.plot(
        [df1, df3], pd.concat([df1, df3], axis=1), labels=["df1", "df3"], vertical=False
    )
    plt.show()

    idx = pd.MultiIndex.from_tuples(
        [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")]
    )
    column = pd.MultiIndex.from_tuples([(1, "A"), (1, "B")])
    df3 = pd.DataFrame({"v1": [1, 2, 3, 4, 5, 6], "v2": [5, 6, 7, 8, 9, 10]}, index=idx)
    df3.columns = column
    p.plot(df3, df3, labels=["df3"])
    plt.show()


if __name__ == "__main__":
    main()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_exceptions.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import inspect
5
+ import os
6
+ import re
7
+ from typing import TYPE_CHECKING
8
+ import warnings
9
+
10
+ if TYPE_CHECKING:
11
+ from collections.abc import Generator
12
+ from types import FrameType
13
+
14
+
15
@contextlib.contextmanager
def rewrite_exception(old_name: str, new_name: str) -> Generator[None, None, None]:
    """
    Rewrite the message of an exception.

    Any ``Exception`` raised inside the ``with`` block is re-raised with
    every occurrence of ``old_name`` in its first argument replaced by
    ``new_name``. Exceptions without arguments propagate unchanged.

    Parameters
    ----------
    old_name : str
        Text to look for in the message.
    new_name : str
        Replacement text.
    """
    try:
        yield
    except Exception as err:
        if not err.args:
            raise
        # Only the first argument is treated as the message; the remaining
        # arguments are carried over untouched.
        msg = str(err.args[0]).replace(old_name, new_name)
        err.args = (msg, *err.args[1:])
        raise
32
+
33
+
34
def find_stack_level() -> int:
    """
    Find the first place in the stack that is not inside pandas
    (tests notwithstanding).

    Returns
    -------
    int
        Number of frames, counted from this function's own frame, that
        belong to the pandas package (excluding ``pandas/tests``) before the
        first frame outside of it.
    """

    import pandas as pd

    pkg_dir = os.path.dirname(pd.__file__)
    # Compare against the directories *with* a trailing separator so that a
    # sibling such as ``pandas_foo/`` is not mistaken for pandas itself.
    pkg_prefix = os.path.join(pkg_dir, "")
    test_prefix = os.path.join(pkg_dir, "tests", "")

    # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow
    frame: FrameType | None = inspect.currentframe()
    try:
        n = 0
        while frame:
            filename = inspect.getfile(frame)
            if filename.startswith(pkg_prefix) and not filename.startswith(
                test_prefix
            ):
                frame = frame.f_back
                n += 1
            else:
                break
    finally:
        # See note in
        # https://docs.python.org/3/library/inspect.html#inspect.Traceback
        del frame
    return n
61
+
62
+
63
@contextlib.contextmanager
def rewrite_warning(
    target_message: str,
    target_category: type[Warning],
    new_message: str,
    new_category: type[Warning] | None = None,
) -> Generator[None, None, None]:
    """
    Rewrite the message of a warning.

    Warnings raised inside the ``with`` block are recorded and re-emitted
    once the block has finished. Those matching both ``target_category``
    (exact class) and ``target_message`` (regular expression search) are
    re-emitted with the new message and category; all others unchanged.

    Parameters
    ----------
    target_message : str
        Warning message to match.
    target_category : Warning
        Warning type to match.
    new_message : str
        New warning message to emit.
    new_category : Warning or None, default None
        New warning type to emit. When None, will be the same as target_category.
    """
    if new_category is None:
        new_category = target_category
    with warnings.catch_warnings(record=True) as record:
        yield
    if not record:
        return
    pattern = re.compile(target_message)
    for warning in record:
        if warning.category is target_category and pattern.search(
            str(warning.message)
        ):
            category = new_category
            message: Warning | str = new_message
        else:
            category, message = warning.category, warning.message
        # Re-emit at the original location so the report still points at
        # the code that triggered the warning.
        warnings.warn_explicit(
            message=message,
            category=category,
            filename=warning.filename,
            lineno=warning.lineno,
        )
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_print_versions.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import codecs
4
+ import json
5
+ import locale
6
+ import os
7
+ import platform
8
+ import struct
9
+ import sys
10
+ from typing import TYPE_CHECKING
11
+
12
+ if TYPE_CHECKING:
13
+ from pandas._typing import JSONSerializable
14
+
15
+ from pandas.compat._optional import (
16
+ VERSIONS,
17
+ get_version,
18
+ import_optional_dependency,
19
+ )
20
+
21
+
22
+ def _get_commit_hash() -> str | None:
23
+ """
24
+ Use vendored versioneer code to get git hash, which handles
25
+ git worktree correctly.
26
+ """
27
+ try:
28
+ from pandas._version_meson import ( # pyright: ignore [reportMissingImports]
29
+ __git_version__,
30
+ )
31
+
32
+ return __git_version__
33
+ except ImportError:
34
+ from pandas._version import get_versions
35
+
36
+ versions = get_versions()
37
+ return versions["full-revisionid"]
38
+
39
+
40
def _get_sys_info() -> dict[str, JSONSerializable]:
    """
    Returns system information as a JSON serializable dictionary.

    The ``"LOCALE"`` entry is itself a dict with the keys
    ``"language-code"`` and ``"encoding"``.
    """
    uname_result = platform.uname()
    language_code, encoding = locale.getlocale()
    return {
        "commit": _get_commit_hash(),
        "python": platform.python_version(),
        # size of a C pointer in bits
        "python-bits": struct.calcsize("P") * 8,
        "OS": uname_result.system,
        "OS-release": uname_result.release,
        "Version": uname_result.version,
        "machine": uname_result.machine,
        "processor": uname_result.processor,
        "byteorder": sys.byteorder,
        "LC_ALL": os.environ.get("LC_ALL"),
        "LANG": os.environ.get("LANG"),
        "LOCALE": {"language-code": language_code, "encoding": encoding},
    }
60
+
61
+
62
+ def _get_dependency_info() -> dict[str, JSONSerializable]:
63
+ """
64
+ Returns dependency information as a JSON serializable dictionary.
65
+ """
66
+ deps = [
67
+ "pandas",
68
+ # required
69
+ "numpy",
70
+ "pytz",
71
+ "dateutil",
72
+ # install / build,
73
+ "pip",
74
+ "Cython",
75
+ # docs
76
+ "sphinx",
77
+ # Other, not imported.
78
+ "IPython",
79
+ ]
80
+ # Optional dependencies
81
+ deps.extend(list(VERSIONS))
82
+
83
+ result: dict[str, JSONSerializable] = {}
84
+ for modname in deps:
85
+ try:
86
+ mod = import_optional_dependency(modname, errors="ignore")
87
+ except Exception:
88
+ # Dependency conflicts may cause a non ImportError
89
+ result[modname] = "N/A"
90
+ else:
91
+ result[modname] = get_version(mod) if mod else None
92
+ return result
93
+
94
+
95
def show_versions(as_json: str | bool = False) -> None:
    """
    Provide useful information, important for bug reports.

    It comprises info about the host operating system, pandas version,
    and versions of other installed related packages.

    Parameters
    ----------
    as_json : str or bool, default False
        * If False, outputs info in a human readable form to the console.
        * If str, it will be considered as a path to a file.
          Info will be written to that file in JSON format.
        * If True, outputs info in JSON format to the console.

    Examples
    --------
    >>> pd.show_versions()  # doctest: +SKIP
    Your output may look something like this:
    INSTALLED VERSIONS
    ------------------
    commit           : 37ea63d540fd27274cad6585082c91b1283f963d
    python           : 3.10.6.final.0
    python-bits      : 64
    OS               : Linux
    OS-release       : 5.10.102.1-microsoft-standard-WSL2
    Version          : #1 SMP Wed Mar 2 00:30:59 UTC 2022
    machine          : x86_64
    processor        : x86_64
    byteorder        : little
    LC_ALL           : None
    LANG             : en_GB.UTF-8
    LOCALE           : en_GB.UTF-8
    pandas           : 2.0.1
    numpy            : 1.24.3
    ...
    """
    sys_info = _get_sys_info()
    deps = _get_dependency_info()

    if as_json:
        j = {"system": sys_info, "dependencies": deps}

        if as_json is True:
            # ``write`` emits the document in one call; ``writelines`` on a
            # str would iterate it character by character.
            sys.stdout.write(json.dumps(j, indent=2))
        else:
            assert isinstance(as_json, str)  # needed for mypy
            # ``newline="\n"`` disables newline translation, matching the
            # bytes the former binary-mode ``codecs.open`` produced.
            with open(as_json, "w", encoding="utf8", newline="\n") as f:
                json.dump(j, f, indent=2)

    else:
        assert isinstance(sys_info["LOCALE"], dict)  # needed for mypy
        language_code = sys_info["LOCALE"]["language-code"]
        encoding = sys_info["LOCALE"]["encoding"]
        sys_info["LOCALE"] = f"{language_code}.{encoding}"

        # Both sections are padded to the longest dependency name.
        maxlen = max(len(x) for x in deps)
        print("\nINSTALLED VERSIONS")
        print("------------------")
        for k, v in sys_info.items():
            print(f"{k:<{maxlen}}: {v}")
        print("")
        for k, v in deps.items():
            print(f"{k:<{maxlen}}: {v}")
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_test_decorators.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module provides decorator functions which can be applied to test objects
3
+ in order to skip those objects when certain conditions occur. A sample use case
4
+ is to detect if the platform is missing ``matplotlib``. If so, any test objects
5
+ which require ``matplotlib`` and decorated with ``@td.skip_if_no("matplotlib")``
6
+ will be skipped by ``pytest`` during the execution of the test suite.
7
+
8
+ To illustrate, after importing this module:
9
+
10
+ import pandas.util._test_decorators as td
11
+
12
+ The decorators can be applied to classes:
13
+
14
+ @td.skip_if_no("package")
15
+ class Foo:
16
+ ...
17
+
18
+ Or individual functions:
19
+
20
+ @td.skip_if_no("package")
21
+ def test_foo():
22
+ ...
23
+
24
+ For more information, refer to the ``pytest`` documentation on ``skipif``.
25
+ """
26
+ from __future__ import annotations
27
+
28
+ import locale
29
+ from typing import (
30
+ TYPE_CHECKING,
31
+ Callable,
32
+ )
33
+
34
+ import pytest
35
+
36
+ from pandas._config import get_option
37
+
38
+ if TYPE_CHECKING:
39
+ from pandas._typing import F
40
+
41
+ from pandas._config.config import _get_option
42
+
43
+ from pandas.compat import (
44
+ IS64,
45
+ is_platform_windows,
46
+ )
47
+ from pandas.compat._optional import import_optional_dependency
48
+
49
+
50
def skip_if_installed(package: str) -> pytest.MarkDecorator:
    """
    Build a mark that skips a test whenever ``package`` is importable.

    Parameters
    ----------
    package : str
        The name of the package.

    Returns
    -------
    pytest.MarkDecorator
        a pytest.mark.skipif to use as either a test decorator or a
        parametrization mark.
    """
    # errors="ignore" is passed so the result can simply be tested for
    # truthiness below.
    imported = import_optional_dependency(package, errors="ignore")
    skip_reason = f"Skipping because {package} is installed."
    return pytest.mark.skipif(bool(imported), reason=skip_reason)
69
+
70
+
71
def skip_if_no(package: str, min_version: str | None = None) -> pytest.MarkDecorator:
    """
    Build a mark that skips tests when a required package is unavailable.

    The skip condition is computed when this function is called: an attempt
    is made to import ``package`` and, optionally, to check that it satisfies
    ``min_version``.

    The mark can be used as either a decorator for a test class or to be
    applied to parameters in pytest.mark.parametrize calls or parametrized
    fixtures. Use pytest.importorskip if an imported module is later needed
    or for test functions.

    If the import and version check are unsuccessful, then the test function
    (or test case when used in conjunction with parametrization) will be
    skipped.

    Parameters
    ----------
    package: str
        The name of the required package.
    min_version: str or None, default None
        Optional minimum version of the package.

    Returns
    -------
    pytest.MarkDecorator
        a pytest.mark.skipif to use as either a test decorator or a
        parametrization mark.
    """
    reason = f"Could not import '{package}'"
    if min_version:
        reason = f"{reason} satisfying a min_version of {min_version}"

    imported = import_optional_dependency(
        package, errors="ignore", min_version=min_version
    )
    unavailable = not bool(imported)
    return pytest.mark.skipif(unavailable, reason=reason)
113
+
114
+
115
# Ready-made skip marks. Each condition below is evaluated once, when this
# module is imported.

# Skips when IS64 is falsy.
skip_if_32bit = pytest.mark.skipif(not IS64, reason="skipping for 32 bit")
# Skips when is_platform_windows() returns a truthy value.
skip_if_windows = pytest.mark.skipif(is_platform_windows(), reason="Running on Windows")
# Skips unless the first element returned by locale.getlocale() is exactly
# "en_US"; the reason string reports the value that was found.
skip_if_not_us_locale = pytest.mark.skipif(
    locale.getlocale()[0] != "en_US",
    reason=f"Set local {locale.getlocale()[0]} is not en_US",
)
121
+
122
+
123
def parametrize_fixture_doc(*args) -> Callable[[F], F]:
    """
    Decorator factory that fills placeholders in a fixture's docstring.

    Intended for use on parametrized fixtures: the returned decorator
    formats the fixture's docstring in place, replacing the placeholders
    {0}, {1} etc. with the positional arguments given here, and returns the
    same fixture object.

    Parameters
    ----------
    args: iterable
        Positional arguments for docstring.

    Returns
    -------
    function
        A decorator that returns the decorated function itself, with its
        docstring formatted.
    """

    def documented_fixture(fixture):
        # ``__doc__`` is None for functions without a docstring (and for all
        # functions under ``python -OO``); there is nothing to format then.
        if fixture.__doc__ is not None:
            fixture.__doc__ = fixture.__doc__.format(*args)
        return fixture

    return documented_fixture
148
+
149
+
150
def mark_array_manager_not_yet_implemented(request) -> None:
    """
    Apply an ``xfail`` mark to the test owning ``request``.

    Parameters
    ----------
    request : object
        Object exposing ``applymarker``; the xfail mark is applied through it.
    """
    request.applymarker(
        pytest.mark.xfail(reason="Not yet implemented for ArrayManager")
    )
153
+
154
+
155
# Marks keyed on pandas options. Every condition is computed once, when this
# module is imported, so later option changes do not affect these marks.
# NOTE: despite the ``skip_`` prefix, the two "not_yet_implemented" marks are
# ``xfail`` marks; only the "invalid_test" marks are ``skipif`` marks.

# xfail when the "mode.data_manager" option equals "array".
skip_array_manager_not_yet_implemented = pytest.mark.xfail(
    _get_option("mode.data_manager", silent=True) == "array",
    reason="Not yet implemented for ArrayManager",
)

# skip when the "mode.data_manager" option equals "array".
skip_array_manager_invalid_test = pytest.mark.skipif(
    _get_option("mode.data_manager", silent=True) == "array",
    reason="Test that relies on BlockManager internals or specific behaviour",
)

# xfail when the "mode.copy_on_write" option is exactly True.
skip_copy_on_write_not_yet_implemented = pytest.mark.xfail(
    get_option("mode.copy_on_write") is True,
    reason="Not yet implemented/adapted for Copy-on-Write mode",
)

# skip when the "mode.copy_on_write" option is exactly True.
skip_copy_on_write_invalid_test = pytest.mark.skipif(
    get_option("mode.copy_on_write") is True,
    reason="Test not valid for Copy-on-Write mode",
)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_tester.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Entrypoint for testing from the top-level namespace.
3
+ """
4
+ from __future__ import annotations
5
+
6
+ import os
7
+ import sys
8
+
9
+ from pandas.compat._optional import import_optional_dependency
10
+
11
# Directory of the package that contains this module's parent directory;
# passed to pytest as the path to collect from.
PKG = os.path.dirname(os.path.dirname(__file__))


def test(extra_args: list[str] | None = None, run_doctests: bool = False) -> None:
    """
    Run the pandas test suite using pytest.

    By default, runs with the marks -m "not slow and not network and not db"

    Parameters
    ----------
    extra_args : list[str], default None
        Arguments handed to pytest. When given, they replace the default
        marks above rather than being added to them. A non-list value is
        wrapped in a single-element list. The caller's list is never
        modified.
    run_doctests : bool, default False
        Whether to only run the Python and Cython doctests. If you would like to run
        both doctests/regular tests, just append "--doctest-modules"/"--doctest-cython"
        to extra_args. When True, ``extra_args`` is ignored.

    Examples
    --------
    >>> pd.test()  # doctest: +SKIP
    running: pytest...
    """
    pytest = import_optional_dependency("pytest")
    # hypothesis is imported only to fail early if it is missing.
    import_optional_dependency("hypothesis")
    cmd = ["-m not slow and not network and not db"]
    if extra_args:
        if not isinstance(extra_args, list):
            extra_args = [extra_args]
        # Copy: PKG is appended below, and that must not leak into the
        # list object owned by the caller.
        cmd = list(extra_args)
    if run_doctests:
        cmd = [
            "--doctest-modules",
            "--doctest-cython",
            f"--ignore={os.path.join(PKG, 'tests')}",
        ]
    cmd.append(PKG)
    joined = " ".join(cmd)
    print(f"running: pytest {joined}")
    # Propagate pytest's exit status as the process exit code.
    sys.exit(pytest.main(cmd))
51
+
52
+
53
+ __all__ = ["test"]
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pandas/util/_validators.py ADDED
@@ -0,0 +1,456 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Module that contains many useful utilities
3
+ for validating data or function arguments
4
+ """
5
+ from __future__ import annotations
6
+
7
+ from collections.abc import (
8
+ Iterable,
9
+ Sequence,
10
+ )
11
+ from typing import (
12
+ TypeVar,
13
+ overload,
14
+ )
15
+
16
+ import numpy as np
17
+
18
+ from pandas._libs import lib
19
+
20
+ from pandas.core.dtypes.common import (
21
+ is_bool,
22
+ is_integer,
23
+ )
24
+
25
+ BoolishT = TypeVar("BoolishT", bool, int)
26
+ BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)
27
+
28
+
29
+ def _check_arg_length(fname, args, max_fname_arg_count, compat_args) -> None:
30
+ """
31
+ Checks whether 'args' has length of at most 'compat_args'. Raises
32
+ a TypeError if that is not the case, similar to in Python when a
33
+ function is called with too many arguments.
34
+ """
35
+ if max_fname_arg_count < 0:
36
+ raise ValueError("'max_fname_arg_count' must be non-negative")
37
+
38
+ if len(args) > len(compat_args):
39
+ max_arg_count = len(compat_args) + max_fname_arg_count
40
+ actual_arg_count = len(args) + max_fname_arg_count
41
+ argument = "argument" if max_arg_count == 1 else "arguments"
42
+
43
+ raise TypeError(
44
+ f"{fname}() takes at most {max_arg_count} {argument} "
45
+ f"({actual_arg_count} given)"
46
+ )
47
+
48
+
49
+ def _check_for_default_values(fname, arg_val_dict, compat_args) -> None:
50
+ """
51
+ Check that the keys in `arg_val_dict` are mapped to their
52
+ default values as specified in `compat_args`.
53
+
54
+ Note that this function is to be called only when it has been
55
+ checked that arg_val_dict.keys() is a subset of compat_args
56
+ """
57
+ for key in arg_val_dict:
58
+ # try checking equality directly with '=' operator,
59
+ # as comparison may have been overridden for the left
60
+ # hand object
61
+ try:
62
+ v1 = arg_val_dict[key]
63
+ v2 = compat_args[key]
64
+
65
+ # check for None-ness otherwise we could end up
66
+ # comparing a numpy array vs None
67
+ if (v1 is not None and v2 is None) or (v1 is None and v2 is not None):
68
+ match = False
69
+ else:
70
+ match = v1 == v2
71
+
72
+ if not is_bool(match):
73
+ raise ValueError("'match' is not a boolean")
74
+
75
+ # could not compare them directly, so try comparison
76
+ # using the 'is' operator
77
+ except ValueError:
78
+ match = arg_val_dict[key] is compat_args[key]
79
+
80
+ if not match:
81
+ raise ValueError(
82
+ f"the '{key}' parameter is not supported in "
83
+ f"the pandas implementation of {fname}()"
84
+ )
85
+
86
+
87
def validate_args(fname, args, max_fname_arg_count, compat_args) -> None:
    """
    Validate the `*args` passed into a function: there may be at most
    `len(compat_args)` of them and each must equal its default value.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `*args` parameter
    args : tuple
        The `*args` parameter passed into a function
    max_fname_arg_count : int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args : dict
        A dictionary of keys and their associated default values.
        In order to accommodate buggy behaviour in some versions of `numpy`,
        where a signature displayed keyword arguments but then passed those
        arguments **positionally** internally when calling downstream
        implementations, a dict ensures that the original
        order of the keyword arguments is enforced.

    Raises
    ------
    TypeError
        If `args` contains more values than there are `compat_args`
    ValueError
        If `args` contains values that do not correspond to those
        of the default values specified in `compat_args`
    """
    _check_arg_length(fname, args, max_fname_arg_count, compat_args)

    # Pair each positional value with the parameter name it stands for, so
    # the error can name the unsupported parameter of 'fname'.
    named_positionals = dict(zip(compat_args, args))
    _check_for_default_values(fname, named_positionals, compat_args)
126
+
127
+
128
+ def _check_for_invalid_keys(fname, kwargs, compat_args) -> None:
129
+ """
130
+ Checks whether 'kwargs' contains any keys that are not
131
+ in 'compat_args' and raises a TypeError if there is one.
132
+ """
133
+ # set(dict) --> set of the dictionary's keys
134
+ diff = set(kwargs) - set(compat_args)
135
+
136
+ if diff:
137
+ bad_arg = next(iter(diff))
138
+ raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'")
139
+
140
+
141
def validate_kwargs(fname, kwargs, compat_args) -> None:
    """
    Validate the `**kwargs` passed to function `fname`: every key must be
    listed in `compat_args` and every value must equal its default.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `**kwargs` parameter
    kwargs : dict
        The `**kwargs` parameter passed into `fname`
    compat_args: dict
        A dictionary of keys that `kwargs` is allowed to have and their
        associated default values

    Raises
    ------
    TypeError if `kwargs` contains keys not in `compat_args`
    ValueError if `kwargs` contains keys in `compat_args` that do not
    map to the default values specified in `compat_args`
    """
    # The default-value check works on a shallow copy taken up front.
    snapshot = kwargs.copy()
    _check_for_invalid_keys(fname, kwargs, compat_args)
    _check_for_default_values(fname, snapshot, compat_args)
166
+
167
+
168
def validate_args_and_kwargs(
    fname, args, kwargs, max_fname_arg_count, compat_args
) -> None:
    """
    Checks whether parameters passed to the *args and **kwargs argument in a
    function `fname` are valid parameters as specified in `*compat_args`
    and whether or not they are set to their default values.

    The `kwargs` dict passed in is left unmodified.

    Parameters
    ----------
    fname: str
        The name of the function being passed the `**kwargs` parameter
    args: tuple
        The `*args` parameter passed into a function
    kwargs: dict
        The `**kwargs` parameter passed into `fname`
    max_fname_arg_count: int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args: dict
        A dictionary of keys that `kwargs` is allowed to
        have and their associated default values.

    Raises
    ------
    TypeError if `args` contains more values than there are
    `compat_args` OR `kwargs` contains keys not in `compat_args`
    OR the same parameter is supplied both positionally and by keyword
    ValueError if `args` contains values not at the default value
    OR `kwargs` contains keys in `compat_args` that do not map to the default
    value as specified in `compat_args`

    See Also
    --------
    validate_args : Purely args validation.
    validate_kwargs : Purely kwargs validation.

    """
    # Check that the total number of arguments passed in (i.e.
    # args and kwargs) does not exceed the length of compat_args
    _check_arg_length(
        fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
    )

    # Check there is no overlap with the positional and keyword
    # arguments, similar to what is done in actual Python functions
    args_dict = dict(zip(compat_args, args))

    for key in args_dict:
        if key in kwargs:
            raise TypeError(
                f"{fname}() got multiple values for keyword argument '{key}'"
            )

    # Merge into a fresh dict so the caller's kwargs is never mutated.
    combined = {**kwargs, **args_dict}
    validate_kwargs(fname, combined, compat_args)
224
+
225
+
226
def validate_bool_kwarg(
    value: BoolishNoneT,
    arg_name: str,
    none_allowed: bool = True,
    int_allowed: bool = False,
) -> BoolishNoneT:
    """
    Check that the value given for ``arg_name`` is acceptable as a boolean.

    Parameters
    ----------
    value : bool
        Value to be validated.
    arg_name : str
        Name of the argument. To be reflected in the error message.
    none_allowed : bool, default True
        Whether to consider None to be a valid boolean.
    int_allowed : bool, default False
        Whether to consider integer value to be a valid boolean.

    Returns
    -------
    value
        The same value as input.

    Raises
    ------
    ValueError
        If the value is not a valid boolean.
    """
    acceptable = (
        is_bool(value)
        or (none_allowed and value is None)
        or (int_allowed and isinstance(value, int))
    )
    if acceptable:
        return value  # pyright: ignore[reportGeneralTypeIssues]

    raise ValueError(
        f'For argument "{arg_name}" expected type bool, received '
        f"type {type(value).__name__}."
    )
269
+
270
+
271
def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
    """
    Validate the 'value' / 'method' keyword arguments of 'fillna'.

    Exactly one of the two must be given. A given 'method' is passed
    through ``clean_fill_method``.

    Parameters
    ----------
    value, method : object
        The 'value' and 'method' keyword arguments for 'fillna'.
    validate_scalar_dict_value : bool, default True
        Whether to validate that 'value' is a scalar or dict. Specifically,
        validate that it is not a list or tuple.

    Returns
    -------
    value, method : object
    """
    from pandas.core.missing import clean_fill_method

    has_value = value is not None
    has_method = method is not None

    if not (has_value or has_method):
        raise ValueError("Must specify a fill 'value' or 'method'.")
    if has_value and has_method:
        raise ValueError("Cannot specify both 'value' and 'method'.")

    if has_method:
        method = clean_fill_method(method)
    elif validate_scalar_dict_value and isinstance(value, (list, tuple)):
        raise TypeError(
            '"value" parameter must be a scalar or dict, but '
            f'you passed a "{type(value).__name__}"'
        )

    return value, method
308
+
309
+
310
def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
    """
    Validate percentiles (used by describe and quantile).

    This function checks if the given float or iterable of floats is a valid percentile
    otherwise raises a ValueError.

    Parameters
    ----------
    q: float or iterable of floats
        A single percentile or an iterable of percentiles. Nested iterables
        are validated element-wise.

    Returns
    -------
    ndarray
        An ndarray of the percentiles if valid.

    Raises
    ------
    ValueError if percentiles are not in given interval([0, 1]).
    """
    q_arr = np.asarray(q)
    # Don't change this to an f-string. The string formatting
    # is too expensive for cases where we don't need it.
    msg = "percentiles should all be in the interval [0, 1]"
    # One vectorised check covers scalars (0-d), flat and nested input.
    # NaN fails both comparisons and is therefore rejected as well.
    if not np.all((q_arr >= 0) & (q_arr <= 1)):
        raise ValueError(msg)
    return q_arr
342
+
343
+
344
@overload
def validate_ascending(ascending: BoolishT) -> BoolishT:
    ...


@overload
def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]:
    ...


def validate_ascending(
    ascending: bool | int | Sequence[BoolishT],
) -> bool | int | list[BoolishT]:
    """Validate ``ascending`` kwargs for ``sort_index`` method."""

    def _check_one(flag):
        # None is rejected; integers are accepted alongside booleans.
        return validate_bool_kwarg(
            flag, "ascending", none_allowed=False, int_allowed=True
        )

    if isinstance(ascending, Sequence):
        # Validate item by item and always hand back a list.
        return [_check_one(item) for item in ascending]

    return _check_one(ascending)
363
+
364
+
365
def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
    """
    Translate the `closed` argument into a pair of endpoint flags.

    Parameters
    ----------
    closed : {None, "left", "right"}

    Returns
    -------
    left_closed : bool
    right_closed : bool

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    # None means both endpoints are closed.
    if closed is None:
        return True, True
    if closed == "left":
        return True, False
    if closed == "right":
        return False, True
    raise ValueError("Closed has to be either 'left', 'right' or None")
396
+
397
+
398
def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
    """
    Translate the `inclusive` argument into (left, right) inclusion flags.

    Parameters
    ----------
    inclusive : {"both", "neither", "left", "right"}

    Returns
    -------
    left_right_inclusive : tuple[bool, bool]

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    # Only strings can match; anything else falls through to the error.
    if isinstance(inclusive, str):
        options = {
            "both": (True, True),
            "left": (True, False),
            "right": (False, True),
            "neither": (False, False),
        }
        if inclusive in options:
            return options[inclusive]

    raise ValueError(
        "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
    )
430
+
431
+
432
def validate_insert_loc(loc: int, length: int) -> int:
    """
    Validate an insertion position lying between -length and length, inclusive.

    A negative loc is shifted by ``length`` so the result lies in [0, length].

    The exceptions we raise on failure match np.insert.
    """
    if not is_integer(loc):
        raise TypeError(f"loc must be an integer between -{length} and {length}")

    if loc < 0:
        # Negative positions count from the end.
        loc += length

    if loc < 0 or loc > length:
        raise IndexError(f"loc must be an integer between -{length} and {length}")
    return loc  # pyright: ignore[reportGeneralTypeIssues]
448
+
449
+
450
def check_dtype_backend(dtype_backend) -> None:
    """
    Raise ValueError unless ``dtype_backend`` is ``lib.no_default``,
    "numpy_nullable" or "pyarrow".
    """
    # The no_default sentinel is always accepted.
    if dtype_backend is lib.no_default:
        return
    if dtype_backend in ["numpy_nullable", "pyarrow"]:
        return
    raise ValueError(
        f"dtype_backend {dtype_backend} is invalid, only 'numpy_nullable' and "
        f"'pyarrow' are allowed.",
    )
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (28.5 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/__pycache__/decompositions_for_jvp.cpython-312.pyc ADDED
Binary file (12.8 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/__pycache__/decompositions_for_rng.cpython-312.pyc ADDED
Binary file (12.5 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/__init__.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import dataclasses
3
+ import glob
4
+ import inspect
5
+ from os.path import basename, dirname, isfile, join
6
+
7
+ import torch
8
+ from torch._export.db.case import (
9
+ _EXAMPLE_CASES,
10
+ _EXAMPLE_CONFLICT_CASES,
11
+ _EXAMPLE_REWRITE_CASES,
12
+ SupportLevel,
13
+ export_case,
14
+ ExportCase,
15
+ )
16
+
17
+
18
def _collect_examples():
    """
    Import every sibling ``*.py`` module (except ``__init__.py``) and pass
    its ``model`` through ``export_case``.

    Module attributes whose names match a field of ``ExportCase`` are
    forwarded to ``export_case`` as keyword arguments.
    """
    here = dirname(__file__)
    module_names = [
        basename(path)[:-3]  # drop the ".py" suffix
        for path in glob.glob(join(here, "*.py"))
        if isfile(path) and not path.endswith("__init__.py")
    ]

    case_fields = {f.name for f in dataclasses.fields(ExportCase)}
    for module_name in module_names:
        # level=1: import relative to this package
        module = __import__(module_name, globals(), locals(), [], 1)
        case_kwargs = {
            attr: getattr(module, attr) for attr in dir(module) if attr in case_fields
        }
        export_case(**case_kwargs)(module.model)


_collect_examples()
31
+
32
+ def all_examples():
33
+ return _EXAMPLE_CASES
34
+
35
+
36
# Fail at import time when one case name maps to several cases.
if len(_EXAMPLE_CONFLICT_CASES) > 0:

    def get_name(case):
        """Return the class name for module instances, else ``model.__name__``."""
        target = case.model
        if isinstance(target, torch.nn.Module):
            target = type(target)
        return target.__name__

    msg = "Error on conflict export case name.\n"
    for case_name, cases in _EXAMPLE_CONFLICT_CASES.items():
        names = ",".join(map(get_name, cases))
        msg += f"Case name {case_name} is associated with multiple cases:\n "
        msg += f"[{names}]\n"

    raise RuntimeError(msg)
50
+
51
+
52
def filter_examples_by_support_level(support_level: SupportLevel):
    """
    Return a new dict with the entries of ``all_examples()`` whose
    ``support_level`` equals the requested one.
    """
    selected = {}
    for key, val in all_examples().items():
        if val.support_level == support_level:
            selected[key] = val
    return selected
58
+
59
+
60
+ def get_rewrite_cases(case):
61
+ return _EXAMPLE_REWRITE_CASES.get(case.name, [])
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/assume_constant_result.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+ import torch._dynamo as torchdynamo
4
+
5
+
6
class AssumeConstantResult(torch.nn.Module):
    """
    Applying the `assume_constant_result` decorator to burn in non-traceable code as a constant.
    """

    @torchdynamo.assume_constant_result
    def get_item(self, y):
        # Convert the tensor to an integer dtype, then to a Python scalar.
        return y.int().item()

    def forward(self, x, y):
        # Slice ``x`` up to the scalar obtained from ``y``.
        return x[: self.get_item(y)]

# Module-level attributes describing this example.
example_args = (torch.randn(3, 2), torch.tensor(4))
tags = {"torch.escape-hatch"}
model = AssumeConstantResult()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/autograd_function.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
class MyAutogradFunction(torch.autograd.Function):
    # Custom autograd function: forward returns a clone of its input and
    # backward returns the incoming gradient plus one.
    @staticmethod
    # pyrefly: ignore [bad-override]
    def forward(ctx, x):
        return x.clone()

    @staticmethod
    # pyrefly: ignore [bad-override]
    def backward(ctx, grad_output):
        return grad_output + 1
14
+
15
class AutogradFunction(torch.nn.Module):
    """
    TorchDynamo does not keep track of backward() on autograd functions. We recommend
    using `allow_in_graph` to mitigate this problem.
    """

    def forward(self, x):
        # Route the input through the custom autograd function.
        return MyAutogradFunction.apply(x)

# Module-level attributes describing this example.
example_args = (torch.randn(3, 2),)
model = AutogradFunction()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/class_method.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
class ClassMethod(torch.nn.Module):
    """
    Class methods are inlined during tracing.
    """

    @classmethod
    def method(cls, x):
        return x + 1

    def __init__(self) -> None:
        super().__init__()
        self.linear = torch.nn.Linear(4, 2)

    def forward(self, x):
        x = self.linear(x)
        # The same classmethod is reached three ways: through the instance,
        # through ``self.__class__`` and through ``type(self)``.
        return self.method(x) * self.__class__.method(x) * type(self).method(x)

# Module-level attributes describing this example.
example_args = (torch.randn(3, 4),)
model = ClassMethod()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_branch_class_method.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+ from functorch.experimental.control_flow import cond
5
+
6
class MySubModule(torch.nn.Module):
    # Helper module whose ``forward`` delegates to the plain method ``foo``.
    def foo(self, x):
        return x.cos()

    def forward(self, x):
        return self.foo(x)
12
+
13
class CondBranchClassMethod(torch.nn.Module):
    """
    The branch functions (`true_fn` and `false_fn`) passed to cond() must follow these rules:
      - both branches must take the same args, which must also match the branch args passed to cond.
      - both branches must return a single tensor
      - returned tensor must have the same tensor metadata, e.g. shape and dtype
      - branch function can be free function, nested function, lambda, class methods
      - branch function can not have closure variables
      - no inplace mutations on inputs or global variables


    This example demonstrates using class method in cond().

    NOTE: If the `pred` is tested on a dim with batch size < 2, it will be specialized.
    """

    def __init__(self) -> None:
        super().__init__()
        self.subm = MySubModule()

    def bar(self, x):
        return x.sin()

    def forward(self, x):
        # True branch: the submodule's bound ``forward``; false branch: the
        # bound method ``self.bar``.
        return cond(x.shape[0] <= 2, self.subm.forward, self.bar, [x])

# Module-level attributes describing this example.
example_args = (torch.randn(3),)
tags = {
    "torch.cond",
    "torch.dynamic-shape",
}
model = CondBranchClassMethod()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_branch_nested_function.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+ from functorch.experimental.control_flow import cond
5
+
6
class CondBranchNestedFunction(torch.nn.Module):
    """
    The branch functions (`true_fn` and `false_fn`) passed to cond() must follow these rules:
      - both branches must take the same args, which must also match the branch args passed to cond.
      - both branches must return a single tensor
      - returned tensor must have the same tensor metadata, e.g. shape and dtype
      - branch function can be free function, nested function, lambda, class methods
      - branch function can not have closure variables
      - no inplace mutations on inputs or global variables

    This example demonstrates using nested function in cond().

    NOTE: If the `pred` is tested on a dim with batch size < 2, it will be specialized.
    """

    def forward(self, x):
        # Each branch defines and immediately calls a function nested
        # inside it.
        def true_fn(x):
            def inner_true_fn(y):
                return x + y

            return inner_true_fn(x)

        def false_fn(x):
            def inner_false_fn(y):
                return x - y

            return inner_false_fn(x)

        return cond(x.shape[0] < 10, true_fn, false_fn, [x])

# Module-level attributes describing this example.
example_args = (torch.randn(3),)
tags = {
    "torch.cond",
    "torch.dynamic-shape",
}
model = CondBranchNestedFunction()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_branch_nonlocal_variables.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+ from functorch.experimental.control_flow import cond
5
+
6
+ class CondBranchNonlocalVariables(torch.nn.Module):
7
+ """
8
+ The branch functions (`true_fn` and `false_fn`) passed to cond() must follow these rules:
9
+ - both branches must take the same args, which must also match the branch args passed to cond.
10
+ - both branches must return a single tensor
11
+ - returned tensor must have the same tensor metadata, e.g. shape and dtype
12
+ - branch function can be free function, nested function, lambda, class methods
13
+ - branch function can not have closure variables
14
+ - no inplace mutations on inputs or global variables
15
+
16
+ This example demonstrates how to rewrite code to avoid capturing closure variables in branch functions.
17
+
18
+ The code below will not work because capturing closure variables is not supported.
19
+ ```
20
+ my_tensor_var = x + 100
21
+ my_primitive_var = 3.14
22
+
23
+ def true_fn(y):
24
+ nonlocal my_tensor_var, my_primitive_var
25
+ return y + my_tensor_var + my_primitive_var
26
+
27
+ def false_fn(y):
28
+ nonlocal my_tensor_var, my_primitive_var
29
+ return y - my_tensor_var - my_primitive_var
30
+
31
+ return cond(x.shape[0] > 5, true_fn, false_fn, [x])
32
+ ```
33
+
34
+ NOTE: If the `pred` is tested on a dim with batch size < 2, it will be specialized.
35
+ """
36
+
37
+ def forward(self, x):
38
+ my_tensor_var = x + 100
39
+ my_primitive_var = 3.14
40
+
41
+ def true_fn(x, y, z):
42
+ return x + y + z
43
+
44
+ def false_fn(x, y, z):
45
+ return x - y - z
46
+
47
+ return cond(
48
+ x.shape[0] > 5,
49
+ true_fn,
50
+ false_fn,
51
+ [x, my_tensor_var, torch.tensor(my_primitive_var)],
52
+ )
53
+
54
+ example_args = (torch.randn(6),)
55
+ tags = {
56
+ "torch.cond",
57
+ "torch.dynamic-shape",
58
+ }
59
+ model = CondBranchNonlocalVariables()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_closed_over_variable.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+ from functorch.experimental.control_flow import cond
5
+
6
+ class CondClosedOverVariable(torch.nn.Module):
7
+ """
8
+ torch.cond() supports branches closed over arbitrary variables.
9
+ """
10
+
11
+ def forward(self, pred, x):
12
+ def true_fn(val):
13
+ return x * 2
14
+
15
+ def false_fn(val):
16
+ return x - 2
17
+
18
+ return cond(pred, true_fn, false_fn, [x + 1])
19
+
20
+ example_args = (torch.tensor(True), torch.randn(3, 2))
21
+ tags = {"torch.cond", "python.closure"}
22
+ model = CondClosedOverVariable()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_operands.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+ from torch.export import Dim
5
+
6
+ x = torch.randn(3, 2)
7
+ y = torch.randn(2)
8
+ dim0_x = Dim("dim0_x")
9
+
10
+ class CondOperands(torch.nn.Module):
11
+ """
12
+ The operands passed to cond() must be:
13
+ - a list of tensors
14
+ - match arguments of `true_fn` and `false_fn`
15
+
16
+ NOTE: If the `pred` is tested on a dim with batch size < 2, it will be specialized.
17
+ """
18
+
19
+ def forward(self, x, y):
20
+ def true_fn(x, y):
21
+ return x + y
22
+
23
+ def false_fn(x, y):
24
+ return x - y
25
+
26
+ return torch.cond(x.shape[0] > 2, true_fn, false_fn, [x, y])
27
+
28
+ example_args = (x, y)
29
+ tags = {
30
+ "torch.cond",
31
+ "torch.dynamic-shape",
32
+ }
33
+ extra_inputs = (torch.randn(2, 2), torch.randn(2))
34
+ dynamic_shapes = {"x": {0: dim0_x}, "y": None}
35
+ model = CondOperands()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/cond_predicate.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+ from functorch.experimental.control_flow import cond
5
+
6
+ class CondPredicate(torch.nn.Module):
7
+ """
8
+ The conditional statement (aka predicate) passed to cond() must be one of the following:
9
+ - torch.Tensor with a single element
10
+ - boolean expression
11
+
12
+ NOTE: If the `pred` is tested on a dim with batch size < 2, it will be specialized.
13
+ """
14
+
15
+ def forward(self, x):
16
+ pred = x.dim() > 2 and x.shape[2] > 10
17
+
18
+ return cond(pred, lambda x: x.cos(), lambda y: y.sin(), [x])
19
+
20
+ example_args = (torch.randn(6, 4, 3),)
21
+ tags = {
22
+ "torch.cond",
23
+ "torch.dynamic-shape",
24
+ }
25
+ model = CondPredicate()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/constrain_as_size_example.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+
5
+ class ConstrainAsSizeExample(torch.nn.Module):
6
+ """
7
+ If the value is not known at tracing time, you can provide a hint so that we
8
+ can trace further. Please look at torch._check APIs.
9
+ """
10
+
11
+ def forward(self, x):
12
+ a = x.item()
13
+ torch._check(a >= 0)
14
+ torch._check(a <= 5)
15
+ return torch.zeros((a, 5))
16
+
17
+
18
+ example_args = (torch.tensor(4),)
19
+ tags = {
20
+ "torch.dynamic-value",
21
+ "torch.escape-hatch",
22
+ }
23
+ model = ConstrainAsSizeExample()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/constrain_as_value_example.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+
5
+ class ConstrainAsValueExample(torch.nn.Module):
6
+ """
7
+ If the value is not known at tracing time, you can provide a hint so that we
8
+ can trace further. Please look at torch._check API.
9
+ """
10
+
11
+ def forward(self, x, y):
12
+ a = x.item()
13
+ torch._check(a >= 0)
14
+ torch._check(a <= 5)
15
+
16
+ if a < 6:
17
+ return y.sin()
18
+ return y.cos()
19
+
20
+
21
+ example_args = (torch.tensor(4), torch.randn(5, 5))
22
+ tags = {
23
+ "torch.dynamic-value",
24
+ "torch.escape-hatch",
25
+ }
26
+ model = ConstrainAsValueExample()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/decorator.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import functools
3
+
4
+ import torch
5
+
6
+ def test_decorator(func):
7
+ @functools.wraps(func)
8
+ def wrapper(*args, **kwargs):
9
+ return func(*args, **kwargs) + 1
10
+
11
+ return wrapper
12
+
13
+ class Decorator(torch.nn.Module):
14
+ """
15
+ Decorator calls are inlined into the exported function during tracing.
16
+ """
17
+
18
+ @test_decorator
19
+ def forward(self, x, y):
20
+ return x + y
21
+
22
+ example_args = (torch.randn(3, 2), torch.randn(3, 2))
23
+ model = Decorator()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dictionary.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+ class Dictionary(torch.nn.Module):
5
+ """
6
+ Dictionary structures are inlined and flattened during tracing.
7
+ """
8
+
9
+ def forward(self, x, y):
10
+ elements = {}
11
+ elements["x2"] = x * x
12
+ y = y * elements["x2"]
13
+ return {"y": y}
14
+
15
+ example_args = (torch.randn(3, 2), torch.tensor(4))
16
+ tags = {"python.data-structure"}
17
+ model = Dictionary()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_assert.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+ class DynamicShapeAssert(torch.nn.Module):
5
+ """
6
+ A basic usage of python assertion.
7
+ """
8
+
9
+ def forward(self, x):
10
+ # assertion with error message
11
+ assert x.shape[0] > 2, f"{x.shape[0]} is greater than 2"
12
+ # assertion without error message
13
+ assert x.shape[0] > 1
14
+ return x
15
+
16
+ example_args = (torch.randn(3, 2),)
17
+ tags = {"python.assert"}
18
+ model = DynamicShapeAssert()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_constructor.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+ class DynamicShapeConstructor(torch.nn.Module):
5
+ """
6
+ Tensor constructors should be captured with dynamic shape inputs rather
7
+ than being baked in with static shape.
8
+ """
9
+
10
+ def forward(self, x):
11
+ return torch.zeros(x.shape[0] * 2)
12
+
13
+ example_args = (torch.randn(3, 2),)
14
+ tags = {"torch.dynamic-shape"}
15
+ model = DynamicShapeConstructor()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_if_guard.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+ class DynamicShapeIfGuard(torch.nn.Module):
5
+ """
6
+ `if` statement with backed dynamic shape predicate will be specialized into
7
+ one particular branch and generate a guard. However, export will fail if the
8
+ dimension is marked as dynamic shape from a higher-level API.
9
+ """
10
+
11
+ def forward(self, x):
12
+ if x.shape[0] == 3:
13
+ return x.cos()
14
+
15
+ return x.sin()
16
+
17
+ example_args = (torch.randn(3, 2, 2),)
18
+ tags = {"torch.dynamic-shape", "python.control-flow"}
19
+ model = DynamicShapeIfGuard()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_map.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+ from functorch.experimental.control_flow import map
5
+
6
+ class DynamicShapeMap(torch.nn.Module):
7
+ """
8
+ functorch map() maps a function over the first tensor dimension.
9
+ """
10
+
11
+ def forward(self, xs, y):
12
+ def body(x, y):
13
+ return x + y
14
+
15
+ return map(body, xs, y)
16
+
17
+ example_args = (torch.randn(3, 2), torch.randn(2))
18
+ tags = {"torch.dynamic-shape", "torch.map"}
19
+ model = DynamicShapeMap()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_round.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+ from torch._export.db.case import SupportLevel
5
+ from torch.export import Dim
6
+
7
+ class DynamicShapeRound(torch.nn.Module):
8
+ """
9
+ Calling round on dynamic shapes is not supported.
10
+ """
11
+
12
+ def forward(self, x):
13
+ return x[: round(x.shape[0] / 2)]
14
+
15
+ x = torch.randn(3, 2)
16
+ dim0_x = Dim("dim0_x")
17
+ example_args = (x,)
18
+ tags = {"torch.dynamic-shape", "python.builtin"}
19
+ support_level = SupportLevel.NOT_SUPPORTED_YET
20
+ dynamic_shapes = {"x": {0: dim0_x}}
21
+ model = DynamicShapeRound()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/dynamic_shape_slicing.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+ class DynamicShapeSlicing(torch.nn.Module):
5
+ """
6
+ Slices with dynamic shape arguments should be captured into the graph
7
+ rather than being baked in.
8
+ """
9
+
10
+ def forward(self, x):
11
+ return x[: x.shape[0] - 2, x.shape[1] - 1 :: 2]
12
+
13
+ example_args = (torch.randn(3, 2),)
14
+ tags = {"torch.dynamic-shape"}
15
+ model = DynamicShapeSlicing()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/fn_with_kwargs.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+ class FnWithKwargs(torch.nn.Module):
5
+ """
6
+ Keyword arguments are not supported at the moment.
7
+ """
8
+
9
+ def forward(self, pos0, tuple0, *myargs, mykw0, **mykwargs):
10
+ out = pos0
11
+ for arg in tuple0:
12
+ out = out * arg
13
+ for arg in myargs:
14
+ out = out * arg
15
+ out = out * mykw0
16
+ out = out * mykwargs["input0"] * mykwargs["input1"]
17
+ return out
18
+
19
+ example_args = (
20
+ torch.randn(4),
21
+ (torch.randn(4), torch.randn(4)),
22
+ *[torch.randn(4), torch.randn(4)]
23
+ )
24
+ example_kwargs = {
25
+ "mykw0": torch.randn(4),
26
+ "input0": torch.randn(4),
27
+ "input1": torch.randn(4),
28
+ }
29
+ tags = {"python.data-structure"}
30
+ model = FnWithKwargs()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/db/examples/list_contains.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+ class ListContains(torch.nn.Module):
5
+ """
6
+ List containment relation can be checked on a dynamic shape or constants.
7
+ """
8
+
9
+ def forward(self, x):
10
+ assert x.size(-1) in [6, 2]
11
+ assert x.size(0) not in [4, 5, 6]
12
+ assert "monkey" not in ["cow", "pig"]
13
+ return x + x
14
+
15
+ example_args = (torch.randn(3, 2),)
16
+ tags = {"torch.dynamic-shape", "python.data-structure", "python.assert"}
17
+ model = ListContains()