Spaces:
Running
Running
tttc3
committed on
Commit
·
83d8e67
1
Parent(s):
2d0032e
Fixed issues outlined in pull request review
Browse files
- pysr/export_numpy.py +9 -1
- pysr/sr.py +53 -30
pysr/export_numpy.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
| 2 |
import numpy as np
|
| 3 |
import pandas as pd
|
| 4 |
from sympy import lambdify
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
class CallableEquation:
|
|
@@ -25,5 +26,12 @@ class CallableEquation:
|
|
| 25 |
**{k: X[k].values for k in self._variable_names}
|
| 26 |
) * np.ones(expected_shape)
|
| 27 |
if self._selection is not None:
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
return self._lambda(*X.T) * np.ones(expected_shape)
|
|
|
|
| 2 |
import numpy as np
|
| 3 |
import pandas as pd
|
| 4 |
from sympy import lambdify
|
| 5 |
+
import warnings
|
| 6 |
|
| 7 |
|
| 8 |
class CallableEquation:
|
|
|
|
| 26 |
**{k: X[k].values for k in self._variable_names}
|
| 27 |
) * np.ones(expected_shape)
|
| 28 |
if self._selection is not None:
|
| 29 |
+
if X.shape[1] != len(self._selection):
|
| 30 |
+
warnings.warn(
|
| 31 |
+
"`X` should be of shape (n_samples, len(self._selection)). "
|
| 32 |
+
"Automatically filtering `X` to selection. "
|
| 33 |
+
"Note: Filtered `X` column order may not match column order in fit "
|
| 34 |
+
"this may lead to incorrect predictions and other errors."
|
| 35 |
+
)
|
| 36 |
+
X = X[:, self._selection]
|
| 37 |
return self._lambda(*X.T) * np.ones(expected_shape)
|
pysr/sr.py
CHANGED
|
@@ -190,7 +190,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 190 |
binary_operators : list[str], default=["+", "-", "*", "/"]
|
| 191 |
List of strings giving the binary operators in Julia's Base.
|
| 192 |
|
| 193 |
-
unary_operators : list[str], default=
|
| 194 |
Same as :param`binary_operators` but for operators taking a
|
| 195 |
single scalar.
|
| 196 |
|
|
@@ -226,7 +226,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 226 |
timeout_in_seconds : float, default=None
|
| 227 |
Make the search return early once this many seconds have passed.
|
| 228 |
|
| 229 |
-
constraints : dict[str, int | tuple[int,int]], default=
|
| 230 |
Dictionary of int (unary) or 2-tuples (binary), this enforces
|
| 231 |
maxsize constraints on the individual arguments of operators.
|
| 232 |
E.g., `'pow': (-1, 1)` says that power laws can have any
|
|
@@ -462,7 +462,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 462 |
Whether to create a 'torch_format' column in the output,
|
| 463 |
containing a torch module with trainable parameters.
|
| 464 |
|
| 465 |
-
extra_sympy_mappings : dict[str, Callable], default=
|
| 466 |
Provides mappings between custom :param`binary_operators` or
|
| 467 |
:param`unary_operators` defined in julia strings, to those same
|
| 468 |
operators defined in sympy.
|
|
@@ -470,13 +470,13 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 470 |
model to be exported to sympy, :param`extra_sympy_mappings`
|
| 471 |
would be `{"inv": lambda x: 1/x}`.
|
| 472 |
|
| 473 |
-
extra_jax_mappings : dict[Callable, str], default=
|
| 474 |
Similar to :param`extra_sympy_mappings` but for model export
|
| 475 |
to jax. The dictionary maps sympy functions to jax functions.
|
| 476 |
For example: `extra_jax_mappings={sympy.sin: "jnp.sin"}` maps
|
| 477 |
the `sympy.sin` function to the equivalent jax expression `jnp.sin`.
|
| 478 |
|
| 479 |
-
extra_torch_mappings : dict[Callable, Callable], default=
|
| 480 |
The same as :param`extra_jax_mappings` but for model export
|
| 481 |
to pytorch. Note that the dictionary keys should be callable
|
| 482 |
pytorch expressions.
|
|
@@ -571,13 +571,8 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 571 |
self,
|
| 572 |
model_selection="best",
|
| 573 |
*,
|
| 574 |
-
binary_operators=
|
| 575 |
-
|
| 576 |
-
"-",
|
| 577 |
-
"*",
|
| 578 |
-
"/",
|
| 579 |
-
],
|
| 580 |
-
unary_operators=[],
|
| 581 |
niterations=40,
|
| 582 |
populations=15,
|
| 583 |
population_size=33,
|
|
@@ -586,7 +581,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 586 |
maxdepth=None,
|
| 587 |
warmup_maxsize_by=0.0,
|
| 588 |
timeout_in_seconds=None,
|
| 589 |
-
constraints=
|
| 590 |
nested_constraints=None,
|
| 591 |
loss="L2DistLoss()",
|
| 592 |
complexity_of_operators=None,
|
|
@@ -640,9 +635,9 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 640 |
update=True,
|
| 641 |
output_jax_format=False,
|
| 642 |
output_torch_format=False,
|
| 643 |
-
extra_sympy_mappings=
|
| 644 |
-
extra_torch_mappings=
|
| 645 |
-
extra_jax_mappings=
|
| 646 |
denoise=False,
|
| 647 |
select_k_features=None,
|
| 648 |
**kwargs,
|
|
@@ -888,6 +883,14 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 888 |
|
| 889 |
"""
|
| 890 |
# Handle None values for instance parameters:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 891 |
if self.multithreading is None:
|
| 892 |
# Default is multithreading=True, unless explicitly set,
|
| 893 |
# or procs is set to 0 (serial mode).
|
|
@@ -1018,11 +1021,12 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1018 |
|
| 1019 |
"""
|
| 1020 |
if isinstance(X, pd.DataFrame):
|
| 1021 |
-
variable_names
|
| 1022 |
-
|
| 1023 |
-
|
| 1024 |
-
|
| 1025 |
-
|
|
|
|
| 1026 |
|
| 1027 |
if X.columns.is_object() and X.columns.str.contains(" ").any():
|
| 1028 |
X.columns = X.columns.str.replace(" ", "_")
|
|
@@ -1395,7 +1399,6 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1395 |
self : object
|
| 1396 |
Fitted Estimator.
|
| 1397 |
"""
|
| 1398 |
-
|
| 1399 |
# Init attributes that are not specified in BaseEstimator
|
| 1400 |
self.equations_ = None
|
| 1401 |
self.nout_ = 1
|
|
@@ -1482,14 +1485,35 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1482 |
ValueError
|
| 1483 |
Raises if the `best_equation` cannot be evaluated.
|
| 1484 |
"""
|
| 1485 |
-
check_is_fitted(self)
|
| 1486 |
-
|
| 1487 |
-
|
| 1488 |
-
|
| 1489 |
-
|
| 1490 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1491 |
|
| 1492 |
X = self._validate_data(X, reset=False)
|
|
|
|
| 1493 |
try:
|
| 1494 |
if self.nout_ > 1:
|
| 1495 |
return np.stack(
|
|
@@ -1685,8 +1709,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1685 |
}
|
| 1686 |
|
| 1687 |
sympy_symbols = [
|
| 1688 |
-
sympy.Symbol(self.feature_names_in_
|
| 1689 |
-
for i in range(self.n_features_in_)
|
| 1690 |
]
|
| 1691 |
|
| 1692 |
for _, eqn_row in output.iterrows():
|
|
|
|
| 190 |
binary_operators : list[str], default=["+", "-", "*", "/"]
|
| 191 |
List of strings giving the binary operators in Julia's Base.
|
| 192 |
|
| 193 |
+
unary_operators : list[str], default=None
|
| 194 |
Same as :param`binary_operators` but for operators taking a
|
| 195 |
single scalar.
|
| 196 |
|
|
|
|
| 226 |
timeout_in_seconds : float, default=None
|
| 227 |
Make the search return early once this many seconds have passed.
|
| 228 |
|
| 229 |
+
constraints : dict[str, int | tuple[int,int]], default=None
|
| 230 |
Dictionary of int (unary) or 2-tuples (binary), this enforces
|
| 231 |
maxsize constraints on the individual arguments of operators.
|
| 232 |
E.g., `'pow': (-1, 1)` says that power laws can have any
|
|
|
|
| 462 |
Whether to create a 'torch_format' column in the output,
|
| 463 |
containing a torch module with trainable parameters.
|
| 464 |
|
| 465 |
+
extra_sympy_mappings : dict[str, Callable], default=None
|
| 466 |
Provides mappings between custom :param`binary_operators` or
|
| 467 |
:param`unary_operators` defined in julia strings, to those same
|
| 468 |
operators defined in sympy.
|
|
|
|
| 470 |
model to be exported to sympy, :param`extra_sympy_mappings`
|
| 471 |
would be `{"inv": lambda x: 1/x}`.
|
| 472 |
|
| 473 |
+
extra_jax_mappings : dict[Callable, str], default=None
|
| 474 |
Similar to :param`extra_sympy_mappings` but for model export
|
| 475 |
to jax. The dictionary maps sympy functions to jax functions.
|
| 476 |
For example: `extra_jax_mappings={sympy.sin: "jnp.sin"}` maps
|
| 477 |
the `sympy.sin` function to the equivalent jax expression `jnp.sin`.
|
| 478 |
|
| 479 |
+
extra_torch_mappings : dict[Callable, Callable], default=None
|
| 480 |
The same as :param`extra_jax_mappings` but for model export
|
| 481 |
to pytorch. Note that the dictionary keys should be callable
|
| 482 |
pytorch expressions.
|
|
|
|
| 571 |
self,
|
| 572 |
model_selection="best",
|
| 573 |
*,
|
| 574 |
+
binary_operators=None,
|
| 575 |
+
unary_operators=None,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 576 |
niterations=40,
|
| 577 |
populations=15,
|
| 578 |
population_size=33,
|
|
|
|
| 581 |
maxdepth=None,
|
| 582 |
warmup_maxsize_by=0.0,
|
| 583 |
timeout_in_seconds=None,
|
| 584 |
+
constraints=None,
|
| 585 |
nested_constraints=None,
|
| 586 |
loss="L2DistLoss()",
|
| 587 |
complexity_of_operators=None,
|
|
|
|
| 635 |
update=True,
|
| 636 |
output_jax_format=False,
|
| 637 |
output_torch_format=False,
|
| 638 |
+
extra_sympy_mappings=None,
|
| 639 |
+
extra_torch_mappings=None,
|
| 640 |
+
extra_jax_mappings=None,
|
| 641 |
denoise=False,
|
| 642 |
select_k_features=None,
|
| 643 |
**kwargs,
|
|
|
|
| 883 |
|
| 884 |
"""
|
| 885 |
# Handle None values for instance parameters:
|
| 886 |
+
if self.binary_operators is None:
|
| 887 |
+
self.binary_operators = "+ * - /".split(" ")
|
| 888 |
+
if self.unary_operators is None:
|
| 889 |
+
self.unary_operators = []
|
| 890 |
+
if self.extra_sympy_mappings is None:
|
| 891 |
+
self.extra_sympy_mappings = {}
|
| 892 |
+
if self.constraints is None:
|
| 893 |
+
self.constraints = {}
|
| 894 |
if self.multithreading is None:
|
| 895 |
# Default is multithreading=True, unless explicitly set,
|
| 896 |
# or procs is set to 0 (serial mode).
|
|
|
|
| 1021 |
|
| 1022 |
"""
|
| 1023 |
if isinstance(X, pd.DataFrame):
|
| 1024 |
+
if variable_names:
|
| 1025 |
+
variable_names = None
|
| 1026 |
+
warnings.warn(
|
| 1027 |
+
":param`variable_names` has been reset to `None` as `X` is a DataFrame. "
|
| 1028 |
+
"Will use DataFrame column names instead."
|
| 1029 |
+
)
|
| 1030 |
|
| 1031 |
if X.columns.is_object() and X.columns.str.contains(" ").any():
|
| 1032 |
X.columns = X.columns.str.replace(" ", "_")
|
|
|
|
| 1399 |
self : object
|
| 1400 |
Fitted Estimator.
|
| 1401 |
"""
|
|
|
|
| 1402 |
# Init attributes that are not specified in BaseEstimator
|
| 1403 |
self.equations_ = None
|
| 1404 |
self.nout_ = 1
|
|
|
|
| 1485 |
ValueError
|
| 1486 |
Raises if the `best_equation` cannot be evaluated.
|
| 1487 |
"""
|
| 1488 |
+
check_is_fitted(self, attributes=["equations_", "feature_names_in_"])
|
| 1489 |
+
|
| 1490 |
+
# When X is a NumPy array or a pandas dataframe with a RangeIndex,
|
| 1491 |
+
# the self.feature_names_in_ generated during fit, for the same X,
|
| 1492 |
+
# will cause a warning to be thrown during _validate_data.
|
| 1493 |
+
# To avoid this, convert X to a dataframe, apply the selection mask,
|
| 1494 |
+
# and then set the column/feature_names of X to be equal to those
|
| 1495 |
+
# generated during fit.
|
| 1496 |
+
if isinstance(X, np.ndarray):
|
| 1497 |
+
X = pd.DataFrame(X)
|
| 1498 |
+
|
| 1499 |
+
if isinstance(X.columns, pd.RangeIndex):
|
| 1500 |
+
if self.selection_mask_:
|
| 1501 |
+
# RangeIndex enforces column order allowing columns to
|
| 1502 |
+
# be correctly filtered with self.selection_mask_
|
| 1503 |
+
X = X.iloc[:, self.selection_mask_]
|
| 1504 |
+
X.columns = self.feature_names_in_
|
| 1505 |
+
|
| 1506 |
+
# Without feature information, CallableEquation/lambda_format equations
|
| 1507 |
+
# require that the column order of X matches that of the X used during
|
| 1508 |
+
# the fitting process. _validate_data removes this feature information
|
| 1509 |
+
# when it converts the dataframe to an np array. Thus, to ensure feature
|
| 1510 |
+
# order is preserved after conversion, the dataframe columns must be
|
| 1511 |
+
# reordered/reindexed to match those of the transformed (denoised and
|
| 1512 |
+
# feature selected) X in fit.
|
| 1513 |
+
X = X.reindex(columns=self.feature_names_in_)
|
| 1514 |
|
| 1515 |
X = self._validate_data(X, reset=False)
|
| 1516 |
+
|
| 1517 |
try:
|
| 1518 |
if self.nout_ > 1:
|
| 1519 |
return np.stack(
|
|
|
|
| 1709 |
}
|
| 1710 |
|
| 1711 |
sympy_symbols = [
|
| 1712 |
+
sympy.Symbol(variable) for variable in self.feature_names_in_
|
|
|
|
| 1713 |
]
|
| 1714 |
|
| 1715 |
for _, eqn_row in output.iterrows():
|