Spaces:
Running
Running
tttc3
committed on
Commit
·
3e8d44d
1
Parent(s):
3821242
Add warm_start
Browse files- pysr/sr.py +35 -47
pysr/sr.py
CHANGED
|
@@ -177,7 +177,7 @@ def best_callable(*args, **kwargs): # pragma: no cover
|
|
| 177 |
VALID_OPTIMIZER_ALGORITHMS = ["NelderMead", "BFGS"]
|
| 178 |
|
| 179 |
|
| 180 |
-
class PySRRegressor(
|
| 181 |
"""
|
| 182 |
High-performance symbolic regression.
|
| 183 |
|
|
@@ -431,6 +431,10 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 431 |
Pass an int for reproducible results across multiple function calls.
|
| 432 |
See :term:`Glossary <random_state>`.
|
| 433 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
verbosity : int, default=1e9
|
| 435 |
What verbosity level to use. 0 means minimal print statements.
|
| 436 |
|
|
@@ -633,6 +637,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 633 |
fast_cycle=False,
|
| 634 |
precision=32,
|
| 635 |
random_state=None,
|
|
|
|
| 636 |
verbosity=1e9,
|
| 637 |
update_verbosity=None,
|
| 638 |
progress=True,
|
|
@@ -717,6 +722,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 717 |
self.fast_cycle = fast_cycle
|
| 718 |
self.precision = precision
|
| 719 |
self.random_state = random_state
|
|
|
|
| 720 |
# Additional runtime parameters
|
| 721 |
# - Runtime user interface
|
| 722 |
self.verbosity = verbosity
|
|
@@ -914,8 +920,11 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 914 |
if self.temp_equation_file:
|
| 915 |
self.equation_file_ = self.tempdir_ / "hall_of_fame.csv"
|
| 916 |
elif self.equation_file is None:
|
| 917 |
-
|
| 918 |
-
|
|
|
|
|
|
|
|
|
|
| 919 |
else:
|
| 920 |
self.equation_file_ = self.equation_file
|
| 921 |
|
|
@@ -1433,10 +1442,13 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1433 |
Fitted Estimator.
|
| 1434 |
"""
|
| 1435 |
# Init attributes that are not specified in BaseEstimator
|
| 1436 |
-
self.
|
| 1437 |
-
|
| 1438 |
-
|
| 1439 |
-
|
|
|
|
|
|
|
|
|
|
| 1440 |
|
| 1441 |
random_state = check_random_state(self.random_state) # For np random
|
| 1442 |
seed = random_state.get_state()[1][0] # For julia random
|
|
@@ -1510,31 +1522,35 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1510 |
self.equation_file_ = checkpoint_file
|
| 1511 |
self.equations_ = self.get_hof()
|
| 1512 |
|
| 1513 |
-
def
|
| 1514 |
"""
|
| 1515 |
-
|
| 1516 |
-
|
|
|
|
|
|
|
| 1517 |
|
| 1518 |
Parameters
|
| 1519 |
----------
|
| 1520 |
X : {ndarray | pandas.DataFrame} of shape (n_samples, n_features)
|
| 1521 |
-
|
| 1522 |
|
| 1523 |
-
|
| 1524 |
-
|
|
|
|
| 1525 |
|
| 1526 |
Returns
|
| 1527 |
-------
|
| 1528 |
-
y_predicted : ndarray of shape (n_samples,
|
| 1529 |
-
Values predicted by substituting `X` into the
|
| 1530 |
-
|
| 1531 |
|
| 1532 |
Raises
|
| 1533 |
------
|
| 1534 |
ValueError
|
| 1535 |
Raises if the `best_equation` cannot be evaluated.
|
| 1536 |
"""
|
| 1537 |
-
|
|
|
|
| 1538 |
|
| 1539 |
# When X is a numpy array or a pandas dataframe with a RangeIndex,
|
| 1540 |
# the self.feature_names_in_ generated during fit, for the same X,
|
|
@@ -1542,16 +1558,15 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1542 |
# To avoid this, convert X to a dataframe, apply the selection mask,
|
| 1543 |
# and then set the column/feature_names of X to be equal to those
|
| 1544 |
# generated during fit.
|
| 1545 |
-
if isinstance(X,
|
|
|
|
| 1546 |
X = pd.DataFrame(X)
|
| 1547 |
-
|
| 1548 |
if isinstance(X.columns, pd.RangeIndex):
|
| 1549 |
if self.selection_mask_ is not None:
|
| 1550 |
# RangeIndex enforces column order allowing columns to
|
| 1551 |
# be correctly filtered with self.selection_mask_
|
| 1552 |
X = X.iloc[:, self.selection_mask_]
|
| 1553 |
X.columns = self.feature_names_in_
|
| 1554 |
-
|
| 1555 |
# Without feature information, CallableEquation/lambda_format equations
|
| 1556 |
# require that the column order of X matches that of the X used during
|
| 1557 |
# the fitting process. _validate_data removes this feature information
|
|
@@ -1560,7 +1575,6 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1560 |
# reordered/reindexed to match those of the transformed (denoised and
|
| 1561 |
# feature selected) X in fit.
|
| 1562 |
X = X.reindex(columns=self.feature_names_in_)
|
| 1563 |
-
|
| 1564 |
X = self._validate_data(X, reset=False)
|
| 1565 |
|
| 1566 |
try:
|
|
@@ -1576,32 +1590,6 @@ class PySRRegressor(BaseEstimator, RegressorMixin, MultiOutputMixin):
|
|
| 1576 |
"e.g., `model.set_params(extra_sympy_mappings={'inv': lambda x: 1 / x})`."
|
| 1577 |
) from error
|
| 1578 |
|
| 1579 |
-
def predict(self, X, index=None):
|
| 1580 |
-
"""
|
| 1581 |
-
Predict y from input X using the equation chosen by `model_selection`.
|
| 1582 |
-
|
| 1583 |
-
You may see what equation is used by printing this object. X should
|
| 1584 |
-
have the same columns as the training data.
|
| 1585 |
-
|
| 1586 |
-
Parameters
|
| 1587 |
-
----------
|
| 1588 |
-
X : {ndarray | pandas.DataFrame} of shape (n_samples, n_features)
|
| 1589 |
-
Training data.
|
| 1590 |
-
|
| 1591 |
-
index : int, default=None
|
| 1592 |
-
If you want to compute the output of an expression using a
|
| 1593 |
-
particular row of `self.equations_`, you may specify the index here.
|
| 1594 |
-
|
| 1595 |
-
Returns
|
| 1596 |
-
-------
|
| 1597 |
-
y_predicted : ndarray of shape (n_samples, nout_)
|
| 1598 |
-
Values predicted by substituting `X` into the fitted symbolic
|
| 1599 |
-
regression model.
|
| 1600 |
-
"""
|
| 1601 |
-
self.refresh()
|
| 1602 |
-
best_equation = self.get_best(index=index)
|
| 1603 |
-
return self._decision_function(X, best_equation)
|
| 1604 |
-
|
| 1605 |
def sympy(self, index=None):
|
| 1606 |
"""
|
| 1607 |
Return sympy representation of the equation(s) chosen by `model_selection`.
|
|
|
|
| 177 |
VALID_OPTIMIZER_ALGORITHMS = ["NelderMead", "BFGS"]
|
| 178 |
|
| 179 |
|
| 180 |
+
class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
|
| 181 |
"""
|
| 182 |
High-performance symbolic regression.
|
| 183 |
|
|
|
|
| 431 |
Pass an int for reproducible results across multiple function calls.
|
| 432 |
See :term:`Glossary <random_state>`.
|
| 433 |
|
| 434 |
+
warm_start : bool, default=False
|
| 435 |
+
Tells fit to continue from where the last call to fit finished.
|
| 436 |
+
If false, each call to fit will be fresh, overwriting previous results.
|
| 437 |
+
|
| 438 |
verbosity : int, default=1e9
|
| 439 |
What verbosity level to use. 0 means minimal print statements.
|
| 440 |
|
|
|
|
| 637 |
fast_cycle=False,
|
| 638 |
precision=32,
|
| 639 |
random_state=None,
|
| 640 |
+
warm_start=False,
|
| 641 |
verbosity=1e9,
|
| 642 |
update_verbosity=None,
|
| 643 |
progress=True,
|
|
|
|
| 722 |
self.fast_cycle = fast_cycle
|
| 723 |
self.precision = precision
|
| 724 |
self.random_state = random_state
|
| 725 |
+
self.warm_start = warm_start
|
| 726 |
# Additional runtime parameters
|
| 727 |
# - Runtime user interface
|
| 728 |
self.verbosity = verbosity
|
|
|
|
| 920 |
if self.temp_equation_file:
|
| 921 |
self.equation_file_ = self.tempdir_ / "hall_of_fame.csv"
|
| 922 |
elif self.equation_file is None:
|
| 923 |
+
if self.warm_start and self.equation_file_:
|
| 924 |
+
pass
|
| 925 |
+
else:
|
| 926 |
+
date_time = datetime.now().strftime("%Y-%m-%d_%H%M%S.%f")[:-3]
|
| 927 |
+
self.equation_file_ = "hall_of_fame_" + date_time + ".csv"
|
| 928 |
else:
|
| 929 |
self.equation_file_ = self.equation_file
|
| 930 |
|
|
|
|
| 1442 |
Fitted Estimator.
|
| 1443 |
"""
|
| 1444 |
# Init attributes that are not specified in BaseEstimator
|
| 1445 |
+
if self.warm_start and hasattr(self, "raw_julia_state_"):
|
| 1446 |
+
pass
|
| 1447 |
+
else:
|
| 1448 |
+
self.equations_ = None
|
| 1449 |
+
self.nout_ = 1
|
| 1450 |
+
self.selection_mask_ = None
|
| 1451 |
+
self.raw_julia_state_ = None
|
| 1452 |
|
| 1453 |
random_state = check_random_state(self.random_state) # For np random
|
| 1454 |
seed = random_state.get_state()[1][0] # For julia random
|
|
|
|
| 1522 |
self.equation_file_ = checkpoint_file
|
| 1523 |
self.equations_ = self.get_hof()
|
| 1524 |
|
| 1525 |
+
def predict(self, X, index=None):
|
| 1526 |
"""
|
| 1527 |
+
Predict y from input X using the equation chosen by `model_selection`.
|
| 1528 |
+
|
| 1529 |
+
You may see what equation is used by printing this object. X should
|
| 1530 |
+
have the same columns as the training data.
|
| 1531 |
|
| 1532 |
Parameters
|
| 1533 |
----------
|
| 1534 |
X : {ndarray | pandas.DataFrame} of shape (n_samples, n_features)
|
| 1535 |
+
Training data.
|
| 1536 |
|
| 1537 |
+
index : int, default=None
|
| 1538 |
+
If you want to compute the output of an expression using a
|
| 1539 |
+
particular row of `self.equations_`, you may specify the index here.
|
| 1540 |
|
| 1541 |
Returns
|
| 1542 |
-------
|
| 1543 |
+
y_predicted : ndarray of shape (n_samples, nout_)
|
| 1544 |
+
Values predicted by substituting `X` into the fitted symbolic
|
| 1545 |
+
regression model.
|
| 1546 |
|
| 1547 |
Raises
|
| 1548 |
------
|
| 1549 |
ValueError
|
| 1550 |
Raises if the `best_equation` cannot be evaluated.
|
| 1551 |
"""
|
| 1552 |
+
self.refresh()
|
| 1553 |
+
best_equation = self.get_best(index=index)
|
| 1554 |
|
| 1555 |
# When X is a numpy array or a pandas dataframe with a RangeIndex,
|
| 1556 |
# the self.feature_names_in_ generated during fit, for the same X,
|
|
|
|
| 1558 |
# To avoid this, convert X to a dataframe, apply the selection mask,
|
| 1559 |
# and then set the column/feature_names of X to be equal to those
|
| 1560 |
# generated during fit.
|
| 1561 |
+
if not isinstance(X, pd.DataFrame):
|
| 1562 |
+
X = check_array(X)
|
| 1563 |
X = pd.DataFrame(X)
|
|
|
|
| 1564 |
if isinstance(X.columns, pd.RangeIndex):
|
| 1565 |
if self.selection_mask_ is not None:
|
| 1566 |
# RangeIndex enforces column order allowing columns to
|
| 1567 |
# be correctly filtered with self.selection_mask_
|
| 1568 |
X = X.iloc[:, self.selection_mask_]
|
| 1569 |
X.columns = self.feature_names_in_
|
|
|
|
| 1570 |
# Without feature information, CallableEquation/lambda_format equations
|
| 1571 |
# require that the column order of X matches that of the X used during
|
| 1572 |
# the fitting process. _validate_data removes this feature information
|
|
|
|
| 1575 |
# reordered/reindexed to match those of the transformed (denoised and
|
| 1576 |
# feature selected) X in fit.
|
| 1577 |
X = X.reindex(columns=self.feature_names_in_)
|
|
|
|
| 1578 |
X = self._validate_data(X, reset=False)
|
| 1579 |
|
| 1580 |
try:
|
|
|
|
| 1590 |
"e.g., `model.set_params(extra_sympy_mappings={'inv': lambda x: 1 / x})`."
|
| 1591 |
) from error
|
| 1592 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1593 |
def sympy(self, index=None):
|
| 1594 |
"""
|
| 1595 |
Return sympy representation of the equation(s) chosen by `model_selection`.
|