Spaces:
Running
Running
Commit
·
ccf71e9
1
Parent(s):
93cf05b
`load` function to init model from saved equations
Browse files
- pysr/__init__.py +1 -0
- pysr/sr.py +74 -0
- test/test.py +27 -1
pysr/__init__.py
CHANGED
|
@@ -6,6 +6,7 @@ from .sr import (
|
|
| 6 |
best_tex,
|
| 7 |
best_callable,
|
| 8 |
best_row,
|
|
|
|
| 9 |
)
|
| 10 |
from .julia_helpers import install
|
| 11 |
from .feynman_problems import Problem, FeynmanProblem
|
|
|
|
| 6 |
best_tex,
|
| 7 |
best_callable,
|
| 8 |
best_row,
|
| 9 |
+
load,
|
| 10 |
)
|
| 11 |
from .julia_helpers import install
|
| 12 |
from .feynman_problems import Problem, FeynmanProblem
|
pysr/sr.py
CHANGED
|
@@ -2034,3 +2034,77 @@ def run_feature_selection(X, y, select_k_features, random_state=None):
|
|
| 2034 |
clf, threshold=-np.inf, max_features=select_k_features, prefit=True
|
| 2035 |
)
|
| 2036 |
return selector.get_support(indices=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2034 |
clf, threshold=-np.inf, max_features=select_k_features, prefit=True
|
| 2035 |
)
|
| 2036 |
return selector.get_support(indices=True)
|
| 2037 |
+
|
| 2038 |
+
|
| 2039 |
+
def load(
    equation_file,
    *,
    binary_operators,
    unary_operators,
    n_features_in,
    feature_names_in=None,
    selection_mask=None,
    nout=1,
    **pysr_kwargs,
):
    """
    Create a model from equations stored as a csv file

    Parameters
    ----------
    equation_file : str
        Path to a csv file containing equations.

    binary_operators : list[str]
        The same binary operators used when creating the model.
        Required keyword argument (no default).

    unary_operators : list[str]
        The same unary operators used when creating the model.
        Required keyword argument (no default).

    n_features_in : int
        Number of features passed to the model.

    feature_names_in : list[str], default=None
        Names of the features passed to the model. When omitted, the
        names `x0`, `x1`, ... are generated, one per feature.

    selection_mask : list[bool], default=None
        If using select_k_features, you must pass `model.selection_mask_` here.
        When omitted, an all-True boolean mask of length `n_features_in`
        is used.

    nout : int, default=1
        Number of outputs of the model.

    pysr_kwargs : dict
        Any other keyword arguments to initialize the PySRRegressor object.

    Returns
    -------
    model : PySRRegressor
        The model with fitted equations.

    Raises
    ------
    ValueError
        If `feature_names_in` is given and its length differs from
        `n_features_in`.
    """
    # Validate up front, before any model is built, and raise explicitly:
    # a bare `assert` is removed under `python -O` and carries no message.
    if feature_names_in is not None and len(feature_names_in) != n_features_in:
        raise ValueError(
            f"feature_names_in has {len(feature_names_in)} entries, "
            f"but n_features_in is {n_features_in}."
        )

    # TODO: copy .bkup file if exists.
    model = PySRRegressor(
        equation_file=equation_file,
        binary_operators=binary_operators,
        unary_operators=unary_operators,
        **pysr_kwargs,
    )

    # Set the trailing-underscore attributes by hand, since no fit is run here.
    model.equation_file_ = equation_file
    model.nout_ = nout
    model.n_features_in_ = n_features_in

    if feature_names_in is None:
        model.feature_names_in_ = [f"x{i}" for i in range(n_features_in)]
    else:
        model.feature_names_in_ = feature_names_in

    if selection_mask is None:
        # No feature selection was used: keep every input column.
        model.selection_mask_ = np.ones(n_features_in, dtype=bool)
    else:
        model.selection_mask_ = selection_mask

    # NOTE(review): presumably re-reads the equations from `equation_file_`
    # into the model -- confirm against PySRRegressor.refresh.
    model.refresh()

    return model
|
test/test.py
CHANGED
|
@@ -4,7 +4,7 @@ import inspect
|
|
| 4 |
import unittest
|
| 5 |
import numpy as np
|
| 6 |
from sklearn import model_selection
|
| 7 |
-
from pysr import PySRRegressor
|
| 8 |
from pysr.sr import run_feature_selection, _handle_feature_selection
|
| 9 |
from sklearn.utils.estimator_checks import check_estimator
|
| 10 |
import sympy
|
|
@@ -280,6 +280,32 @@ class TestPipeline(unittest.TestCase):
|
|
| 280 |
model.fit(X.values, y.values, Xresampled=Xresampled.values)
|
| 281 |
self.assertLess(np.average((model.predict(X.values) - y.values) ** 2), 1e-4)
|
| 282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
|
| 284 |
class TestBest(unittest.TestCase):
|
| 285 |
def setUp(self):
|
|
|
|
| 4 |
import unittest
|
| 5 |
import numpy as np
|
| 6 |
from sklearn import model_selection
|
| 7 |
+
from pysr import PySRRegressor, load
|
| 8 |
from pysr.sr import run_feature_selection, _handle_feature_selection
|
| 9 |
from sklearn.utils.estimator_checks import check_estimator
|
| 10 |
import sympy
|
|
|
|
| 280 |
model.fit(X.values, y.values, Xresampled=Xresampled.values)
|
| 281 |
self.assertLess(np.average((model.predict(X.values) - y.values) ** 2), 1e-4)
|
| 282 |
|
| 283 |
+
def test_load_model(self):
    """See if we can load a previously run model from the equation file."""
    # Local imports keep this block self-contained; only stdlib is added.
    import os
    import tempfile

    csv_file_data = """
    Complexity|MSE|Equation
    1|0.19951081|1.9762075
    3|0.12717344|(f0 + 1.4724599)
    4|0.104823045|pow_abs(2.2683423, cos(f3))"""
    # Strip the indents:
    csv_file_data = "\n".join([line.strip() for line in csv_file_data.split("\n")])

    # Write the fixture files into a throwaway directory so the test does
    # not leave `equation_file.csv*` behind in the working directory.
    with tempfile.TemporaryDirectory() as tmpdir:
        equation_file = os.path.join(tmpdir, "equation_file.csv")
        for path in (equation_file, equation_file + ".bkup"):
            with open(path, "w") as f:
                f.write(csv_file_data)

        model = load(
            equation_file,
            n_features_in=5,
            feature_names_in=["f0", "f1", "f2", "f3", "f4"],
            binary_operators=["+", "*", "/", "-", "^"],
            unary_operators=["cos"],
        )
        X = self.rstate.rand(100, 5)
        # Expected values for the third row of the csv (index 2):
        # pow_abs(2.2683423, cos(f3)).
        y_truth = 2.2683423 ** np.cos(X[:, 3])
        # Predict while the files still exist, in case predict re-reads them.
        y_test = model.predict(X, 2)

    np.testing.assert_allclose(y_truth, y_test)
|
| 308 |
+
|
| 309 |
|
| 310 |
class TestBest(unittest.TestCase):
|
| 311 |
def setUp(self):
|