Spaces:
Build error
Build error
Commit
·
9e506b7
1
Parent(s):
880505a
moving on to testing other datasets!
Browse files
- .gitignore +1 -0
- app.py +1 -1
- example/main.py +5 -5
- nn/backprop.py → example/test.py +0 -0
- nn/activation.py +3 -6
- nn/nn.py +10 -3
- nn/train.py +51 -22
.gitignore
CHANGED
|
@@ -186,3 +186,4 @@ cython_debug/
|
|
| 186 |
#.idea/
|
| 187 |
|
| 188 |
*.swp
|
|
|
|
|
|
| 186 |
#.idea/
|
| 187 |
|
| 188 |
*.swp
|
| 189 |
+
.vscode
|
app.py
CHANGED
|
@@ -31,7 +31,7 @@ def neural_net():
|
|
| 31 |
|
| 32 |
try:
|
| 33 |
activation.get_activation(nn=net)
|
| 34 |
-
except Exception
|
| 35 |
return Response(
|
| 36 |
response="invalid activation function",
|
| 37 |
status=400,
|
|
|
|
| 31 |
|
| 32 |
try:
|
| 33 |
activation.get_activation(nn=net)
|
| 34 |
+
except Exception:
|
| 35 |
return Response(
|
| 36 |
response="invalid activation function",
|
| 37 |
status=400,
|
example/main.py
CHANGED
|
@@ -4,11 +4,11 @@ with open("iris.csv", "rb") as csv:
|
|
| 4 |
iris_data = csv.read()
|
| 5 |
|
| 6 |
ARGS = {
|
| 7 |
-
"epochs":
|
| 8 |
-
"hidden_size":
|
| 9 |
-
"learning_rate": 0.
|
| 10 |
-
"test_size": 0.
|
| 11 |
-
"activation": "
|
| 12 |
"features": ["sepal width", "sepal length", "petal width", "petal length"],
|
| 13 |
"target": "species",
|
| 14 |
"data": iris_data.decode("utf-8"),
|
|
|
|
| 4 |
iris_data = csv.read()
|
| 5 |
|
| 6 |
ARGS = {
|
| 7 |
+
"epochs": 10000,
|
| 8 |
+
"hidden_size": 8,
|
| 9 |
+
"learning_rate": 0.0001,
|
| 10 |
+
"test_size": 0.1,
|
| 11 |
+
"activation": "relu",
|
| 12 |
"features": ["sepal width", "sepal length", "petal width", "petal length"],
|
| 13 |
"target": "species",
|
| 14 |
"data": iris_data.decode("utf-8"),
|
nn/backprop.py → example/test.py
RENAMED
|
File without changes
|
nn/activation.py
CHANGED
|
@@ -22,14 +22,11 @@ def get_activation(nn: NN) -> Callable:
|
|
| 22 |
|
| 23 |
|
| 24 |
def relu(x):
|
| 25 |
-
return np.
|
| 26 |
|
| 27 |
|
| 28 |
def relu_prime(x):
|
| 29 |
-
|
| 30 |
-
return 1
|
| 31 |
-
else:
|
| 32 |
-
return 0
|
| 33 |
|
| 34 |
|
| 35 |
def sigmoid(x):
|
|
@@ -38,7 +35,7 @@ def sigmoid(x):
|
|
| 38 |
|
| 39 |
def sigmoid_prime(x):
|
| 40 |
s = sigmoid(x)
|
| 41 |
-
return s
|
| 42 |
|
| 43 |
|
| 44 |
def tanh(x):
|
|
|
|
| 22 |
|
| 23 |
|
| 24 |
def relu(x):
|
| 25 |
+
return np.maximum(0.0, x)
|
| 26 |
|
| 27 |
|
| 28 |
def relu_prime(x):
|
| 29 |
+
return np.maximum(0, x)
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
|
| 32 |
def sigmoid(x):
|
|
|
|
| 35 |
|
| 36 |
def sigmoid_prime(x):
|
| 37 |
s = sigmoid(x)
|
| 38 |
+
return s * (1 - s)
|
| 39 |
|
| 40 |
|
| 41 |
def tanh(x):
|
nn/nn.py
CHANGED
|
@@ -29,16 +29,23 @@ class NN:
|
|
| 29 |
self.df: pd.DataFrame = None
|
| 30 |
self.X: pd.DataFrame = None
|
| 31 |
self.y: pd.DataFrame = None
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
def set_df(self, df: pd.DataFrame) -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
assert isinstance(df, pd.DataFrame)
|
| 35 |
self.df = df
|
| 36 |
-
|
| 37 |
-
y = df[self.target]
|
| 38 |
x = df[self.features]
|
| 39 |
-
self.
|
| 40 |
self.X = pd.get_dummies(x, columns=self.features)
|
| 41 |
self.input_size = len(self.X.columns)
|
|
|
|
| 42 |
|
| 43 |
def set_func(self, f: Callable) -> None:
|
| 44 |
assert isinstance(f, Callable)
|
|
|
|
| 29 |
self.df: pd.DataFrame = None
|
| 30 |
self.X: pd.DataFrame = None
|
| 31 |
self.y: pd.DataFrame = None
|
| 32 |
+
self.y_dummy: pd.DataFrame = None
|
| 33 |
+
self.input_size: int = None
|
| 34 |
+
self.output_size: int = None
|
| 35 |
|
| 36 |
def set_df(self, df: pd.DataFrame) -> None:
|
| 37 |
+
|
| 38 |
+
# issue right now here because we need a way to convert
|
| 39 |
+
# back and forth from dummies and non dummy vars
|
| 40 |
+
|
| 41 |
assert isinstance(df, pd.DataFrame)
|
| 42 |
self.df = df
|
| 43 |
+
self.y = df[self.target]
|
|
|
|
| 44 |
x = df[self.features]
|
| 45 |
+
self.y_dummy = pd.get_dummies(self.y, columns=self.target)
|
| 46 |
self.X = pd.get_dummies(x, columns=self.features)
|
| 47 |
self.input_size = len(self.X.columns)
|
| 48 |
+
self.output_size = len(self.y_dummy.columns)
|
| 49 |
|
| 50 |
def set_func(self, f: Callable) -> None:
|
| 51 |
assert isinstance(f, Callable)
|
nn/train.py
CHANGED
|
@@ -1,32 +1,36 @@
|
|
| 1 |
from sklearn.model_selection import train_test_split
|
|
|
|
| 2 |
from typing import Callable
|
| 3 |
from nn.nn import NN
|
| 4 |
import numpy as np
|
| 5 |
|
| 6 |
|
| 7 |
-
def init_weights_biases(nn: NN)
|
|
|
|
| 8 |
bh = np.zeros((1, nn.hidden_size))
|
| 9 |
-
bo = np.zeros((1,
|
| 10 |
wh = np.random.randn(nn.input_size, nn.hidden_size) * \
|
| 11 |
np.sqrt(2 / nn.input_size)
|
| 12 |
-
wo = np.random.randn(nn.hidden_size,
|
|
|
|
| 13 |
return wh, wo, bh, bo
|
| 14 |
|
| 15 |
|
| 16 |
def train(nn: NN) -> dict:
|
| 17 |
wh, wo, bh, bo = init_weights_biases(nn=nn)
|
| 18 |
X_train, X_test, y_train, y_test = train_test_split(
|
| 19 |
-
nn.X,
|
| 20 |
-
nn.
|
| 21 |
test_size=nn.test_size,
|
|
|
|
| 22 |
)
|
| 23 |
|
| 24 |
-
|
| 25 |
loss_hist: list[float] = []
|
| 26 |
for _ in range(nn.epochs):
|
| 27 |
# compute hidden output
|
| 28 |
hidden_output = compute_node(
|
| 29 |
-
data=X_train
|
| 30 |
weights=wh,
|
| 31 |
biases=bh,
|
| 32 |
func=nn.func,
|
|
@@ -41,18 +45,43 @@ def train(nn: NN) -> dict:
|
|
| 41 |
)
|
| 42 |
# compute error & store it
|
| 43 |
error = y_hat - y_train
|
| 44 |
-
mse = mean_squared_error(y_train, y_hat)
|
| 45 |
loss_hist.append(mse)
|
| 46 |
|
|
|
|
| 47 |
# update weights & biases using gradient descent after
|
| 48 |
# computing derivatives.
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
return {
|
| 54 |
-
"
|
| 55 |
-
"loss_hist": loss_hist,
|
| 56 |
}
|
| 57 |
|
| 58 |
|
|
@@ -64,17 +93,17 @@ def mean_squared_error(y: np.array, y_hat: np.array) -> np.array:
|
|
| 64 |
return np.mean((y - y_hat) ** 2)
|
| 65 |
|
| 66 |
|
| 67 |
-
def hidden_weight_prime(data, error):
|
| 68 |
-
return np.dot(data.T, error)
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
def output_weight_prime(hidden_output, error):
|
| 72 |
-
return np.dot(hidden_output.T, error)
|
| 73 |
-
|
| 74 |
-
|
| 75 |
def hidden_bias_prime(error):
|
| 76 |
return np.sum(error, axis=0)
|
| 77 |
|
| 78 |
|
| 79 |
def output_bias_prime(error):
|
| 80 |
return np.sum(error, axis=0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from sklearn.model_selection import train_test_split
|
| 2 |
+
from sklearn.metrics import log_loss, accuracy_score, f1_score
|
| 3 |
from typing import Callable
|
| 4 |
from nn.nn import NN
|
| 5 |
import numpy as np
|
| 6 |
|
| 7 |
|
| 8 |
+
def init_weights_biases(nn: NN):
|
| 9 |
+
# np.random.seed(0)
|
| 10 |
bh = np.zeros((1, nn.hidden_size))
|
| 11 |
+
bo = np.zeros((1, nn.output_size))
|
| 12 |
wh = np.random.randn(nn.input_size, nn.hidden_size) * \
|
| 13 |
np.sqrt(2 / nn.input_size)
|
| 14 |
+
wo = np.random.randn(nn.hidden_size, nn.output_size) * \
|
| 15 |
+
np.sqrt(2 / nn.hidden_size)
|
| 16 |
return wh, wo, bh, bo
|
| 17 |
|
| 18 |
|
| 19 |
def train(nn: NN) -> dict:
|
| 20 |
wh, wo, bh, bo = init_weights_biases(nn=nn)
|
| 21 |
X_train, X_test, y_train, y_test = train_test_split(
|
| 22 |
+
nn.X.to_numpy(),
|
| 23 |
+
nn.y_dummy.to_numpy(),
|
| 24 |
test_size=nn.test_size,
|
| 25 |
+
# random_state=0,
|
| 26 |
)
|
| 27 |
|
| 28 |
+
ce: float = 0.0
|
| 29 |
loss_hist: list[float] = []
|
| 30 |
for _ in range(nn.epochs):
|
| 31 |
# compute hidden output
|
| 32 |
hidden_output = compute_node(
|
| 33 |
+
data=X_train,
|
| 34 |
weights=wh,
|
| 35 |
biases=bh,
|
| 36 |
func=nn.func,
|
|
|
|
| 45 |
)
|
| 46 |
# compute error & store it
|
| 47 |
error = y_hat - y_train
|
| 48 |
+
mse = mean_squared_error(y=y_train, y_hat=y_hat)
|
| 49 |
loss_hist.append(mse)
|
| 50 |
|
| 51 |
+
# compute derivatives of weights & biases
|
| 52 |
# update weights & biases using gradient descent after
|
| 53 |
# computing derivatives.
|
| 54 |
+
dwo = nn.learning_rate * output_weight_prime(hidden_output, error)
|
| 55 |
+
|
| 56 |
+
# Use NumPy to sum along the first axis (axis=0)
|
| 57 |
+
# and then reshape to match the shape of bo
|
| 58 |
+
dbo = nn.learning_rate * np.sum(output_bias_prime(error), axis=0)
|
| 59 |
+
|
| 60 |
+
dhidden = np.dot(error, wo.T) * nn.func_prime(hidden_output)
|
| 61 |
+
dwh = nn.learning_rate * hidden_weight_prime(X_train, dhidden)
|
| 62 |
+
dbh = nn.learning_rate * hidden_bias_prime(dhidden)
|
| 63 |
+
|
| 64 |
+
wh -= dwh
|
| 65 |
+
wo -= dwo
|
| 66 |
+
bh -= dbh
|
| 67 |
+
bo -= dbo
|
| 68 |
+
|
| 69 |
+
# compute final predictions on data not seen
|
| 70 |
+
hidden_output_test = compute_node(
|
| 71 |
+
data=X_test,
|
| 72 |
+
weights=wh,
|
| 73 |
+
biases=bh,
|
| 74 |
+
func=nn.func,
|
| 75 |
+
)
|
| 76 |
+
y_hat = compute_node(
|
| 77 |
+
data=hidden_output_test,
|
| 78 |
+
weights=wo,
|
| 79 |
+
biases=bo,
|
| 80 |
+
func=nn.func,
|
| 81 |
+
)
|
| 82 |
+
|
| 83 |
return {
|
| 84 |
+
"log loss": log_loss(y_true=y_test, y_pred=y_hat)
|
|
|
|
| 85 |
}
|
| 86 |
|
| 87 |
|
|
|
|
| 93 |
return np.mean((y - y_hat) ** 2)
|
| 94 |
|
| 95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
def hidden_bias_prime(error):
|
| 97 |
return np.sum(error, axis=0)
|
| 98 |
|
| 99 |
|
| 100 |
def output_bias_prime(error):
|
| 101 |
return np.sum(error, axis=0)
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def hidden_weight_prime(data, error):
|
| 105 |
+
return np.dot(data.T, error)
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def output_weight_prime(hidden_output, error):
|
| 109 |
+
return np.dot(hidden_output.T, error)
|