from dataclasses import dataclass, field
from typing import Callable, Optional

import gradio as gr
import numpy as np
from tqdm import tqdm

from .activation import Activation
from .loss import Loss, LogitsLoss

DTYPE = np.float32
@dataclass
class NN:
    epochs: int
    learning_rate: float
    hidden_size: int
    input_size: int
    batch_size: float  # fraction of the training set sampled per epoch, in (0, 1]
    output_size: int
    hidden_activation_fn: Activation
    output_activation_fn: Activation
    loss_fn: Loss | LogitsLoss
    seed: int
    _gradio_app: bool = False
    _p_bar: Optional[tqdm | gr.Progress] = field(default=None, init=False)
    _forward_fn: Optional[Callable] = field(default=None, init=False)
    _loss_history: list = field(default_factory=list, init=False)
    _wo: np.ndarray = field(default_factory=lambda: np.empty(0, dtype=DTYPE), init=False)
    _wh: np.ndarray = field(default_factory=lambda: np.empty(0, dtype=DTYPE), init=False)
    _bo: np.ndarray = field(default_factory=lambda: np.empty(0, dtype=DTYPE), init=False)
    _bh: np.ndarray = field(default_factory=lambda: np.empty(0, dtype=DTYPE), init=False)
    def __post_init__(self) -> None:
        assert 0 < self.batch_size <= 1, "batch_size is a fraction in (0, 1]"
        self._init_weights_and_biases()
        self._forward_fn, self._p_bar = self._pre_train()
        assert self._forward_fn is not None
        assert self._p_bar is not None
    def _pre_train(self) -> tuple[Callable, tqdm | gr.Progress]:
        def _get_forward_fn() -> Callable:
            # A LogitsLoss consumes raw logits (the activation, e.g. softmax,
            # is fused into the loss), so skip the output activation.
            if isinstance(self.loss_fn, LogitsLoss):
                return self._forward_logits
            return self._forward

        def _get_p_bar() -> tqdm | gr.Progress:
            if self._gradio_app:
                return gr.Progress().tqdm(range(self.epochs))
            return tqdm(range(self.epochs), unit="epoch", ascii=" >=")

        return _get_forward_fn(), _get_p_bar()
    @classmethod
    def from_dict(cls, args: dict) -> "NN":
        return cls(**args)
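    # Usage sketch (illustrative values): NN.from_dict({"epochs": 100,
    # "learning_rate": 0.01, ...}) builds the same object as calling NN(...)
    # with those keyword arguments directly.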
    def _init_weights_and_biases(self) -> None:
        """
        NN._init_weights_and_biases(): Should only be run once, right before
        the training loop, to randomly initialize the weights and biases.

        params:
            NN object with hidden layer size, output size, and input size
            defined.
        returns:
            None; modifies the _bh, _bo, _wo, _wh attributes in place.
        """
        np.random.seed(self.seed)
        # Biases start at zero; weights use He initialization
        # (std = sqrt(2 / fan_in)), which suits ReLU-like hidden activations.
        self._bh = np.zeros((1, self.hidden_size), dtype=DTYPE)
        self._bo = np.zeros((1, self.output_size), dtype=DTYPE)
        self._wh = np.asarray(
            np.random.randn(self.input_size, self.hidden_size)
            * np.sqrt(2 / self.input_size),
            dtype=DTYPE,
        )
        self._wo = np.asarray(
            np.random.randn(self.hidden_size, self.output_size)
            * np.sqrt(2 / self.hidden_size),
            dtype=DTYPE,
        )
    def _forward(self, X_train: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        # Hidden layer: affine transform followed by the hidden activation
        hidden_layer_output = self.hidden_activation_fn.forward(
            np.dot(X_train, self._wh) + self._bh
        )
        # Output (prediction) layer uses the configured output activation
        y_hat = self.output_activation_fn.forward(
            np.dot(hidden_layer_output, self._wo) + self._bo
        )
        return y_hat, hidden_layer_output
    def _forward_logits(self, X_train: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        hidden_layer_output = self.hidden_activation_fn.forward(
            np.dot(X_train, self._wh) + self._bh,
        )
        # Unlike _forward, the output layer applies no activation (no softmax);
        # raw logits are returned for a LogitsLoss to consume.
        logits = np.dot(hidden_layer_output, self._wo) + self._bo
        return logits, hidden_layer_output
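    # For reference, a minimal sketch of a fused LogitsLoss (an assumed
    # implementation; the real classes live in .loss). Fusing softmax into
    # the cross-entropy keeps the log/exp numerically stable and reduces the
    # backward pass to (probs - targets) / batch_size:
    #
    #   class SoftmaxCrossEntropy(LogitsLoss):
    #       def forward(self, y_hat, y_true):  # y_hat: raw logits
    #           shifted = y_hat - y_hat.max(axis=1, keepdims=True)
    #           log_probs = shifted - np.log(
    #               np.exp(shifted).sum(axis=1, keepdims=True)
    #           )
    #           return -np.mean(np.sum(y_true * log_probs, axis=1))
    #
    #       def backward(self, y_hat, y_true):  # dL/d(logits)
    #           probs = np.exp(y_hat - y_hat.max(axis=1, keepdims=True))
    #           probs /= probs.sum(axis=1, keepdims=True)
    #           return (probs - y_true) / y_hat.shape[0]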
    def _backward(
        self,
        X_train: np.ndarray,
        y_hat: np.ndarray,
        y_train: np.ndarray,
        hidden_output: np.ndarray,
    ) -> None:
        assert self._wo is not None
        # Error at the output: the derivative of the loss function with
        # respect to the network's output (or its logits, for a LogitsLoss)
        error_output = self.loss_fn.backward(y_hat, y_train)
        # Gradients for the output layer weights and biases (the learning
        # rate is folded into the deltas before clipping)
        wo_prime = np.dot(hidden_output.T, error_output) * self.learning_rate
        bo_prime = np.sum(error_output, axis=0, keepdims=True) * self.learning_rate
        # Propagate the error back through the hidden layer, scaling by the
        # hidden activation's derivative
        error_hidden = np.dot(
            error_output, self._wo.T
        ) * self.hidden_activation_fn.backward(hidden_output)
        # Gradients for the hidden layer weights and biases
        wh_prime = np.dot(X_train.T, error_hidden) * self.learning_rate
        bh_prime = np.sum(error_hidden, axis=0, keepdims=True) * self.learning_rate
        # Elementwise clipping of the deltas to prevent overflow
        max_norm = 1.0  # adjustable threshold
        wo_prime = np.clip(wo_prime, -max_norm, max_norm)
        bo_prime = np.clip(bo_prime, -max_norm, max_norm)
        wh_prime = np.clip(wh_prime, -max_norm, max_norm)
        bh_prime = np.clip(bh_prime, -max_norm, max_norm)
        # Gradient-descent update: W <- W - lr * dL/dW
        self._wo -= wo_prime
        self._wh -= wh_prime
        self._bo -= bo_prime
        self._bh -= bh_prime
    def train(self, X_train: np.ndarray, y_train: np.ndarray) -> "NN":
        assert self._p_bar is not None
        assert self._forward_fn is not None
        for _ in self._p_bar:
            # Sample a random batch: batch_size is a fraction of the dataset
            n_samples = int(self.batch_size * X_train.shape[0])
            batch_indices = np.random.choice(
                X_train.shape[0], size=n_samples, replace=False
            )
            X_train_batch = X_train[batch_indices]
            y_train_batch = y_train[batch_indices]
            # Forward pass, record the loss, then backpropagate
            y_hat, hidden_output = self._forward_fn(X_train=X_train_batch)
            loss = self.loss_fn.forward(y_hat=y_hat, y_true=y_train_batch)
            self._loss_history.append(loss)
            self._backward(
                X_train=X_train_batch,
                y_hat=y_hat,
                y_train=y_train_batch,
                hidden_output=hidden_output,
            )
        return self
    def predict(self, X_test: np.ndarray) -> np.ndarray:
        assert self._forward_fn is not None
        pred, _ = self._forward_fn(X_test)
        if isinstance(self.loss_fn, LogitsLoss):
            # _forward_logits returns raw logits, so apply the output
            # activation here; _forward has already applied it.
            return self.output_activation_fn.forward(pred)
        return pred
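# Example usage, as a sketch: ReLU, Softmax, and CrossEntropyLoss are assumed
# names standing in for whatever .activation and .loss actually export.
#
#   nn = NN(
#       epochs=100,
#       learning_rate=0.01,
#       hidden_size=64,
#       input_size=784,
#       batch_size=0.1,  # train on a random 10% of the data each epoch
#       output_size=10,
#       hidden_activation_fn=ReLU(),
#       output_activation_fn=Softmax(),
#       loss_fn=CrossEntropyLoss(),
#       seed=42,
#   )
#   nn.train(X_train, y_train)  # X_train: (n, 784), y_train: (n, 10) one-hot
#   preds = nn.predict(X_test)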