| |
| import streamlit as st |
|
|
| from numpy import vstack |
| from pandas import read_csv |
| from sklearn.preprocessing import LabelEncoder |
| from sklearn.metrics import accuracy_score |
| from torch.utils.data import Dataset |
| from torch.utils.data import DataLoader |
| from torch.utils.data import random_split |
| from torch import Tensor |
| from torch.nn import Linear |
| from torch.nn import ReLU |
| from torch.nn import Sigmoid |
| from torch.nn import Module |
| from torch.optim import SGD |
| from torch.nn import BCELoss |
| from torch.nn.init import kaiming_uniform_ |
| from torch.nn.init import xavier_uniform_ |
|
|
class CSVDataset(Dataset):
    """Dataset backed by a header-less CSV file.

    Every column except the last is stored in ``self.x`` as float32
    features. The last column is label-encoded and stored in ``self.y``
    as a float32 array of shape ``(n_rows, 1)``.
    """

    def __init__(self, path):
        """Read the CSV at ``path`` and build the feature and target arrays."""
        frame = read_csv(path, header=None)
        features = frame.values[:, :-1]
        labels = frame.values[:, -1]
        self.x = features.astype('float32')
        # Replace the raw class labels with integer codes, then keep them
        # as one float32 value per row in a column vector.
        encoded = LabelEncoder().fit_transform(labels).astype('float32')
        self.y = encoded.reshape((len(encoded), 1))

    def __len__(self):
        """Return the number of rows held by the dataset."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``[features, target]`` pair stored at ``idx``."""
        features, target = self.x[idx], self.y[idx]
        return [features, target]

    def get_splits(self, n_test=0.33):
        """Randomly split the dataset into ``(train, test)`` subsets.

        ``n_test`` is the fraction of rows assigned to the test subset;
        the remaining rows form the training subset.
        """
        total = len(self.x)
        held_out = round(n_test * total)
        return random_split(self, [total - held_out, held_out])
|
|
class MLP(Module):
    """Multilayer perceptron with a single sigmoid output unit.

    Layer widths are ``n_inputs -> 10 -> 8 -> 1``. The two hidden layers
    use ReLU activations with Kaiming-uniform weight initialisation; the
    output layer uses a sigmoid with Xavier-uniform initialisation.
    """

    def __init__(self, n_inputs):
        """Build the layers for inputs with ``n_inputs`` features."""
        super().__init__()
        self.hidden1 = Linear(n_inputs, 10)
        # The keyword is `nonlinearity`; a misspelled keyword raises TypeError.
        kaiming_uniform_(self.hidden1.weight, nonlinearity='relu')
        self.act1 = ReLU()
        self.hidden2 = Linear(10, 8)
        kaiming_uniform_(self.hidden2.weight, nonlinearity='relu')
        self.act2 = ReLU()
        self.hidden3 = Linear(8, 1)
        xavier_uniform_(self.hidden3.weight)
        self.act3 = Sigmoid()

    def forward(self, x):
        """Propagate ``x`` through the three layers and return the sigmoid output."""
        x = self.act1(self.hidden1(x))
        # Second hidden layer must go through its own activation, act2.
        x = self.act2(self.hidden2(x))
        x = self.act3(self.hidden3(x))
        return x
|
|
def prepare_data(path):
    """Load the CSV at ``path`` and wrap its train/test splits in loaders.

    Returns a ``(train_dl, test_dl)`` tuple. The training loader shuffles
    and uses batches of 32; the test loader keeps its order and uses
    batches of 1024.
    """
    train_split, test_split = CSVDataset(path).get_splits()
    return (
        DataLoader(train_split, batch_size=32, shuffle=True),
        DataLoader(test_split, batch_size=1024, shuffle=False),
    )
|
|
def train_model(train_dl, model, n_epochs=100, lr=0.01, momentum=0.9):
    """Train ``model`` in place with SGD on binary cross-entropy loss.

    Args:
        train_dl: Iterable yielding ``(inputs, targets)`` batches.
        model: Module whose parameters are optimised; it is called as
            ``model(inputs)`` and its output is compared with ``targets``.
        n_epochs: Number of full passes over ``train_dl``.
        lr: SGD learning rate.
        momentum: SGD momentum factor.

    Returns:
        None. The model's parameters are updated in place.
    """
    criterion = BCELoss()
    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
    for _ in range(n_epochs):
        for inputs, targets in train_dl:
            # Gradients accumulate across backward() calls, so reset them
            # before every batch.
            optimizer.zero_grad()
            loss = criterion(model(inputs), targets)
            loss.backward()
            optimizer.step()
|
|
def evaluate_model(test_dl, model):
    """Return the classification accuracy of ``model`` over ``test_dl``.

    Args:
        test_dl: Iterable yielding ``(inputs, targets)`` batches.
        model: Callable returning one prediction per input row; outputs
            are rounded to the nearest integer to obtain class labels.

    Returns:
        The value of ``accuracy_score`` computed over all batches.

    Raises:
        ValueError: If ``test_dl`` yields no batches.
    """
    predictions, actuals = [], []
    for inputs, targets in test_dl:
        # Detach so the output can be converted to a NumPy array.
        yhat = model(inputs).detach().numpy()
        actual = targets.numpy()
        actual = actual.reshape((len(actual), 1))
        predictions.append(yhat.round())
        actuals.append(actual)
    if not predictions:
        # vstack would also raise ValueError here, with a less helpful message.
        raise ValueError('test_dl yielded no batches to evaluate')
    predictions, actuals = vstack(predictions), vstack(actuals)
    return accuracy_score(actuals, predictions)
|
|
def predict(row, model):
    """Run ``model`` on a single ``row`` and return its output as a NumPy array.

    ``row`` is wrapped in an outer list so the model receives a batch
    containing exactly one sample.
    """
    batch = Tensor([row])
    return model(batch).detach().numpy()
|
|
# Script body: load the ionosphere data, train and evaluate the MLP, then
# classify one sample row.
path = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/ionosphere.csv'
train_dl, test_dl = prepare_data(path)
print(len(train_dl.dataset), len(test_dl.dataset))
# 34 inputs, matching the number of values in the sample row below.
model = MLP(34)
train_model(train_dl, model)
acc = evaluate_model(test_dl, model)
print('Accuracy: %.3f' % acc)
row = [1,0,0.99539,-0.05889,0.85243,0.02306,0.83398,-0.37708,1,0.03760,0.85243,-0.17755,0.59755,-0.44945,0.60536,-0.38223,0.84356,-0.38542,0.58212,-0.32192,0.56971,-0.29674,0.36946,-0.47357,0.56811,-0.51171,0.41078,-0.46168,0.21266,-0.34090,0.42267,-0.54487,0.18641,-0.45300]
yhat = predict(row, model)
# predict() returns a one-element 2-D array. Extract the scalar explicitly
# rather than relying on implicit array-to-scalar conversion inside the
# %-format, which newer NumPy releases deprecate for arrays with ndim > 0.
prob = yhat.item()
print('Predicted: %.3f (class=%d)' % (prob, round(prob)))