Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| from sklearn.impute import SimpleImputer | |
| from sklearn.preprocessing import MinMaxScaler | |
# Read the dataset from a CSV file in the current working directory.
df = pd.read_csv("diabetes.csv")

# In these measurement columns a value of 0 is treated as "not recorded",
# so every such cell is blanked out to NaN before imputation.
cols = ["Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI"]
df[cols] = df[cols].mask(df[cols] == 0)

# Fill each blanked cell with the mean of its own column.
imputer = SimpleImputer(strategy="mean")
imputer.fit(df[cols])
df[cols] = imputer.transform(df[cols])
# Remove outliers using the IQR rule: a row is dropped when any of its
# feature values lies more than 1.5 * IQR below Q1 or above Q3.
Q1 = df.quantile(0.25)
Q3 = df.quantile(0.75)
IQR = Q3 - Q1

# `Outcome` is the classification target (it becomes `y` later in the
# script), not a measurement, so it is left out of the outlier test.
# For a 0/1 column with a skewed class balance Q1 equals Q3, the fences
# collapse onto one value, and every row of the minority class would be
# discarded as an "outlier".
feature_cols = df.columns.drop("Outcome")
lower = Q1[feature_cols] - 1.5 * IQR[feature_cols]
upper = Q3[feature_cols] + 1.5 * IQR[feature_cols]

features = df[feature_cols]
is_outlier = ((features < lower) | (features > upper)).any(axis=1)
df = df[~is_outlier]
# Keep five predictor columns (Pregnancies, Glucose, Insulin, BMI, Age)
# as the feature matrix; the Outcome column is the target.
X = df.loc[:, ["Pregnancies", "Glucose", "Insulin", "BMI", "Age"]]
y = df["Outcome"]

# Rescale every feature to the [0, 1] range. The scaler learns each
# column's min and max from X and then applies them to the same X.
scaler = MinMaxScaler()
scaler.fit(X)
X = scaler.transform(X)
| # Machine Learning Models (DT, KNN, RF, NB, AB, LR, SVM) | |
| from sklearn.model_selection import train_test_split, cross_val_score | |
| from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier | |
| from sklearn.svm import SVC | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.tree import DecisionTreeClassifier | |
| from sklearn.neighbors import KNeighborsClassifier | |
| from sklearn.naive_bayes import GaussianNB | |
# Hold out 15% of the rows for testing and train on the remaining 85%.
# The seed is pinned so the partition is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.15,
)
# Initialize models.
# Keys are the short labels printed in the accuracy reports; values are
# unfitted scikit-learn classifiers.
# The estimators that use randomness while fitting (DT, RF, AB) are seeded
# with the same value as the train/test split (42). Otherwise the reported
# accuracies would change from run to run even though the split is fixed.
models = {
    "DT": DecisionTreeClassifier(random_state=42),
    "KNN": KNeighborsClassifier(n_neighbors=7),
    "RF": RandomForestClassifier(random_state=42),
    "NB": GaussianNB(),
    "AB": AdaBoostClassifier(random_state=42),
    "LR": LogisticRegression(),
    "SVM": SVC(),
}
# Evaluate via k-fold CV (k=10) on the full feature matrix and print the
# mean accuracy per model.
# NOTE(review): X was min-max scaled on the whole dataset before this
# point, so each fold's held-out rows contributed to the scaling
# statistics; treat these scores as slightly optimistic.
for name, model in models.items():
    scores = cross_val_score(model, X, y, cv=10, scoring="accuracy")
    print(f"{name} CV Accuracy: {scores.mean():.2%}")
# Evaluate via train-test split: fit each model on the training portion
# and print its accuracy on the held-out test portion.
# The estimators stored in `models` are fitted in place, so after this
# loop every entry of the dict holds a trained model.
for name, model in models.items():
    model.fit(X_train, y_train)
    acc = model.score(X_test, y_test)
    print(f"{name} Test Accuracy: {acc:.2%}")
# Neural Network (Keras)
| from tensorflow.keras.models import Sequential | |
| from tensorflow.keras.layers import Dense | |
| from tensorflow.keras.optimizers import SGD | |
# Feed-forward network with two hidden layers (architecture from paper):
# 5 inputs -> 26 ReLU units -> 5 ReLU units -> 1 sigmoid output.
# Note: this rebinds `model`, which the evaluation loops above used as
# their loop variable.
model = Sequential()
model.add(Dense(26, activation="relu", input_shape=(5,)))
model.add(Dense(5, activation="relu"))
model.add(Dense(1, activation="sigmoid"))
# Configure training: binary cross-entropy loss minimised with plain SGD
# at a learning rate of 0.01, tracking accuracy as the reported metric.
model.compile(
    loss="binary_crossentropy",
    optimizer=SGD(learning_rate=0.01),
    metrics=["accuracy"],
)

# Train silently for 400 epochs in mini-batches of 32. The test split is
# passed as validation data, so `history` records its loss and accuracy
# after every epoch alongside the training metrics.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=400,
    verbose=0,
)