# -*- coding: utf-8 -*-
"""app.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1umH6P4k0xEUEZsizNZfLzFttGrqivmwq
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import warnings

warnings.filterwarnings("ignore")

df = pd.read_csv("/content/diabetes_prediction_dataset.csv")

df.head(10)

df.describe()

df.info()

df.isnull().sum()

print(df.duplicated().sum())

df = df.drop_duplicates()
print("________Removed Duplicates________")
print(df.duplicated().sum())

# Function to add counts on top of bars
def add_counts(ax):
    for p in ax.patches:
        ax.annotate(f'{int(p.get_height())}',
                    (p.get_x() + p.get_width() / 2., p.get_height()),
                    ha='center', va='center', fontsize=10, color='black',
                    xytext=(0, 5), textcoords='offset points')

# Set up the matplotlib figure
fig, axes = plt.subplots(3, 2, figsize=(15, 15))

# Plot gender grouped by diabetes
ax = sns.countplot(ax=axes[0, 0], x='gender', hue='diabetes', data=df)
ax.set_title('Gender Grouped by Diabetes')
add_counts(ax)

# Plot hypertension grouped by diabetes
ax = sns.countplot(ax=axes[0, 1], x='hypertension', hue='diabetes', data=df)
ax.set_title('Hypertension Grouped by Diabetes')
add_counts(ax)

# Plot heart disease grouped by diabetes
ax = sns.countplot(ax=axes[1, 0], x='heart_disease', hue='diabetes', data=df)
ax.set_title('Heart Disease Grouped by Diabetes')
add_counts(ax)

# Plot smoking history grouped by diabetes
ax = sns.countplot(ax=axes[1, 1], x='smoking_history', hue='diabetes', data=df)
ax.set_title('Smoking History Grouped by Diabetes')
add_counts(ax)

# Plot diabetes counts
ax = sns.countplot(ax=axes[2, 0], x='diabetes', data=df)
axes[2, 0].set_title('Diabetes Count')
add_counts(ax)

# Create pie plot for diabetes
diabetes_counts = df['diabetes'].value_counts()
axes[2, 1].pie(diabetes_counts, labels=diabetes_counts.index, autopct='%1.1f%%', startangle=90)
axes[2, 1].set_title('Diabetes Distribution')
axes[2, 1].axis('equal')  # Equal aspect ratio ensures that the pie is drawn as a circle
axes[2, 1].legend(title='Diabetes:', loc='upper right')

# Adjust the layout and show the plots
plt.tight_layout()
plt.show()
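# A quick complementary check (a minimal sketch; it assumes only the numeric
# columns listed below, which are the ones used throughout this notebook):
# a correlation heatmap hints at which features track the diabetes label
# before any modelling is done.
numeric_cols = ['age', 'hypertension', 'heart_disease', 'bmi', 'HbA1c_level', 'blood_glucose_level', 'diabetes']
plt.figure(figsize=(8, 6))
sns.heatmap(df[numeric_cols].corr(), annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Correlation of Numeric Features')
plt.tight_layout()
plt.show()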
# Calculate minimum, maximum, and average age
min_age = df['age'].min()
max_age = df['age'].max()
avg_age = df['age'].mean()

# Count of individuals with and without diabetes
diabetes_counts = df['diabetes'].value_counts()

# Group by diabetes status and calculate min and max ages
grouped_ages = df.groupby('diabetes')['age'].agg(['min', 'max'])

# Print the results
print("Minimum Age:", min_age)
print("Maximum Age:", max_age)
print("Average Age:", avg_age)
print(diabetes_counts)
print("Age Statistics by Diabetes Status:")
print(grouped_ages)

# Plotting
fig, ax = plt.subplots(1, 2, figsize=(14, 6))

# Plot the overall min, max, and average age
bars = ax[0].bar(['Min Age', 'Max Age', 'Avg Age'], [min_age, max_age, avg_age], color=['blue', 'red', 'green'])
ax[0].set_title('Overall Age Statistics')
ax[0].set_ylabel('Age')

# Annotate bars with their values
for bar in bars:
    yval = bar.get_height()
    ax[0].text(bar.get_x() + bar.get_width() / 2, yval, round(yval, 2), va='bottom')  # add text at the top of each bar

# Plot min and max ages grouped by diabetes status
grouped_bars = grouped_ages.plot(kind='bar', ax=ax[1])
ax[1].set_title('Age Statistics by Diabetes Status')
ax[1].set_ylabel('Age')

# Annotate bars with their values
for p in grouped_bars.patches:
    grouped_bars.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))

plt.tight_layout()
plt.show()

cross_table = pd.crosstab(df['diabetes'], df['smoking_history'])

# Create subplots
fig, ax = plt.subplots(1, 2, figsize=(20, 8))

# Plot the cross table as a heatmap
sns.heatmap(cross_table, cmap='YlOrRd', annot=True, fmt='d', linewidths=0.5, linecolor='black', ax=ax[0])
ax[0].set_title('Diabetes and Smoking History (Heatmap)')
ax[0].set_xlabel('Smoking History')
ax[0].set_ylabel('Diabetes')

# Plot the cross table with separate bars for smoking history
cross_table.plot(kind='bar', stacked=False, ax=ax[1], color=plt.cm.Paired.colors)
ax[1].set_title('Diabetes and Smoking History (Bar Plot)')
ax[1].set_xlabel('Diabetes')
ax[1].set_ylabel('Count')
ax[1].legend(title='Smoking History', bbox_to_anchor=(1.05, 1), loc='upper left')

# Annotate bars with their values
for container in ax[1].containers:
    ax[1].bar_label(container)

plt.tight_layout()
plt.show()

# Encode the categorical columns
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
df['gender'] = le.fit_transform(df['gender'])
df['smoking_history'] = le.fit_transform(df['smoking_history'])

df.head()

# Select features and target variable from the DataFrame
features = ['gender', 'age', 'hypertension', 'heart_disease', 'smoking_history', 'bmi', 'HbA1c_level', 'blood_glucose_level']
X = df[features]
Y = df['diabetes']

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply PCA
pca = PCA()
X_pca = pca.fit_transform(X_scaled)

# Plot the cumulative explained variance
cum_var = pca.explained_variance_ratio_.cumsum()
plt.figure(figsize=(10, 6))
plt.plot(range(1, len(cum_var) + 1), cum_var, marker='o', linestyle='--')
plt.title('Explained Variance by Number of Principal Components')
plt.xlabel('Number of Principal Components')
plt.ylabel('Cumulative Explained Variance')
plt.grid()

# Note: cum_var is monotonically increasing, so argmax() on it always points
# at the last component. Instead, pick the smallest number of components that
# reaches 95% cumulative explained variance.
max_index = np.argmax(cum_var >= 0.95)

# Annotate the chosen component
plt.annotate(f'95% reached: PC {max_index + 1}',
             xy=(max_index + 1, cum_var[max_index]),
             xytext=(max_index + 2, cum_var[max_index] - 0.05),
             arrowprops=dict(facecolor='black', arrowstyle='->', color='black'))
plt.show()
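# A quick look at the PCA loadings (a minimal sketch using the fitted `pca`
# and the `features` list above) shows which original features dominate each
# principal component.
loadings = pd.DataFrame(pca.components_.T, index=features,
                        columns=[f'PC{i+1}' for i in range(pca.n_components_)])
print(loadings.round(3))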
# Print cumulative explained variance ratios
for i, ratio in enumerate(pca.explained_variance_ratio_.cumsum()):
    print(f'Principal Component {i+1}: {ratio:.4f} cumulative explained variance')

# Number of components needed to reach 95% cumulative explained variance
n_components = max_index + 1

# Apply PCA with the chosen number of components
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X_scaled)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_pca, Y, test_size=0.2, random_state=42)

# Initialize and train the XGBoost model
xgb_model = XGBClassifier()
xgb_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = xgb_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'XGBoost Accuracy: {accuracy:.4f}')
print(f'XGBoost Classification Report:\n{classification_report(y_test, y_pred)}')

# Compute the confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)

# Plot the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=['No Diabetes', 'Diabetes'],
            yticklabels=['No Diabetes', 'Diabetes'])
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

import pickle

# Save the diabetes model
with open('Diabetes_model.pkl', 'wb') as f:
    pickle.dump(xgb_model, f)

# Prepare custom data
custom_data = [
    [1, 45, 0, 0, 1, 25.6, 6.5, 110],
    [0, 35, 1, 0, 0, 28.2, 7.2, 130],
    [1, 55, 1, 1, 1, 31.4, 8.0, 150],
    [0, 42, 0, 1, 0, 26.9, 7.0, 120],
    [1, 50, 1, 0, 1, 29.7, 7.8, 140]
]

# Convert to a pandas DataFrame
custom_df = pd.DataFrame(custom_data, columns=features)

# Standardize the custom data
custom_X = scaler.transform(custom_df[features])

# Apply the PCA transformation
custom_X_pca = pca.transform(custom_X)

# Make predictions using the trained XGBoost model
custom_predictions = xgb_model.predict(custom_X_pca)

# Print the predictions
for i, pred in enumerate(custom_predictions):
    if pred == 0:
        print(f"Person {i+1} is not predicted to have diabetes.")
    else:
        print(f"Person {i+1} is predicted to have diabetes.")

import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Second model: predict hypertension. The original cell reused the diabetes
# features and target here, which contradicted the hypertension labels below,
# so the target is switched to the 'hypertension' column and 'diabetes' is
# used as a feature instead.
hyp_features = ['gender', 'age', 'diabetes', 'heart_disease', 'smoking_history', 'bmi', 'HbA1c_level', 'blood_glucose_level']
X_hyp = df[hyp_features]
Y_hyp = df['hypertension']

# Step 1: Split the data
X_train, X_test, y_train, y_test = train_test_split(X_hyp, Y_hyp, test_size=0.2, random_state=42)

# Step 2: Instantiate the classifier (use_label_encoder was dropped; it is
# deprecated in recent XGBoost versions)
xgb_clf = xgb.XGBClassifier(eval_metric='logloss')

# Step 3: Train the model
xgb_clf.fit(X_train, y_train)

# Step 4: Make predictions
y_pred = xgb_clf.predict(X_test)

# Step 5: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model accuracy: {accuracy:.2f}")

# Compute the confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)

# Plot the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=['No Hypertension', 'Hypertension'],
            yticklabels=['No Hypertension', 'Hypertension'])
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

import pickle

# Save the hypertension model (the original cell pickled the diabetes model
# by mistake)
with open('hypertension_model.pkl', 'wb') as f:
    pickle.dump(xgb_clf, f)
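# Sanity check (a minimal sketch): reload the pickled hypertension model and
# confirm it reproduces the in-memory predictions on the test split.
with open('hypertension_model.pkl', 'rb') as f:
    reloaded_model = pickle.load(f)
assert (reloaded_model.predict(X_test) == xgb_clf.predict(X_test)).all()
print("Reloaded model matches in-memory predictions.")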
custom_data = [
    [1, 45, 0, 0, 1, 25.6, 6.5, 110],
    [0, 35, 1, 0, 0, 28.2, 7.2, 130],
    [1, 55, 1, 1, 1, 31.4, 8.0, 150],
    [0, 42, 1, 1, 0, 26.9, 7.0, 120],
    [1, 50, 1, 0, 1, 29.7, 7.8, 140]
]

custom_df = pd.DataFrame(custom_data, columns=hyp_features)

custom_predictions = xgb_clf.predict(custom_df)

for i, pred in enumerate(custom_predictions):
    if pred == 0:
        print(f"Person {i+1} is not predicted to have hypertension.")
    else:
        print(f"Person {i+1} is predicted to have hypertension.")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display
import cv2
import io
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf

print(tf.__version__)

import kagglehub

# Download the latest version of the dataset
path = kagglehub.dataset_download("borhanitrash/alzheimer-mri-disease-classification-dataset")
print("Path to dataset files:", path)

# The parquet files are assumed to already be in /content; adjust these paths
# to point into the kagglehub `path` above if reading from the download.
train = '/content/train-00000-of-00001-c08a401c53fe5312.parquet'
test = '/content/test-00000-of-00001-44110b9df98c5585.parquet'

categorias = {
    0: 'Mild_Demented',
    1: 'Moderate_Demented',
    2: 'Non_Demented',
    3: 'Very_Mild_Demented'
}

data_train = pd.read_parquet(train)
data_test = pd.read_parquet(test)

data_train.head()

img_dict = data_train['image'][0]
byte_string = img_dict['bytes']
nparr = np.frombuffer(byte_string, np.uint8)
img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

image = Image.open(io.BytesIO(byte_string))
display(image)

def extraccion_y_transformacion(images_set):
    """Decode the raw image bytes of a dataset into a list of arrays."""
    et_list_images = []
    images_bytes = images_set['image']
    for img_dict in images_bytes:
        byte_string = img_dict['bytes']
        nparr = np.frombuffer(byte_string, np.uint8)
        # Decode as grayscale: the model below expects a (128, 128, 1) input,
        # while the original IMREAD_COLOR decode produced three channels.
        img = cv2.imdecode(nparr, cv2.IMREAD_GRAYSCALE)
        et_list_images.append(img)
    return et_list_images

def visualizar_imagenes(image_set, categorias, limit=5):
    """Display the first `limit` images with their category names."""
    fig, axes = plt.subplots(1, limit, figsize=(10, 5))
    for i, (ax, row) in enumerate(zip(axes, image_set.iterrows())):
        img_dict = row[1]['image']
        label = row[1]['label']
        name = categorias[label]
        byte_string = img_dict['bytes']
        image = Image.open(io.BytesIO(byte_string))
        ax.imshow(image, cmap='gray')
        ax.set_title(name)
        ax.axis('off')
        if i + 1 == limit:
            break
    plt.tight_layout()
    plt.show()

train_transformado = extraccion_y_transformacion(data_train)
test_transformado = extraccion_y_transformacion(data_test)

print(train_transformado[:1])

visualizar_imagenes(data_train, categorias, limit=5)

y_test = []
for label in data_test['label']:
    y_test.append(label)

y_train = []
for label in data_train['label']:
    y_train.append(label)

y_train = np.array(y_train)
y_test = np.array(y_test)

unique, counts = np.unique(y_train, return_counts=True)
plt.bar(unique, counts)
plt.xlabel('Classes')
plt.ylabel('Count')
plt.title('Class distribution')
plt.xticks(unique)
plt.show()

y_train = tf.one_hot(y_train.astype(np.int32), depth=4)
y_test = tf.one_hot(y_test.astype(np.int32), depth=4)

y_train

# Normalize pixel values to [0, 1]
train_transformado = np.array(train_transformado) / 255
test_transformado = np.array(test_transformado) / 255

# Add the grayscale channel axis
train_transformado = [np.expand_dims(img, axis=-1) for img in train_transformado]
test_transformado = [np.expand_dims(img, axis=-1) for img in test_transformado]

train_transformado = np.array(train_transformado)
test_transformado = np.array(test_transformado)

train_transformado[0].shape

train_transformado.shape

class MinMaxScaler3D(MinMaxScaler):
    """MinMaxScaler that accepts a (H, W, C) array and preserves its shape."""

    def fit_transform(self, X, y=None):
        x = np.reshape(X, newshape=(X.shape[0] * X.shape[1], X.shape[2]))
        return np.reshape(super().fit_transform(x, y=y), newshape=X.shape)
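# Quick illustration of MinMaxScaler3D on hypothetical data: it flattens the
# spatial axes, scales each channel to [0, 1], and restores the original
# (H, W, C) shape.
demo = np.random.rand(4, 4, 1) * 255
demo_scaled = MinMaxScaler3D().fit_transform(demo)
print(demo_scaled.shape, demo_scaled.min(), demo_scaled.max())  # (4, 4, 1) 0.0 1.0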
scaler = MinMaxScaler3D()

train_scaled = [scaler.fit_transform(X=img) for img in train_transformado]
train_scaled = np.array(train_scaled)

test_scaled = [scaler.fit_transform(X=img) for img in test_transformado]
test_scaled = np.array(test_scaled)

train_scaled.shape

from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras import Sequential, initializers
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.optimizers import Adam

# optimizer = Adam()
optimizer = Adam(
    learning_rate=0.001,  # learning rate
    beta_1=0.9,           # decay rate of the first moment
    beta_2=0.999,         # decay rate of the second moment
    epsilon=1e-07         # smoothing term
)

model = Sequential([
    layers.Input(shape=(128, 128, 1)),
    layers.Conv2D(64, kernel_size=(2, 2), activation='relu',
                  kernel_initializer=initializers.HeNormal(seed=42), padding='same'),
    # layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=(2, 2)),
    # layers.Dropout(0.25),
    layers.Conv2D(64, kernel_size=(2, 2), activation='relu',
                  kernel_initializer=initializers.HeNormal(seed=42), padding='same'),
    # layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=(2, 2)),
    # layers.Dropout(0.25),
    layers.Conv2D(128, kernel_size=(3, 3), activation='relu',
                  kernel_initializer=initializers.HeNormal(seed=42), padding='same'),
    # layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=(2, 2)),
    # layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dropout(0.25),
    layers.Dense(256, activation='relu'),
    layers.Dense(len(categorias), activation='softmax')
])

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

# Incomplete alternative model stub from the notebook, commented out so it
# does not overwrite the compiled model above:
# model = Sequential([
#     Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
#     ...
# ])
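# ImageDataGenerator is imported above but never used. A hedged sketch of how
# augmentation could be previewed on the scaled training images (the
# parameters are illustrative, not tuned; the run below does not use them):
datagen = ImageDataGenerator(rotation_range=10, zoom_range=0.1,
                             width_shift_range=0.05, height_shift_range=0.05)
aug_batch = next(datagen.flow(train_scaled[:10], batch_size=10, shuffle=False))
print(aug_batch.shape)  # (10, 128, 128, 1)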
class myCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        if logs.get('accuracy', 0) > 0.995:
            print("\nReached 99.5% accuracy so cancelling training!")
            self.model.stop_training = True

callbacks = myCallback()

history = model.fit(
    train_scaled,
    y_train,
    batch_size=10,
    epochs=20,
    validation_split=0.1,
    callbacks=[callbacks]
)

plt.plot(history.history['loss'], label='Train loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.plot(history.history['accuracy'], label='Train accuracy')
plt.plot(history.history['val_accuracy'], label='Validation accuracy')
plt.legend()
plt.title('Loss and accuracy (train and validation) per epoch')
plt.show()

history.model.layers

w, b = history.model.layers[0].get_weights()
b.shape

test_loss, test_acc = model.evaluate(test_scaled, y_test, verbose=2)
print(f'Test accuracy: {test_acc}')

predictions = model.predict(test_scaled)

predictions[0]

np.argmax(predictions[0])

data_test['label'][0]

from sklearn.metrics import classification_report

predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(y_test, axis=1)

report = classification_report(true_classes, predicted_classes)
print(report)

def plot_image(i, predictions_array, true_label, img):
    predictions_array, true_label, img = predictions_array, true_label[i], img[i]
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(img, cmap=plt.cm.binary)
    predicted_label = np.argmax(predictions_array)
    if predicted_label == true_label:
        color = 'blue'
    else:
        color = 'red'
    plt.xlabel("{} {:2.0f}% ({})".format(categorias[predicted_label],
                                         100 * np.max(predictions_array),
                                         categorias[true_label]),
               color=color)

def plot_value_array(i, predictions_array, true_label):
    predictions_array, true_label = predictions_array, true_label[i]
    plt.grid(False)
    plt.xticks(range(4))
    plt.yticks([])
    thisplot = plt.bar(range(4), predictions_array, color="#777777")
    plt.ylim([0, 1])
    predicted_label = np.argmax(predictions_array)
    thisplot[predicted_label].set_color('red')
    thisplot[true_label].set_color('blue')

i = 0
plt.figure(figsize=(6, 3))
plt.subplot(1, 2, 1)
plot_image(i, predictions[i], np.argmax(y_test, axis=1), test_scaled)
plt.subplot(1, 2, 2)
plot_value_array(i, predictions[i], np.argmax(y_test, axis=1))
plt.show()

i = 8
plt.figure(figsize=(6, 3))
plt.subplot(1, 2, 1)
plot_image(i, predictions[i], np.argmax(y_test, axis=1), test_scaled)
plt.subplot(1, 2, 2)
plot_value_array(i, predictions[i], np.argmax(y_test, axis=1))
plt.show()

num_rows = 4
num_cols = 3
num_images = num_rows * num_cols
plt.figure(figsize=(2 * 2 * num_cols, 2 * num_rows))
for i in range(num_images):
    plt.subplot(num_rows, 2 * num_cols, 2 * i + 1)
    plot_image(i, predictions[i], np.argmax(y_test, axis=1), test_scaled)
    plt.subplot(num_rows, 2 * num_cols, 2 * i + 2)
    plot_value_array(i, predictions[i], np.argmax(y_test, axis=1))
plt.tight_layout()
plt.show()

!pip install streamlit

with open("app.py", "w") as file:
    file.write("""
# Streamlit Multi-Page App for Hypertension and Diabetes Prediction

import streamlit as st
from streamlit_option_menu import option_menu
import pandas as pd
import numpy as np

# Placeholder models (replace with the actual models trained in the notebook)
class PlaceholderModel:
    def predict(self, X):
        return np.random.choice([0, 1], size=(len(X),))

diabetes_model = PlaceholderModel()
hypertension_model = PlaceholderModel()

# Streamlit app pages
st.set_page_config(page_title="Health Prediction App", layout="wide")

# Sidebar navigation
with st.sidebar:
    selected = option_menu(
        "Navigation",
        ["Home", "Hypertension", "Diabetes"],
        icons=["house", "activity", "heart"],
        menu_icon="menu-app",
        default_index=0
    )

if selected == "Home":
    st.title("Health Prediction App")
    st.write("Select the prediction model from the sidebar to get started.")

elif selected == "Hypertension":
    st.title("Hypertension Prediction")

    # Input form for hypertension
    age = st.number_input("Age", min_value=0, max_value=120, value=30)
    systolic_bp = st.number_input("Systolic Blood Pressure", min_value=50, max_value=250, value=120)
    diastolic_bp = st.number_input("Diastolic Blood Pressure", min_value=30, max_value=150, value=80)
    cholesterol = st.number_input("Cholesterol Level", min_value=50, max_value=400, value=200)
    smoking = st.selectbox("Smoking Status", ("Non-Smoker", "Former Smoker", "Current Smoker"))
    activity = st.selectbox("Physical Activity Level", ("Low", "Moderate", "High"))

    smoking_encoded = {"Non-Smoker": 0, "Former Smoker": 1, "Current Smoker": 2}[smoking]
    activity_encoded = {"Low": 0, "Moderate": 1, "High": 2}[activity]

    data = pd.DataFrame({
        'Age': [age],
        'SystolicBP': [systolic_bp],
        'DiastolicBP': [diastolic_bp],
        'Cholesterol': [cholesterol],
        'SmokingStatus': [smoking_encoded],
        'PhysicalActivity': [activity_encoded]
    })

    st.write("Input Data:", data)

    if st.button("Predict Hypertension"):
        prediction = hypertension_model.predict(data)
        st.subheader("Prediction Result")
        st.write("Hypertension Detected" if prediction[0] == 1 else "No Hypertension Detected")

elif selected == "Diabetes":
    st.title("Diabetes Prediction")

    # Input form for diabetes
    pregnancies = st.number_input("Pregnancies", min_value=0, max_value=20, value=1)
    glucose = st.number_input("Glucose Level", min_value=0, max_value=300, value=100)
    blood_pressure = st.number_input("Blood Pressure", min_value=0, max_value=200, value=80)
    skin_thickness = st.number_input("Skin Thickness", min_value=0, max_value=100, value=20)
    insulin = st.number_input("Insulin Level", min_value=0, max_value=900, value=30)
    bmi = st.number_input("BMI", min_value=0.0, max_value=70.0, value=25.0)
    dpf = st.number_input("Diabetes Pedigree Function", min_value=0.0, max_value=3.0, value=0.5)
    age = st.number_input("Age", min_value=0, max_value=120, value=30)

    data = pd.DataFrame({
        'Pregnancies': [pregnancies],
        'Glucose': [glucose],
        'BloodPressure': [blood_pressure],
        'SkinThickness': [skin_thickness],
        'Insulin': [insulin],
        'BMI': [bmi],
        'DiabetesPedigreeFunction': [dpf],
        'Age': [age]
    })

    st.write("Input Data:", data)

    if st.button("Predict Diabetes"):
        prediction = diabetes_model.predict(data)
        st.subheader("Prediction Result")
        st.write("Diabetes Detected" if prediction[0] == 1 else "No Diabetes Detected")
""")
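# The Streamlit app above uses random placeholder models. The pickled
# classifiers trained earlier can be loaded like this (a sketch; the app's
# input columns would also have to be changed to match the features the
# models were actually trained on before wiring these in):
with open('Diabetes_model.pkl', 'rb') as f:
    real_diabetes_model = pickle.load(f)
with open('hypertension_model.pkl', 'rb') as f:
    real_hypertension_model = pickle.load(f)
print(type(real_diabetes_model).__name__, type(real_hypertension_model).__name__)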
icons=["house", "activity", "heart"], menu_icon="menu-app", default_index=0 ) if selected == "Home": st.title("Health Prediction App") st.write("Select the prediction model from the sidebar to get started.") elif selected == "Hypertension": st.title("Hypertension Prediction") # Input form for Hypertension age = st.number_input("Age", min_value=0, max_value=120, value=30) systolic_bp = st.number_input("Systolic Blood Pressure", min_value=50, max_value=250, value=120) diastolic_bp = st.number_input("Diastolic Blood Pressure", min_value=30, max_value=150, value=80) cholesterol = st.number_input("Cholesterol Level", min_value=50, max_value=400, value=200) smoking = st.selectbox("Smoking Status", ("Non-Smoker", "Former Smoker", "Current Smoker")) activity = st.selectbox("Physical Activity Level", ("Low", "Moderate", "High")) smoking_encoded = {"Non-Smoker": 0, "Former Smoker": 1, "Current Smoker": 2}[smoking] activity_encoded = {"Low": 0, "Moderate": 1, "High": 2}[activity] data = pd.DataFrame({ 'Age': [age], 'SystolicBP': [systolic_bp], 'DiastolicBP': [diastolic_bp], 'Cholesterol': [cholesterol], 'SmokingStatus': [smoking_encoded], 'PhysicalActivity': [activity_encoded] }) st.write("Input Data:", data) if st.button("Predict Hypertension"): prediction = hypertension_model.predict(data) st.subheader("Prediction Result") st.write("Hypertension Detected" if prediction[0] == 1 else "No Hypertension Detected") elif selected == "Diabetes": st.title("Diabetes Prediction") # Input form for Diabetes pregnancies = st.number_input("Pregnancies", min_value=0, max_value=20, value=1) glucose = st.number_input("Glucose Level", min_value=0, max_value=300, value=100) blood_pressure = st.number_input("Blood Pressure", min_value=0, max_value=200, value=80) skin_thickness = st.number_input("Skin Thickness", min_value=0, max_value=100, value=20) insulin = st.number_input("Insulin Level", min_value=0, max_value=900, value=30) bmi = st.number_input("BMI", min_value=0.0, max_value=70.0, value=25.0) dpf = st.number_input("Diabetes Pedigree Function", min_value=0.0, max_value=3.0, value=0.5) age = st.number_input("Age", min_value=0, max_value=120, value=30) data = pd.DataFrame({ 'Pregnancies': [pregnancies], 'Glucose': [glucose], 'BloodPressure': [blood_pressure], 'SkinThickness': [skin_thickness], 'Insulin': [insulin], 'BMI': [bmi], 'DiabetesPedigreeFunction': [dpf], 'Age': [age] }) st.write("Input Data:", data) if st.button("Predict Diabetes"): prediction = diabetes_model.predict(data) st.subheader("Prediction Result") st.write("Diabetes Detected" if prediction[0] == 1 else "No Diabetes Detected") """) !pip install pyngrok !ngrok config add-authtoken 2ubz5Rmqi6qvjBOR7V60Wgzl4uk_64gzCGjEYSRJhNrBKnf9R !pip install streamlit-option-menu from pyngrok import ngrok !streamlit run app.py &>/dev/null& public_url = ngrok.connect(8501) print(f"Streamlit app is live at {public_url}")