# Provenance note (scrape residue, commented out so the file parses):
# DHEIVER's picture — Duplicate from bharat10/heart_disease_prediction @ 0c2d844
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
# %matplotlib inline
import io
def importdata():
    """Load the heart-disease CSV, echo basic stats, and return the DataFrame.

    The CSV is expected beside this script; presumably its last column is
    the 'target' label used by the other helpers — verify against the file.
    """
    heart_df = pd.read_csv('heart_disease_data.csv')
    # Echo size and a preview so the console shows what was loaded.
    print("Dataset Length: ", len(heart_df))
    print("Dataset Shape: ", heart_df.shape)
    print("Dataset: ", heart_df.head())
    return heart_df
def splitdatasetL(heart_data, input_data):
    """Fit a logistic-regression model on heart_data and classify input_data.

    heart_data: DataFrame containing a 'target' column.
    input_data: a single feature row (sequence of 13 values).
    Returns the predicted class label for that row.
    """
    features = heart_data.drop(columns='target', axis=1)
    labels = heart_data['target']
    # Stratified 80/20 split, fixed seed for reproducibility.
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=0.2, stratify=labels, random_state=2)
    clf = LogisticRegression()
    clf.fit(train_X, train_y)
    # predict() wants a 2-D array: one sample -> shape (1, n_features).
    sample = np.asarray(input_data).reshape(1, -1)
    return clf.predict(sample)[0]
def splitdataset(balance_data):
    """Split the dataset into X (first 13 columns) and Y (column 13), then
    into 70/30 train/test partitions with a fixed seed.

    Returns (X, Y, X_train, X_test, y_train, y_test).
    """
    values = balance_data.values
    X = values[:, 0:13]
    Y = values[:, 13]
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.3, random_state=100)
    return X, Y, X_train, X_test, y_train, y_test
def train_using_gini(X_train, X_test, y_train):
    """Fit and return a depth-limited decision tree using the Gini criterion.

    X_test is accepted for signature parity with callers but is not used.
    """
    gini_tree = DecisionTreeClassifier(
        criterion="gini", random_state=100,
        max_depth=3, min_samples_leaf=5)
    gini_tree.fit(X_train, y_train)
    return gini_tree
def tarin_using_entropy(X_train, X_test, y_train):
    """Fit and return a depth-limited decision tree using the entropy criterion.

    NOTE: the name keeps the original misspelling ('tarin') because callers
    reference it by that name. X_test is unused; kept for signature parity.
    """
    entropy_tree = DecisionTreeClassifier(
        criterion="entropy", random_state=100,
        max_depth=3, min_samples_leaf=5)
    entropy_tree.fit(X_train, y_train)
    return entropy_tree
# Function to make predictions
def prediction(X_test, clf_object):
    """Run clf_object on X_test, echo the predictions, and return them."""
    preds = clf_object.predict(X_test)
    print("Predicted values:")
    print(preds)
    return preds
def RandomF(X_train, y_train, X_test):
    """Train a 1000-tree random forest (fixed seed) and predict on X_test."""
    forest = RandomForestClassifier(n_estimators=1000, random_state=42)
    forest.fit(X_train, y_train)
    return forest.predict(X_test)
def SBM(df, X_test):
    """Train an RBF-kernel SVM on df and classify the rows of X_test.

    df: DataFrame containing a 'target' column.
    X_test: feature rows to classify (same 13 raw feature columns).
    Returns the array of predicted labels.
    """
    X = df.drop('target', axis=1)
    y = df['target']
    # Only the training partition is used; the held-out split is discarded.
    X_train, _unused_X, y_train, _unused_y = train_test_split(
        X, y, test_size=0.3, random_state=42)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    # BUG FIX: was scaler.fit_transform(X_test), which re-fits the scaler on
    # the test data and scales it with different statistics than the training
    # set. transform() applies the training-set mean/std, as intended.
    X_test_scaled = scaler.transform(X_test)
    svm = SVC(kernel='rbf', gamma=0.1)
    svm.fit(X_train_scaled, y_train)
    return svm.predict(X_test_scaled)
def SBF(new_data):
    """Train a linear SVM on the CSV dataset and classify one patient record.

    new_data: DataFrame/array with a single row of the 13 raw features
    (same units/scale as the CSV — the model is trained on unscaled data).
    Returns the predicted class for that row.

    FIX: removed leftover debug prints (including a garbage marker string)
    that cluttered stdout on every prediction.
    """
    df = pd.read_csv('heart_disease_data.csv')
    X = df.drop('target', axis=1)
    y = df['target']
    # The 30% held-out split is unused; only the training portion matters here.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=5)
    svm = SVC(kernel='linear')
    svm.fit(X_train, y_train)
    y_pred = svm.predict(new_data)
    return y_pred[0]
def heart(age, gender, chestpaintype, restingbloodpressure, serumcholestrol,
          fastingbloodsugar, resting_ecg_result, maximumheartrate,
          exerciseinduced_angina, oldpeak, slope, ca, thal):
    """Classify one patient record with four models and build an accuracy chart.

    Returns (logistic-regression msg, decision-tree msg, random-forest msg,
    SVM msg, matplotlib figure) — in the order the Gradio output labels expect.

    FIXES vs. original:
    - return order was SL, SD, SS, SR, so the SVM and Random Forest messages
      appeared under each other's Gradio labels; now SL, SD, SR, SS.
    - removed leftover debug prints.
    """
    data = importdata()
    X, Y, X_train, X_test, y_train, y_test = splitdataset(data)
    clf_gini = train_using_gini(X_train, X_test, y_train)
    # Entropy tree is trained but never consulted; kept for parity with the
    # original behavior (training has no other side effects).
    clf_entropy = tarin_using_entropy(X_train, X_test, y_train)

    # --- Encode UI inputs into the dataset's numeric feature scheme ---
    fbs = 1 if fastingbloodsugar > 120 else 0
    g = 0 if gender == "Female" else 1
    exang = 0 if exerciseinduced_angina == "No" else 1
    cp_codes = {"Typical Angina": 0, "Non Typical Angina": 1,
                "Non Anginal Pain": 2}
    cp = cp_codes.get(chestpaintype, 3)  # anything else -> Asymptomatic
    ecg_codes = {"0 - Nothing to note": 0, "1 - ST-T abnormality": 1}
    ecg = ecg_codes.get(resting_ecg_result, 2)

    features = [age, g, cp, restingbloodpressure, serumcholestrol, fbs, ecg,
                maximumheartrate, exang, oldpeak, slope, ca, thal]
    XX = np.array(features)

    # HACK (kept from original): overwrite test row 1 with the patient's data
    # so the tree/forest predictions at index 1 refer to this patient.
    X_test[1] = features

    new_data = pd.DataFrame({'age': [age], 'sex': [g], 'cp': [cp],
                             'trestbps': [restingbloodpressure],
                             'chol': [serumcholestrol], 'fbs': [fbs],
                             'restecg': [ecg],
                             'thalach': [maximumheartrate], 'exang': [exang],
                             'oldpeak': [oldpeak], 'slope': [slope],
                             'ca': [ca], 'thal': [thal]})

    y_pred_gini = prediction(X_test, clf_gini)
    k = RandomF(X_train, y_train, X_test)
    m = SBF(new_data)
    pred = splitdatasetL(data, XX)

    if y_pred_gini[1] == 1.0:
        SD = "Based on our Decision Tree Machine Learning model which has an accuracy of 82.42%, you have high chances of having heart disease"
    else:
        SD = "Based on our Decision Tree Machine Learning model which has an accuracy of 82.42%, you are less likely to have heart disease"
    if pred == 1:
        SL = "Based on our Logistic Regression Machine Learning model which has an accuracy of 81.97%, you have high chances of having heart disease"
    else:
        SL = "Based on our Logistic Regression Machine Learning model which has an accuracy of 81.97%, you are less likely to have heart disease"
    if k[1] == 1:
        SR = "Based on our Random Forest Machine Learning model which has an accuracy of 82.42%, you have high chances of having heart disease"
    else:
        SR = "Based on our Random Forest Machine Learning model which has an accuracy of 82.42%, you are less likely to have heart disease"
    if m == 1:
        SS = "Based on our SVM Machine Learning model which has an accuracy of 89.01%, you have high chances of having heart disease"
    else:
        SS = "Based on our SVM Machine Learning model which has an accuracy of 89.01%, you are less likely to have heart disease"

    # Accuracy bar chart; the hard-coded figures match the message texts above.
    models = ['Logistic Regression', 'Decision Tree', 'SVM', 'Random Forest']
    accuracies = [81.97, 82.42, 89.01, 82.42]
    fig, ax = plt.subplots(figsize=(40, 40))
    ax.bar(models, accuracies)
    ax.set_xlabel('Models')
    ax.set_ylabel('Accuracy')
    ax.set_title('Machine Learning Models Accuracy')

    # Order matches the Gradio output labels: LR, DT, RF, SVM, plot.
    return SL, SD, SR, SS, fig
# Gradio UI wiring: 13 patient inputs -> four model messages + accuracy chart.
# Input widgets must stay in the same order as heart()'s parameters.
interface = gr.Interface(
    fn=heart,
    inputs=["number", gr.Radio(["Male", "Female"]),gr.Dropdown(["Typical Angina", "Non Typical Angina", "Non Anginal Pain", "Asymptomatic"]), "number", "number", "number", gr.Dropdown(["0 - Nothing to note", "1 - ST-T abnormality", "2 - Possible or definite left ventricular hypertrophy"]), "number", gr.Radio(["No", "Yes"]), "number" , "number", "number", "number"],
    # NOTE(review): gr.outputs.* is the legacy Gradio output API (removed in
    # Gradio 4.x) — confirm the pinned Gradio version still supports it.
    outputs=[gr.outputs.Label(label="Logistic Regression", type="text"),gr.outputs.Label(label="Decision Tree", type="auto"),gr.outputs.Label(label="Random Forest", type="text"),gr.outputs.Label(label="SVM", type="auto"),"plot"],
)
interface.launch()