Initial Commit Huggingface
Browse files- app.py +36 -0
- eda.py +112 -0
- list_nom_cols.txt +1 -0
- list_num_cols.txt +1 -0
- list_ord_cols.txt +1 -0
- model_pipeline.pkl +3 -0
- requirements.txt +6 -0
- style.css +53 -0
app.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import eda
|
| 3 |
+
import prediction
|
| 4 |
+
from streamlit_option_menu import option_menu
|
| 5 |
+
|
| 6 |
+
# Inject the project stylesheet into the page as an inline <style> tag.
with open("./style.css") as css_file:
    stylesheet = css_file.read()
st.markdown(f"<style>{stylesheet}</style>", unsafe_allow_html=True)

st.sidebar.header("Heart Failure Classification")

# Page renderers keyed by their sidebar label; insertion order is the order
# in which the entries appear in the menu.
PAGES = {
    "Dataset": eda.dataset,
    "Distribution": eda.distribution,
    "Boxplot": eda.boxplot,
    "Correlation Matrix": eda.corr_matrix,
    "Prediksi Klasifikasi": prediction.run,
}

with st.sidebar:
    st.write("Ediashta Revindra - FTDS-020")
    selected = option_menu(
        "Menu",
        list(PAGES),
        icons=["database", "bar-chart", "box", "link-45deg", "code-square"],
        menu_icon="cast",
        default_index=1,
    )

# Render the chosen page; an unrecognised selection renders nothing.
render_page = PAGES.get(selected)
if render_page is not None:
    render_page()
|
eda.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import seaborn as sns
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
import plotly.express as px
|
| 6 |
+
from PIL import Image
|
| 7 |
+
|
| 8 |
+
# Browser-tab title and page layout for the app.
# NOTE(review): this runs at import time, i.e. only when the module is first
# imported by the process — confirm that is sufficient for Streamlit reruns.
st.set_page_config(
    page_title="Heart Failure Classification - EDA",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Source CSV, fetched once when the module is imported.
# Named DATASET_URL (not `dataset`) so it is not silently rebound by the
# `dataset()` page function defined further down in this module.
DATASET_URL = "https://raw.githubusercontent.com/ediashta/p1-ftds-020-rmt-g3-ediashta/main/h8dsft_P1G3_Ediashta.csv"
data = pd.read_csv(DATASET_URL)

# Column groups used to slice the frame below.
cat_col = ["anaemia", "diabetes", "high_blood_pressure", "sex", "smoking"]
num_col = [
    "age",
    "creatinine_phosphokinase",
    "ejection_fraction",
    "platelets",
    "serum_creatinine",
    "serum_sodium",
    "time",
]

data_ord = data[cat_col]
data_num = data[num_col]
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def dataset():
    """Render the "Dataset" page: the raw table plus a description of each column.

    Reads the module-level ``data`` frame and writes to the Streamlit page;
    returns nothing.
    """
    st.title("Heart Failure Classification")
    st.subheader("Dataset")
    st.dataframe(data)

    st.subheader("Column Description")

    # One markdown bullet per column, split over two side-by-side containers.
    left_notes = (
        "* **age** : Umur ",
        "* **anaemia** : Penurunan Sel Darah Merah (**boolean**)",
        "* **creatinine_phosphokinase** : Tingkat Enzim CPK pada darah (**mcg/L**)",
        "* **diabetes** : Status diabetes (**boolean**)",
        "* **ejection_fraction** : Persentase darah keluar dari jantung pada tiap kontraksi (**percentage**)",
        "* **high_blood_pressure** : Status Hipertensi (**boolean**)",
        "* **platelets** : Platelets pada darah (**kiloplatelets/mL**) ",
    )
    right_notes = (
        "* **serum_creatinine** : Tingkat serum kreatinin pada darah (**mg/dL**) ",
        "* **serum_sodium** : Tingkat serum sodium pada darah (**mEq/L**)",
        "* **sex** : Jenis Kelamin (**binary**)",
        "* **smoking** : Status merokok (**boolean**)",
        "* **time** : Periode follow-up (**days**)",
        # Spelled exactly like the column the plots in this module read.
        "* **DEATH_EVENT** : Status kematian saat follow-up (**boolean**)",
    )

    col1, col2 = st.columns(2)
    for container, notes in ((col1, left_notes), (col2, right_notes)):
        for note in notes:
            container.markdown(note)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def distribution():
    """Render the "Distribution" page with two side-by-side plots.

    Left: count of rows per ``DEATH_EVENT`` value. Right: histogram of ``age``
    with a KDE overlay, coloured by ``DEATH_EVENT``. Both read the
    module-level ``data`` frame; returns nothing.
    """
    st.title("Heart Failure Classification")
    st.subheader("Distribution")

    col1, col2 = st.columns(2)

    # Death event bar plot. Drawing on an explicit Axes avoids depending on
    # pyplot's "current figure" state.
    col1.write("Death Event Barplot")
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.countplot(data=data, x="DEATH_EVENT", palette="coolwarm", ax=ax)
    col1.pyplot(fig)
    # Release the figure so figures do not accumulate across reruns.
    plt.close(fig)

    # Age distribution histogram.
    col2.write("Age Distribution")
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.histplot(
        data=data,
        x="age",
        kde=True,
        bins=30,
        hue="DEATH_EVENT",
        palette="coolwarm",
        ax=ax,
    )
    col2.pyplot(fig)
    plt.close(fig)
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def boxplot():
    """Render the "Boxplot" page: one box plot per numeric column.

    Columns come from the module-level ``data_num`` frame and are laid out
    row by row on a 2x4 grid; grid cells without a column are hidden.
    Returns nothing.
    """
    st.title("Heart Failure Classification")
    st.subheader("Boxplot")

    fig, axs = plt.subplots(2, 4, figsize=(20, 10))
    # Flatten row-major so the n-th column lands in the n-th grid cell.
    axes = axs.ravel()
    columns = list(data_num.columns)

    # zip stops at the shorter input, so at most len(axes) columns are drawn.
    for ax, column in zip(axes, columns):
        ax.set_title(column)
        ax.boxplot(data[column])

    # Hide only the cells that received no plot.
    for unused_ax in axes[len(columns):]:
        unused_ax.axis("off")

    fig.tight_layout()

    st.pyplot(fig)
    # Release the figure so figures do not accumulate across reruns.
    plt.close(fig)
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def corr_matrix():
    """Render the "Correlation Matrix" page.

    Draws an annotated heatmap of the pairwise correlations between the
    numeric columns of the module-level ``data`` frame. Returns nothing.
    """
    st.title("Heart Failure Classification")
    st.subheader("Correlation")

    # Restrict to numeric columns so a non-numeric column cannot break corr().
    correlations = data.select_dtypes(include="number").corr()

    fig, ax = plt.subplots(figsize=(12, 8))
    sns.heatmap(
        correlations,
        annot=True,
        fmt=".2f",
        cmap="coolwarm",
        vmin=-1,
        vmax=1,
        ax=ax,
    )
    fig.tight_layout()

    st.pyplot(fig)
    # Release the figure so figures do not accumulate across reruns.
    plt.close(fig)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
if __name__ == "__main__":
    # Running this module directly shows the dataset overview page.
    dataset()
|
list_nom_cols.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["sex"]
|
list_num_cols.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["age", "creatinine_phosphokinase", "ejection_fraction", "platelets", "serum_creatinine", "serum_sodium", "time"]
|
list_ord_cols.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
["anaemia", "high_blood_pressure", "smoking"]
|
model_pipeline.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b62a802d0a75369815408635de99cc36c37b83f44a1694980aa10a260450bd00
|
| 3 |
+
size 197575
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
seaborn
pandas
matplotlib
plotly
Pillow
scikit-learn==1.2.2
# Imported by app.py as `streamlit_option_menu`.
streamlit-option-menu
|
style.css
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Sidebar logo (adapted from Zachary Blackwood) */
[data-testid="stSidebar"] {
    background-image: url(https://streamlit.io/images/brand/streamlit-logo-secondary-colormark-darktext.png);
    background-repeat: no-repeat;
    background-position: 4px 20px;
    background-size: 200px;
}

/* Card look (adapted from https://startbootstrap.com/theme/sb-admin-2) */
div.css-1r6slb0.e1tzin5v2 {
    padding: 5% 5% 5% 10%;
    background-color: #FFFFFF;
    border: 1px solid #CCCCCC;
    border-radius: 5px;
    border-left: 0.5rem solid #9AD8E1 !important;
    box-shadow: 0 0.15rem 1.75rem 0 rgba(58, 59, 69, 0.15) !important;
}

/* Label text: bold, upper-case, accent colour */
label.css-mkogse.e16fv1kl2 {
    font-weight: 700 !important;
    text-transform: uppercase !important;
    color: #36b9cc !important;
}

/* Pull the main block container upwards */
div.block-container.css-18e3th9.egzxvld2 {
    margin-top: -5em;
}

/* Hidden elements: hamburger menu, footers, and vg-tooltip-element nodes */
div.css-r698ls.e8zbici2,
footer.css-ipbk5a.egzxvld4,
footer.css-12gp8ed.eknhn3m4,
div.vg-tooltip-element {
    display: none;
}
|
| 53 |
+
|