Spaces:

Yashvj123
/

Life_Expectancy_Regression_Model

Sleeping

App Files Files Community

Yashvj123 committed on Mar 21, 2025

Commit

b7885e0

verified ·

1 Parent(s): 20ed088

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -35

app.py CHANGED Viewed

@@ -288,70 +288,55 @@ elif st.session_state.current_page == "Simple EDA":
 elif st.session_state.current_page == "Data Pre-processing":
     st.markdown("<h1 class='title'>Data Preprocessing</h1>", unsafe_allow_html=True)
-    # Title for Handling Missing Values
     st.markdown("<h2 class='subtitle' style='text-align: center;'>Handling Missing Values</h2>", unsafe_allow_html=True)
     st.markdown("<br>", unsafe_allow_html=True)
-    # Using Median Imputation
     st.markdown("""
         <h5 style="text-align: center;">
             <b>Using "Median" Imputation to Fill Highly Skewed Data</b>
         </h5>
     """, unsafe_allow_html=True)
-    code_median = """
-datac['GDP'].fillna(datac['GDP'].median(), inplace=True)
-datac['Population'].fillna(datac['Population'].median(), inplace=True)
-datac['Hepatitis B'].fillna(datac['Hepatitis B'].median(), inplace=True)
-datac['Total expenditure'].fillna(datac['Total expenditure'].median(), inplace=True)
-datac['Adult Mortality'].fillna(datac['Adult Mortality'].median(), inplace=True)
-datac['Alcohol'].fillna(datac['Alcohol'].median(), inplace=True)
-datac['thinness  1-19 years'].fillna(datac['Alcohol'].median(), inplace=True)
-datac['thinness 5-9 years'].fillna(datac['Alcohol'].median(), inplace=True)
-    """
-    st.code(code_median, language="python")
     st.markdown("<br>", unsafe_allow_html=True)
-    # Using Mean Imputation
     st.markdown("""
         <h5 style="text-align: center;">
             <b>Mean Imputation for Columns with Small Missing Values and Normally Distributed Data</b>
         </h5>
     """, unsafe_allow_html=True)
-    code_mean = """
-datac['Diphtheria'].fillna(datac['Diphtheria'].mean(), inplace=True)
-datac['Polio'].fillna(datac['Polio'].mean(), inplace=True)
-datac['BMI'].fillna(datac['BMI'].mean(), inplace=True)
-datac['Income composition of resources'].fillna(datac['Income composition of resources'].mean(), inplace=True)
-datac['Schooling'].fillna(datac['Schooling'].mean(), inplace=True)
-datac['Life expectancy'].fillna(datac['Life expectancy'].mean(), inplace=True)
-    """
-    st.code(code_mean, language="python")
     st.markdown("<br>", unsafe_allow_html=True)
-    # One-Hot Encoding for "Status" Column
     st.markdown("""
         <h5 style="text-align: center;">
             <b>Applying One-Hot Encoding on "Status" Column</b>
         </h5>
     """, unsafe_allow_html=True)
-    code_ohe = """
-from sklearn.preprocessing import OneHotEncoder
-oe = OneHotEncoder(drop="first", sparse_output=False)
-datac["Status"] = oe.fit_transform(datac[["Status"]])
-    """
-    st.code(code_ohe, language="python")
     st.markdown("<br>", unsafe_allow_html=True)
     if st.button("🔙 Go Back to Model Pipeline"):
-        switch_page("Model Pipeline")
 elif st.session_state.current_page == "EDA":
     st.markdown("<h1 class='title'>Exploratory Data Analysis (EDA)</h1>", unsafe_allow_html=True)
@@ -664,6 +649,13 @@ elif st.session_state.current_page == "Final Model":
      caption="50 Trails",
      use_container_width=True)
     st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True)
     st.markdown("<h3 style='text-align: center;'>Selected Best-Fit Model</h3>", unsafe_allow_html=True)

 elif st.session_state.current_page == "Data Pre-processing":
     st.markdown("<h1 class='title'>Data Preprocessing</h1>", unsafe_allow_html=True)
     st.markdown("<h2 class='subtitle' style='text-align: center;'>Handling Missing Values</h2>", unsafe_allow_html=True)
     st.markdown("<br>", unsafe_allow_html=True)
     st.markdown("""
         <h5 style="text-align: center;">
             <b>Using "Median" Imputation to Fill Highly Skewed Data</b>
         </h5>
+        <p style="text-align: justify;">
+            Median imputation is used to handle missing values in columns where data distribution is skewed.
+            This method is more robust than mean imputation in such cases, as it prevents the effect of outliers
+            from distorting the dataset. For example, GDP, Population, and Adult Mortality tend to have extreme values,
+            making median a better choice for filling in missing data.
+        </p>
     """, unsafe_allow_html=True)
     st.markdown("<br>", unsafe_allow_html=True)
     st.markdown("""
         <h5 style="text-align: center;">
             <b>Mean Imputation for Columns with Small Missing Values and Normally Distributed Data</b>
         </h5>
+        <p style="text-align: justify;">
+            Mean imputation is applied to columns where missing values are relatively small and the data follows a normal
+            distribution. This method ensures that the overall distribution remains unchanged. Columns like BMI, Polio,
+            and Schooling are typically well-suited for this approach as they do not contain extreme outliers that could
+            distort the mean.
+        </p>
     """, unsafe_allow_html=True)
     st.markdown("<br>", unsafe_allow_html=True)
     st.markdown("""
         <h5 style="text-align: center;">
             <b>Applying One-Hot Encoding on "Status" Column</b>
         </h5>
+        <p style="text-align: justify;">
+            The "Status" column contains categorical data, differentiating countries as either <b>Developed</b> or
+            <b>Developing</b>. Since machine learning models work better with numerical data, we apply One-Hot Encoding,
+            which converts this categorical variable into a numerical format. We use the "drop='first'" parameter to avoid
+            multicollinearity by keeping only one of the binary categories.
+        </p>
     """, unsafe_allow_html=True)
     st.markdown("<br>", unsafe_allow_html=True)
     if st.button("🔙 Go Back to Model Pipeline"):
+        switch_page("Model Pipeline")
 elif st.session_state.current_page == "EDA":
     st.markdown("<h1 class='title'>Exploratory Data Analysis (EDA)</h1>", unsafe_allow_html=True)
      caption="50 Trails",
      use_container_width=True)
+    st.markdown(
+    "<p style='text-align: center; font-weight: bold; font-size: 16px;'>"
+    "From the above trials, we selected the <b>9th trial</b> as its train score and test score have minimal difference."
+    "</p>",
+    unsafe_allow_html=True
+    )
     st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True)
     st.markdown("<h3 style='text-align: center;'>Selected Best-Fit Model</h3>", unsafe_allow_html=True)