Spaces:
Sleeping
Sleeping
Update pages/2Simple_EDA.py
Browse files- pages/2Simple_EDA.py +11 -7
pages/2Simple_EDA.py
CHANGED
|
@@ -19,21 +19,24 @@ if "df" in st.session_state and st.session_state.df is not None:
|
|
| 19 |
st.dataframe(df.head())
|
| 20 |
|
| 21 |
# Shape of the Data
|
| 22 |
-
st.markdown("<h3 style='color: #843f5b;'>Dataset Shape
|
| 23 |
st.write(f"🔹 The dataset contains **{df.shape[0]} rows** and **{df.shape[1]} columns**.")
|
| 24 |
|
| 25 |
# Column Names & Data Types
|
| 26 |
-
st.markdown("<h3 style='color: #e25822;'>Column Names & Data Types
|
| 27 |
st.write(df.dtypes)
|
| 28 |
|
| 29 |
# π Dataset Information (Equivalent to df.info())
|
| 30 |
-
st.markdown("<
|
| 31 |
|
| 32 |
buffer = io.StringIO()
|
| 33 |
df.info(buf=buffer)
|
| 34 |
info_str = buffer.getvalue()
|
| 35 |
st.text(info_str)
|
| 36 |
|
|
|
|
|
|
|
|
|
|
| 37 |
# Numerical and categorical Columns
|
| 38 |
st.markdown("<h3 style='color: #9400d3;'>Numerical and Categorical Columns</h3>", unsafe_allow_html=True)
|
| 39 |
|
|
@@ -84,6 +87,7 @@ if "df" in st.session_state and st.session_state.df is not None:
|
|
| 84 |
if missing_values.sum() == 0:
|
| 85 |
st.success("No missing values found!")
|
| 86 |
else:
|
|
|
|
| 87 |
st.write("🔹 **Columns with Missing Values:**")
|
| 88 |
st.write(missing_values[missing_values > 0])
|
| 89 |
|
|
@@ -99,7 +103,7 @@ if "df" in st.session_state and st.session_state.df is not None:
|
|
| 99 |
st.dataframe(df[df.duplicated()].head())
|
| 100 |
|
| 101 |
# π Outlier Detection
|
| 102 |
-
st.markdown("<h3 style='color: #e25822;'
|
| 103 |
|
| 104 |
if numerical_cols:
|
| 105 |
outlier_info = {}
|
|
@@ -116,13 +120,13 @@ if "df" in st.session_state and st.session_state.df is not None:
|
|
| 116 |
outlier_info[col] = outliers
|
| 117 |
|
| 118 |
if outlier_info:
|
| 119 |
-
st.warning("
|
| 120 |
for col, count in outlier_info.items():
|
| 121 |
st.write(f"🔹 **{col}:** {count} outliers")
|
| 122 |
else:
|
| 123 |
-
st.success("
|
| 124 |
else:
|
| 125 |
-
st.info("
|
| 126 |
|
| 127 |
|
| 128 |
else:
|
|
|
|
| 19 |
st.dataframe(df.head())
|
| 20 |
|
| 21 |
# Shape of the Data
|
| 22 |
+
st.markdown("<h3 style='color: #843f5b;'>Dataset Shape</h3>", unsafe_allow_html=True)
|
| 23 |
st.write(f"🔹 The dataset contains **{df.shape[0]} rows** and **{df.shape[1]} columns**.")
|
| 24 |
|
| 25 |
# Column Names & Data Types
|
| 26 |
+
st.markdown("<h3 style='color: #e25822;'>Column Names & Data Types</h3>", unsafe_allow_html=True)
|
| 27 |
st.write(df.dtypes)
|
| 28 |
|
| 29 |
# π Dataset Information (Equivalent to df.info())
|
| 30 |
+
st.markdown("<h3 style='color: #9400d3;'>Dataset Informationπ</h3>", unsafe_allow_html=True)
|
| 31 |
|
| 32 |
buffer = io.StringIO()
|
| 33 |
df.info(buf=buffer)
|
| 34 |
info_str = buffer.getvalue()
|
| 35 |
st.text(info_str)
|
| 36 |
|
| 37 |
+
st.markdown(f"<pre style='background-color: #f8f8f8; padding: 10px; border-radius: 5px; font-size: 14px; font-family: monospace;'>{info_str}</pre>", unsafe_allow_html=True)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
# Numerical and categorical Columns
|
| 41 |
st.markdown("<h3 style='color: #9400d3;'>Numerical and Categorical Columns</h3>", unsafe_allow_html=True)
|
| 42 |
|
|
|
|
| 87 |
if missing_values.sum() == 0:
|
| 88 |
st.success("No missing values found!")
|
| 89 |
else:
|
| 90 |
+
st.warning(f"Found missing values in the dataset.")
|
| 91 |
st.write("🔹 **Columns with Missing Values:**")
|
| 92 |
st.write(missing_values[missing_values > 0])
|
| 93 |
|
|
|
|
| 103 |
st.dataframe(df[df.duplicated()].head())
|
| 104 |
|
| 105 |
# π Outlier Detection
|
| 106 |
+
st.markdown("<h3 style='color: #e25822;'>Outlier Detection</h3>", unsafe_allow_html=True)
|
| 107 |
|
| 108 |
if numerical_cols:
|
| 109 |
outlier_info = {}
|
|
|
|
| 120 |
outlier_info[col] = outliers
|
| 121 |
|
| 122 |
if outlier_info:
|
| 123 |
+
st.warning("Outliers detected:")
|
| 124 |
for col, count in outlier_info.items():
|
| 125 |
st.write(f"🔹 **{col}:** {count} outliers")
|
| 126 |
else:
|
| 127 |
+
st.success("No significant outliers detected!")
|
| 128 |
else:
|
| 129 |
+
st.info("No numerical columns detectedℹ️.")
|
| 130 |
|
| 131 |
|
| 132 |
else:
|