Harika22 committed on
Commit
2d76a15
·
verified ·
1 Parent(s): b57a9f3

Update pages/2_Simple_EDA.py

Browse files
Files changed (1) hide show
  1. pages/2_Simple_EDA.py +13 -4
pages/2_Simple_EDA.py CHANGED
@@ -14,16 +14,19 @@ st.markdown("""
14
  if "df" in st.session_state and st.session_state.df is not None:
15
  df = st.session_state.df
16
 
 
17
  st.markdown("<h3 style='color: #2a52be;'>Dataset Preview📌</h3>", unsafe_allow_html=True)
18
  st.dataframe(df.head())
19
 
20
-
21
  st.markdown("<h3 style='color: #843f5b;'>Dataset Shape</h3>", unsafe_allow_html=True)
22
  st.write(f"🔹 The dataset contains **{df.shape[0]} rows** and **{df.shape[1]} columns**.")
23
-
 
24
  st.markdown("<h3 style='color: #e25822;'>Column Names & Data Types</h3>", unsafe_allow_html=True)
25
  st.write(df.dtypes)
26
 
 
27
  st.markdown("<h3 style='color: #9400d3;'>Dataset Information📝</h3>", unsafe_allow_html=True)
28
 
29
  buffer = io.StringIO()
@@ -34,6 +37,7 @@ if "df" in st.session_state and st.session_state.df is not None:
34
  st.markdown(f"<pre style='background-color: #f8f8f8; padding: 10px; border-radius: 5px; font-size: 14px; font-family: monospace;'>{info_str}</pre>", unsafe_allow_html=True)
35
 
36
 
 
37
  st.markdown("<h3 style='color: #9400d3;'>Numerical and Categorical Columns</h3>", unsafe_allow_html=True)
38
 
39
  numerical_cols = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
@@ -42,6 +46,7 @@ if "df" in st.session_state and st.session_state.df is not None:
42
  st.write(f"🔹 **Numerical Columns ({len(numerical_cols)}):** {', '.join(numerical_cols) if numerical_cols else 'None'}")
43
  st.write(f"🔹 **Categorical Columns ({len(categorical_cols)}):** {', '.join(categorical_cols) if categorical_cols else 'None'}")
44
 
 
45
  st.markdown("<h3 style='color: #e25822;'>Unique Values in Categorical Columns</h3>", unsafe_allow_html=True)
46
 
47
  if categorical_cols:
@@ -62,6 +67,7 @@ if "df" in st.session_state and st.session_state.df is not None:
62
  st.info("No categorical columns detectedℹ️.")
63
 
64
 
 
65
  st.markdown("<h3 style='color: #843f5b;'>Summary Statistics for Numerical Columns</h3>", unsafe_allow_html=True)
66
 
67
  st.write("🔹 **Basic statistical insights into the dataset:**")
@@ -73,7 +79,8 @@ if "df" in st.session_state and st.session_state.df is not None:
73
  st.write(df[categorical_cols].describe(include='object'))
74
  else:
75
  st.info("No categorical columns detectedℹ️.")
76
-
 
77
  st.markdown("<h3 style='color: #9400d3;'>Missing Values in the Dataset⚠️</h3>", unsafe_allow_html=True)
78
  missing_values = df.isnull().sum()
79
 
@@ -84,6 +91,7 @@ if "df" in st.session_state and st.session_state.df is not None:
84
  st.write("🔹 **Columns with Missing Values:**")
85
  st.write(missing_values[missing_values > 0])
86
 
 
87
  st.markdown("<h3 style='color: #2a52be;'>Duplicate Records</h3>", unsafe_allow_html=True)
88
  duplicate_count = df.duplicated().sum()
89
 
@@ -94,6 +102,7 @@ if "df" in st.session_state and st.session_state.df is not None:
94
  st.write("🔹 **Example Duplicate Rows:**")
95
  st.dataframe(df[df.duplicated()].head())
96
 
 
97
  st.markdown("<h3 style='color: #e25822;'>Outlier Detection</h3>", unsafe_allow_html=True)
98
 
99
  if numerical_cols:
@@ -121,4 +130,4 @@ if "df" in st.session_state and st.session_state.df is not None:
121
 
122
 
123
  else:
124
- st.warning("No dataset found! Please upload a dataset first⚠️.")
 
14
  if "df" in st.session_state and st.session_state.df is not None:
15
  df = st.session_state.df
16
 
17
+ # Dataset Preview
18
  st.markdown("<h3 style='color: #2a52be;'>Dataset Preview📌</h3>", unsafe_allow_html=True)
19
  st.dataframe(df.head())
20
 
21
+ # Shape of the Data
22
  st.markdown("<h3 style='color: #843f5b;'>Dataset Shape</h3>", unsafe_allow_html=True)
23
  st.write(f"🔹 The dataset contains **{df.shape[0]} rows** and **{df.shape[1]} columns**.")
24
+
25
+ # Column Names & Data Types
26
  st.markdown("<h3 style='color: #e25822;'>Column Names & Data Types</h3>", unsafe_allow_html=True)
27
  st.write(df.dtypes)
28
 
29
+ # 📝 Dataset Information (Equivalent to df.info())
30
  st.markdown("<h3 style='color: #9400d3;'>Dataset Information📝</h3>", unsafe_allow_html=True)
31
 
32
  buffer = io.StringIO()
 
37
  st.markdown(f"<pre style='background-color: #f8f8f8; padding: 10px; border-radius: 5px; font-size: 14px; font-family: monospace;'>{info_str}</pre>", unsafe_allow_html=True)
38
 
39
 
40
+ # Numerical and categorical Columns
41
  st.markdown("<h3 style='color: #9400d3;'>Numerical and Categorical Columns</h3>", unsafe_allow_html=True)
42
 
43
  numerical_cols = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
 
46
  st.write(f"🔹 **Numerical Columns ({len(numerical_cols)}):** {', '.join(numerical_cols) if numerical_cols else 'None'}")
47
  st.write(f"🔹 **Categorical Columns ({len(categorical_cols)}):** {', '.join(categorical_cols) if categorical_cols else 'None'}")
48
 
49
+ # Unique Values in Categorical Columns
50
  st.markdown("<h3 style='color: #e25822;'>Unique Values in Categorical Columns</h3>", unsafe_allow_html=True)
51
 
52
  if categorical_cols:
 
67
  st.info("No categorical columns detectedℹ️.")
68
 
69
 
70
+ # Summary Statistics
71
  st.markdown("<h3 style='color: #843f5b;'>Summary Statistics for Numerical Columns</h3>", unsafe_allow_html=True)
72
 
73
  st.write("🔹 **Basic statistical insights into the dataset:**")
 
79
  st.write(df[categorical_cols].describe(include='object'))
80
  else:
81
  st.info("No categorical columns detectedℹ️.")
82
+
83
+ # Checking for Missing Values
84
  st.markdown("<h3 style='color: #9400d3;'>Missing Values in the Dataset⚠️</h3>", unsafe_allow_html=True)
85
  missing_values = df.isnull().sum()
86
 
 
91
  st.write("🔹 **Columns with Missing Values:**")
92
  st.write(missing_values[missing_values > 0])
93
 
94
+ # Checking for Duplicate Records
95
  st.markdown("<h3 style='color: #2a52be;'>Duplicate Records</h3>", unsafe_allow_html=True)
96
  duplicate_count = df.duplicated().sum()
97
 
 
102
  st.write("🔹 **Example Duplicate Rows:**")
103
  st.dataframe(df[df.duplicated()].head())
104
 
105
+ # 📊 Outlier Detection
106
  st.markdown("<h3 style='color: #e25822;'>Outlier Detection</h3>", unsafe_allow_html=True)
107
 
108
  if numerical_cols:
 
130
 
131
 
132
  else:
133
+ st.warning("No dataset found! Please upload a dataset first⚠️.")