Update pages/3_EDA_and_Feature_Engineering.py
Browse files
pages/3_EDA_and_Feature_Engineering.py
CHANGED
|
@@ -30,7 +30,7 @@ if 'df' in st.session_state:
|
|
| 30 |
ax.set_xlabel("Product Category")
|
| 31 |
ax.set_ylabel("Count")
|
| 32 |
ax.tick_params(axis='x', rotation=45))
|
| 33 |
-
st.
|
| 34 |
st.markdown("""
|
| 35 |
**Insights:**
|
| 36 |
- 5 product categories observed.
|
|
@@ -45,7 +45,7 @@ if 'df' in st.session_state:
|
|
| 45 |
ax.set_xlabel("Product Brand")
|
| 46 |
ax.set_ylabel("Count")
|
| 47 |
ax.tick_params(axis='x', rotation=45)
|
| 48 |
-
st.
|
| 49 |
st.markdown("""
|
| 50 |
**Insights:**
|
| 51 |
- Samsung and HP have the highest frequencies.
|
|
@@ -58,7 +58,7 @@ if 'df' in st.session_state:
|
|
| 58 |
ax.set_title("Product Price Distribution")
|
| 59 |
ax.set_xlabel("Product Price")
|
| 60 |
ax.set_ylabel("Count")
|
| 61 |
-
st.
|
| 62 |
st.markdown("""
|
| 63 |
**Insights:**
|
| 64 |
- Products span a wide price range, from near 0 to 3000.
|
|
@@ -73,7 +73,7 @@ if 'df' in st.session_state:
|
|
| 73 |
ax.set_xlabel("Product Price Bucket")
|
| 74 |
ax.set_ylabel("Count")
|
| 75 |
ax.tick_params(axis='x', rotation=45)
|
| 76 |
-
st.
|
| 77 |
st.markdown("""
|
| 78 |
**Insights:**
|
| 79 |
- "Very High" price bucket has the highest concentration.
|
|
@@ -94,7 +94,7 @@ if 'df' in st.session_state:
|
|
| 94 |
axs[1].set_xlabel("Customer Age")
|
| 95 |
|
| 96 |
plt.tight_layout()
|
| 97 |
-
st.
|
| 98 |
st.markdown("""
|
| 99 |
**Insights:**
|
| 100 |
- Age groups are relatively evenly distributed.
|
|
@@ -110,7 +110,7 @@ if 'df' in st.session_state:
|
|
| 110 |
wedgeprops={'edgecolor': 'black'},
|
| 111 |
ax=ax)
|
| 112 |
ax.set_title("Customer Gender Distribution")
|
| 113 |
-
st.
|
| 114 |
st.markdown("""
|
| 115 |
**Insights:**
|
| 116 |
- Gender distribution is almost equal.
|
|
@@ -123,7 +123,7 @@ if 'df' in st.session_state:
|
|
| 123 |
ax.set_title("Purchase Frequency Distribution")
|
| 124 |
ax.set_xlabel("Purchase Frequency")
|
| 125 |
ax.set_ylabel("Count")
|
| 126 |
-
st.
|
| 127 |
st.markdown("""
|
| 128 |
**Insights:**
|
| 129 |
- Purchase frequencies range from 1 to 19.
|
|
@@ -135,7 +135,7 @@ if 'df' in st.session_state:
|
|
| 135 |
ax.set_title("Customer Satisfaction Distribution")
|
| 136 |
ax.set_xlabel("Customer Satisfaction")
|
| 137 |
ax.set_ylabel("Count")
|
| 138 |
-
st.
|
| 139 |
st.markdown("""
|
| 140 |
**Insights:**
|
| 141 |
- Distinct peaks around integer values (1-5).
|
|
@@ -152,7 +152,7 @@ if 'df' in st.session_state:
|
|
| 152 |
startangle=90,
|
| 153 |
wedgeprops={'edgecolor': 'black'})
|
| 154 |
ax.set_title("Purchase Intent Distribution")
|
| 155 |
-
st.
|
| 156 |
st.markdown("""
|
| 157 |
**Insights:**
|
| 158 |
- Binary classification problem (0: Not Purchase, 1: Purchase).
|
|
@@ -169,12 +169,9 @@ if 'df' in st.session_state:
|
|
| 169 |
fig, ax = plt.subplots(figsize=(12, 8))
|
| 170 |
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5, ax=ax)
|
| 171 |
ax.set_title("Correlation Heatmap")
|
| 172 |
-
st.
|
| 173 |
st.markdown("""
|
| 174 |
**Insights:**
|
| 175 |
- Strong correlations can be observed between certain variables.
|
| 176 |
- Customer Satisfaction and Purchase Intent might have meaningful relationships.
|
| 177 |
""")
|
| 178 |
-
|
| 179 |
-
else:
|
| 180 |
-
st.write("Please upload your data.")
|
|
|
|
| 30 |
ax.set_xlabel("Product Category")
|
| 31 |
ax.set_ylabel("Count")
|
| 32 |
ax.tick_params(axis='x', rotation=45))
|
| 33 |
+
st.pyplot(fig)
|
| 34 |
st.markdown("""
|
| 35 |
**Insights:**
|
| 36 |
- 5 product categories observed.
|
|
|
|
| 45 |
ax.set_xlabel("Product Brand")
|
| 46 |
ax.set_ylabel("Count")
|
| 47 |
ax.tick_params(axis='x', rotation=45)
|
| 48 |
+
st.pyplot(fig)
|
| 49 |
st.markdown("""
|
| 50 |
**Insights:**
|
| 51 |
- Samsung and HP have the highest frequencies.
|
|
|
|
| 58 |
ax.set_title("Product Price Distribution")
|
| 59 |
ax.set_xlabel("Product Price")
|
| 60 |
ax.set_ylabel("Count")
|
| 61 |
+
st.pyplot(fig)
|
| 62 |
st.markdown("""
|
| 63 |
**Insights:**
|
| 64 |
- Products span a wide price range, from near 0 to 3000.
|
|
|
|
| 73 |
ax.set_xlabel("Product Price Bucket")
|
| 74 |
ax.set_ylabel("Count")
|
| 75 |
ax.tick_params(axis='x', rotation=45)
|
| 76 |
+
st.pyplot(fig)
|
| 77 |
st.markdown("""
|
| 78 |
**Insights:**
|
| 79 |
- "Very High" price bucket has the highest concentration.
|
|
|
|
| 94 |
axs[1].set_xlabel("Customer Age")
|
| 95 |
|
| 96 |
plt.tight_layout()
|
| 97 |
+
st.pyplot(fig)
|
| 98 |
st.markdown("""
|
| 99 |
**Insights:**
|
| 100 |
- Age groups are relatively evenly distributed.
|
|
|
|
| 110 |
wedgeprops={'edgecolor': 'black'},
|
| 111 |
ax=ax)
|
| 112 |
ax.set_title("Customer Gender Distribution")
|
| 113 |
+
st.pyplot(fig)
|
| 114 |
st.markdown("""
|
| 115 |
**Insights:**
|
| 116 |
- Gender distribution is almost equal.
|
|
|
|
| 123 |
ax.set_title("Purchase Frequency Distribution")
|
| 124 |
ax.set_xlabel("Purchase Frequency")
|
| 125 |
ax.set_ylabel("Count")
|
| 126 |
+
st.pyplot(fig)
|
| 127 |
st.markdown("""
|
| 128 |
**Insights:**
|
| 129 |
- Purchase frequencies range from 1 to 19.
|
|
|
|
| 135 |
ax.set_title("Customer Satisfaction Distribution")
|
| 136 |
ax.set_xlabel("Customer Satisfaction")
|
| 137 |
ax.set_ylabel("Count")
|
| 138 |
+
st.pyplot(fig)
|
| 139 |
st.markdown("""
|
| 140 |
**Insights:**
|
| 141 |
- Distinct peaks around integer values (1-5).
|
|
|
|
| 152 |
startangle=90,
|
| 153 |
wedgeprops={'edgecolor': 'black'})
|
| 154 |
ax.set_title("Purchase Intent Distribution")
|
| 155 |
+
st.pyplot(fig)
|
| 156 |
st.markdown("""
|
| 157 |
**Insights:**
|
| 158 |
- Binary classification problem (0: Not Purchase, 1: Purchase).
|
|
|
|
| 169 |
fig, ax = plt.subplots(figsize=(12, 8))
|
| 170 |
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5, ax=ax)
|
| 171 |
ax.set_title("Correlation Heatmap")
|
| 172 |
+
st.pyplot(fig)
|
| 173 |
st.markdown("""
|
| 174 |
**Insights:**
|
| 175 |
- Strong correlations can be observed between certain variables.
|
| 176 |
- Customer Satisfaction and Purchase Intent might have meaningful relationships.
|
| 177 |
""")
|
|
|
|
|
|
|
|
|