Update pages/3_EDA_and_Feature_Engineering.py
Browse files
pages/3_EDA_and_Feature_Engineering.py
CHANGED
|
@@ -84,7 +84,7 @@ if df is not None:
|
|
| 84 |
|
| 85 |
# Product Category Distribution
|
| 86 |
st.write("### Product Category Distribution")
|
| 87 |
-
fig, ax = plt.subplots(figsize=(10, 6))
|
| 88 |
sns.countplot(x='Category', data=df, palette='viridis', ax=ax)
|
| 89 |
ax.set_title("Product Category Distribution")
|
| 90 |
ax.set_xlabel("Product Category")
|
|
@@ -100,7 +100,7 @@ if df is not None:
|
|
| 100 |
|
| 101 |
# Product Brand Distribution
|
| 102 |
st.write("### Product Brand Distribution")
|
| 103 |
-
fig, ax = plt.subplots(figsize=(10, 6))
|
| 104 |
sns.countplot(x='Brand', data=df, palette='cubehelix', ax=ax)
|
| 105 |
ax.set_title("Product Brand Distribution")
|
| 106 |
ax.set_xlabel("Product Brand")
|
|
@@ -115,7 +115,7 @@ if df is not None:
|
|
| 115 |
|
| 116 |
# Price Distribution
|
| 117 |
st.write("### Price Distribution")
|
| 118 |
-
fig, ax = plt.subplots(figsize=(10, 6))
|
| 119 |
sns.histplot(df['Price'], kde=True, color='orange', ax=ax)
|
| 120 |
ax.set_title("Product Price Distribution")
|
| 121 |
ax.set_xlabel("Product Price")
|
|
@@ -133,7 +133,7 @@ if df is not None:
|
|
| 133 |
bins=[100, 500, 1000, 1500, 2000, 3000],
|
| 134 |
labels=['Very Low', 'Low', 'Medium', 'High', 'Very High'])
|
| 135 |
|
| 136 |
-
fig, ax = plt.subplots(figsize=(10, 6))
|
| 137 |
sns.countplot(x='ProductPriceBucket', data=df, palette='icefire', ax=ax)
|
| 138 |
ax.set_title("Product Price Bucket Distribution")
|
| 139 |
ax.set_xlabel("Price Bucket")
|
|
@@ -151,7 +151,7 @@ if df is not None:
|
|
| 151 |
st.write("### Age Distribution and Binning")
|
| 152 |
df['CustomerAgeGroup'] = pd.qcut(df['CustomerAge'], q=4, labels=['Young', 'Middle-aged', 'Mature', 'Senior'])
|
| 153 |
|
| 154 |
-
fig, axs = plt.subplots(1, 2, figsize=(15, 6))
|
| 155 |
|
| 156 |
# Age Group Distribution
|
| 157 |
sns.countplot(x='CustomerAgeGroup', data=df, ax=axs[0], palette='magma')
|
|
@@ -177,7 +177,7 @@ if df is not None:
|
|
| 177 |
|
| 178 |
# Gender Distribution
|
| 179 |
st.write("### Gender Distribution")
|
| 180 |
-
fig, axs = plt.subplots(figsize=(8, 8))
|
| 181 |
|
| 182 |
df['CustomerGender'].value_counts().plot(kind='pie',
|
| 183 |
colors=['lightblue', 'lightpink'],
|
|
@@ -199,7 +199,7 @@ if df is not None:
|
|
| 199 |
|
| 200 |
# Purchase Frequency Distribution
|
| 201 |
st.write("### Purchase Frequency Distribution")
|
| 202 |
-
fig, axs = plt.subplots(1, 1, figsize=(10, 6))
|
| 203 |
sns.histplot(df['PurchaseFrequency'], kde=True, color='purple', bins=30, ax=axs)
|
| 204 |
axs.set_title("Purchase Frequency Distribution")
|
| 205 |
axs.set_xlabel("Purchase Frequency")
|
|
@@ -210,7 +210,7 @@ if df is not None:
|
|
| 210 |
|
| 211 |
# Customer Satisfaction Distribution
|
| 212 |
st.write("### Customer Satisfaction Distribution")
|
| 213 |
-
fig, axs = plt.subplots(1, 1, figsize=(10, 6))
|
| 214 |
sns.histplot(df['CustomerSatisfaction'], kde=True, color=sns.color_palette("crest", n_colors=1)[0], ax=axs)
|
| 215 |
axs.set_title("Customer Satisfaction Distribution")
|
| 216 |
axs.set_xlabel("Customer Satisfaction")
|
|
@@ -226,7 +226,7 @@ if df is not None:
|
|
| 226 |
st.write("### Purchase Intent Distribution")
|
| 227 |
purchase_intent_counts = df['PurchaseIntent'].value_counts()
|
| 228 |
|
| 229 |
-
fig, axs = plt.subplots(1, 1, figsize=(8, 6))
|
| 230 |
wedges, texts, autotexts = axs.pie(purchase_intent_counts,
|
| 231 |
labels=purchase_intent_counts.index,
|
| 232 |
colors=sns.color_palette("coolwarm", n_colors=len(purchase_intent_counts)),
|
|
@@ -255,7 +255,7 @@ if df is not None:
|
|
| 255 |
df_filtered = df.drop(columns=columns_to_exclude)
|
| 256 |
|
| 257 |
# Set up the subplots grid: 1 row and 3 columns
|
| 258 |
-
fig, axs = plt.subplots(1, 3, figsize=(18, 6))
|
| 259 |
axs = axs.flatten() # Flatten the array of axes so each subplot can be indexed directly
|
| 260 |
|
| 261 |
# Color palettes to cycle through for each subplot
|
|
@@ -292,7 +292,7 @@ if df is not None:
|
|
| 292 |
Purchase Intent: Examine how purchase intent varies across different price points. Are there price ranges where purchase intent is higher or lower? This could reveal price sensitivity or the effectiveness of pricing strategies.''')
|
| 293 |
|
| 294 |
# Set up the subplots grid: 1 row and 3 columns
|
| 295 |
-
fig, axs = plt.subplots(1, 3, figsize=(18, 6))
|
| 296 |
axs = axs.flatten() # Flatten the array of axes so each subplot can be indexed directly
|
| 297 |
|
| 298 |
# Color palettes to cycle through for each subplot
|
|
@@ -324,7 +324,7 @@ if df is not None:
|
|
| 324 |
|
| 325 |
st.write("### PRODUCT VS BRANDS")
|
| 326 |
# Create the plot
|
| 327 |
-
fig, ax = plt.subplots(figsize=(12, 8))
|
| 328 |
sns.histplot(data=df, x='Category', hue='Brand', multiple="stack", palette='rocket', bins=20, ax=ax)
|
| 329 |
|
| 330 |
# Add title and labels
|
|
@@ -453,7 +453,7 @@ if df is not None:
|
|
| 453 |
corr = df_numeric.corr()
|
| 454 |
|
| 455 |
# Create the heatmap plot
|
| 456 |
-
fig, ax = plt.subplots(figsize=(20, 10))
|
| 457 |
sns.heatmap(corr, annot=True, ax=ax, cmap='coolwarm')
|
| 458 |
|
| 459 |
# Add title
|
|
|
|
| 84 |
|
| 85 |
# Product Category Distribution
|
| 86 |
st.write("### Product Category Distribution")
|
| 87 |
+
fig, ax = plt.subplots(figsize=(10*0.7, 6*0.7))
|
| 88 |
sns.countplot(x='Category', data=df, palette='viridis', ax=ax)
|
| 89 |
ax.set_title("Product Category Distribution")
|
| 90 |
ax.set_xlabel("Product Category")
|
|
|
|
| 100 |
|
| 101 |
# Product Brand Distribution
|
| 102 |
st.write("### Product Brand Distribution")
|
| 103 |
+
fig, ax = plt.subplots(figsize=(10*0.7, 6*0.7))
|
| 104 |
sns.countplot(x='Brand', data=df, palette='cubehelix', ax=ax)
|
| 105 |
ax.set_title("Product Brand Distribution")
|
| 106 |
ax.set_xlabel("Product Brand")
|
|
|
|
| 115 |
|
| 116 |
# Price Distribution
|
| 117 |
st.write("### Price Distribution")
|
| 118 |
+
fig, ax = plt.subplots(figsize=(10*0.7, 6*0.7))
|
| 119 |
sns.histplot(df['Price'], kde=True, color='orange', ax=ax)
|
| 120 |
ax.set_title("Product Price Distribution")
|
| 121 |
ax.set_xlabel("Product Price")
|
|
|
|
| 133 |
bins=[100, 500, 1000, 1500, 2000, 3000],
|
| 134 |
labels=['Very Low', 'Low', 'Medium', 'High', 'Very High'])
|
| 135 |
|
| 136 |
+
fig, ax = plt.subplots(figsize=(10*0.7, 6*0.7))
|
| 137 |
sns.countplot(x='ProductPriceBucket', data=df, palette='icefire', ax=ax)
|
| 138 |
ax.set_title("Product Price Bucket Distribution")
|
| 139 |
ax.set_xlabel("Price Bucket")
|
|
|
|
| 151 |
st.write("### Age Distribution and Binning")
|
| 152 |
df['CustomerAgeGroup'] = pd.qcut(df['CustomerAge'], q=4, labels=['Young', 'Middle-aged', 'Mature', 'Senior'])
|
| 153 |
|
| 154 |
+
fig, axs = plt.subplots(1, 2, figsize=(15*0.7, 6*0.7))
|
| 155 |
|
| 156 |
# Age Group Distribution
|
| 157 |
sns.countplot(x='CustomerAgeGroup', data=df, ax=axs[0], palette='magma')
|
|
|
|
| 177 |
|
| 178 |
# Gender Distribution
|
| 179 |
st.write("### Gender Distribution")
|
| 180 |
+
fig, axs = plt.subplots(figsize=(8*0.7, 8*0.7))
|
| 181 |
|
| 182 |
df['CustomerGender'].value_counts().plot(kind='pie',
|
| 183 |
colors=['lightblue', 'lightpink'],
|
|
|
|
| 199 |
|
| 200 |
# Purchase Frequency Distribution
|
| 201 |
st.write("### Purchase Frequency Distribution")
|
| 202 |
+
fig, axs = plt.subplots(1, 1, figsize=(10*0.7, 6*0.7))
|
| 203 |
sns.histplot(df['PurchaseFrequency'], kde=True, color='purple', bins=30, ax=axs)
|
| 204 |
axs.set_title("Purchase Frequency Distribution")
|
| 205 |
axs.set_xlabel("Purchase Frequency")
|
|
|
|
| 210 |
|
| 211 |
# Customer Satisfaction Distribution
|
| 212 |
st.write("### Customer Satisfaction Distribution")
|
| 213 |
+
fig, axs = plt.subplots(1, 1, figsize=(10*0.7, 6*0.7))
|
| 214 |
sns.histplot(df['CustomerSatisfaction'], kde=True, color=sns.color_palette("crest", n_colors=1)[0], ax=axs)
|
| 215 |
axs.set_title("Customer Satisfaction Distribution")
|
| 216 |
axs.set_xlabel("Customer Satisfaction")
|
|
|
|
| 226 |
st.write("### Purchase Intent Distribution")
|
| 227 |
purchase_intent_counts = df['PurchaseIntent'].value_counts()
|
| 228 |
|
| 229 |
+
fig, axs = plt.subplots(1, 1, figsize=(8*0.7, 6*0.7))
|
| 230 |
wedges, texts, autotexts = axs.pie(purchase_intent_counts,
|
| 231 |
labels=purchase_intent_counts.index,
|
| 232 |
colors=sns.color_palette("coolwarm", n_colors=len(purchase_intent_counts)),
|
|
|
|
| 255 |
df_filtered = df.drop(columns=columns_to_exclude)
|
| 256 |
|
| 257 |
# Set up the subplots grid: 1 row and 3 columns
|
| 258 |
+
fig, axs = plt.subplots(1, 3, figsize=(18*0.7, 6*0.7))
|
| 259 |
axs = axs.flatten() # Flatten the array of axes so each subplot can be indexed directly
|
| 260 |
|
| 261 |
# Color palettes to cycle through for each subplot
|
|
|
|
| 292 |
Purchase Intent: Examine how purchase intent varies across different price points. Are there price ranges where purchase intent is higher or lower? This could reveal price sensitivity or the effectiveness of pricing strategies.''')
|
| 293 |
|
| 294 |
# Set up the subplots grid: 1 row and 3 columns
|
| 295 |
+
fig, axs = plt.subplots(1, 3, figsize=(18*0.7, 6*0.7))
|
| 296 |
axs = axs.flatten() # Flatten the array of axes so each subplot can be indexed directly
|
| 297 |
|
| 298 |
# Color palettes to cycle through for each subplot
|
|
|
|
| 324 |
|
| 325 |
st.write("### PRODUCT VS BRANDS")
|
| 326 |
# Create the plot
|
| 327 |
+
fig, ax = plt.subplots(figsize=(12*0.7, 8*0.7))
|
| 328 |
sns.histplot(data=df, x='Category', hue='Brand', multiple="stack", palette='rocket', bins=20, ax=ax)
|
| 329 |
|
| 330 |
# Add title and labels
|
|
|
|
| 453 |
corr = df_numeric.corr()
|
| 454 |
|
| 455 |
# Create the heatmap plot
|
| 456 |
+
fig, ax = plt.subplots(figsize=(20*0.7, 10*0.7))
|
| 457 |
sns.heatmap(corr, annot=True, ax=ax, cmap='coolwarm')
|
| 458 |
|
| 459 |
# Add title
|