trohith89 committed on
Commit
c0164c0
·
verified ·
1 Parent(s): 790a915

Update pages/3_EDA_and_Feature_Engineering.py

Browse files
pages/3_EDA_and_Feature_Engineering.py CHANGED
@@ -84,7 +84,7 @@ if df is not None:
84
 
85
  # Product Category Distribution
86
  st.write("### Product Category Distribution")
87
- fig, ax = plt.subplots(figsize=(10, 6))
88
  sns.countplot(x='Category', data=df, palette='viridis', ax=ax)
89
  ax.set_title("Product Category Distribution")
90
  ax.set_xlabel("Product Category")
@@ -100,7 +100,7 @@ if df is not None:
100
 
101
  # Product Brand Distribution
102
  st.write("### Product Brand Distribution")
103
- fig, ax = plt.subplots(figsize=(10, 6))
104
  sns.countplot(x='Brand', data=df, palette='cubehelix', ax=ax)
105
  ax.set_title("Product Brand Distribution")
106
  ax.set_xlabel("Product Brand")
@@ -115,7 +115,7 @@ if df is not None:
115
 
116
  # Price Distribution
117
  st.write("### Price Distribution")
118
- fig, ax = plt.subplots(figsize=(10, 6))
119
  sns.histplot(df['Price'], kde=True, color='orange', ax=ax)
120
  ax.set_title("Product Price Distribution")
121
  ax.set_xlabel("Product Price")
@@ -133,7 +133,7 @@ if df is not None:
133
  bins=[100, 500, 1000, 1500, 2000, 3000],
134
  labels=['Very Low', 'Low', 'Medium', 'High', 'Very High'])
135
 
136
- fig, ax = plt.subplots(figsize=(10, 6))
137
  sns.countplot(x='ProductPriceBucket', data=df, palette='icefire', ax=ax)
138
  ax.set_title("Product Price Bucket Distribution")
139
  ax.set_xlabel("Price Bucket")
@@ -151,7 +151,7 @@ if df is not None:
151
  st.write("### Age Distribution and Binning")
152
  df['CustomerAgeGroup'] = pd.qcut(df['CustomerAge'], q=4, labels=['Young', 'Middle-aged', 'Mature', 'Senior'])
153
 
154
- fig, axs = plt.subplots(1, 2, figsize=(15, 6))
155
 
156
  # Age Group Distribution
157
  sns.countplot(x='CustomerAgeGroup', data=df, ax=axs[0], palette='magma')
@@ -177,7 +177,7 @@ if df is not None:
177
 
178
  # Gender Distribution
179
  st.write("### Gender Distribution")
180
- fig, axs = plt.subplots(figsize=(8, 8))
181
 
182
  df['CustomerGender'].value_counts().plot(kind='pie',
183
  colors=['lightblue', 'lightpink'],
@@ -199,7 +199,7 @@ if df is not None:
199
 
200
  # Purchase Frequency Distribution
201
  st.write("### Purchase Frequency Distribution")
202
- fig, axs = plt.subplots(1, 1, figsize=(10, 6))
203
  sns.histplot(df['PurchaseFrequency'], kde=True, color='purple', bins=30, ax=axs)
204
  axs.set_title("Purchase Frequency Distribution")
205
  axs.set_xlabel("Purchase Frequency")
@@ -210,7 +210,7 @@ if df is not None:
210
 
211
  # Customer Satisfaction Distribution
212
  st.write("### Customer Satisfaction Distribution")
213
- fig, axs = plt.subplots(1, 1, figsize=(10, 6))
214
  sns.histplot(df['CustomerSatisfaction'], kde=True, color=sns.color_palette("crest", n_colors=1)[0], ax=axs)
215
  axs.set_title("Customer Satisfaction Distribution")
216
  axs.set_xlabel("Customer Satisfaction")
@@ -226,7 +226,7 @@ if df is not None:
226
  st.write("### Purchase Intent Distribution")
227
  purchase_intent_counts = df['PurchaseIntent'].value_counts()
228
 
229
- fig, axs = plt.subplots(1, 1, figsize=(8, 6))
230
  wedges, texts, autotexts = axs.pie(purchase_intent_counts,
231
  labels=purchase_intent_counts.index,
232
  colors=sns.color_palette("coolwarm", n_colors=len(purchase_intent_counts)),
@@ -255,7 +255,7 @@ if df is not None:
255
  df_filtered = df.drop(columns=columns_to_exclude)
256
 
257
  # Set up the subplots grid: 1 row and 3 columns
258
- fig, axs = plt.subplots(1, 3, figsize=(18, 6))
259
  axs = axs.flatten() # Ensure a flat 1D array of axes for easy indexing (a 1x3 grid is already 1D)
260
 
261
  # Color palettes to cycle through for each subplot
@@ -292,7 +292,7 @@ if df is not None:
292
  Purchase Intent: Examine how purchase intent varies across different price points. Are there price ranges where purchase intent is higher or lower? This could reveal price sensitivity or the effectiveness of pricing strategies.''')
293
 
294
  # Set up the subplots grid: 1 row and 3 columns
295
- fig, axs = plt.subplots(1, 3, figsize=(18, 6))
296
  axs = axs.flatten() # Ensure a flat 1D array of axes for easy indexing (a 1x3 grid is already 1D)
297
 
298
  # Color palettes to cycle through for each subplot
@@ -324,7 +324,7 @@ if df is not None:
324
 
325
  st.write("### PRODUCT VS BRANDS")
326
  # Create the plot
327
- fig, ax = plt.subplots(figsize=(12, 8))
328
  sns.histplot(data=df, x='Category', hue='Brand', multiple="stack", palette='rocket', bins=20, ax=ax)
329
 
330
  # Add title and labels
@@ -453,7 +453,7 @@ if df is not None:
453
  corr = df_numeric.corr()
454
 
455
  # Create the heatmap plot
456
- fig, ax = plt.subplots(figsize=(20, 10))
457
  sns.heatmap(corr, annot=True, ax=ax, cmap='coolwarm')
458
 
459
  # Add title
 
84
 
85
  # Product Category Distribution
86
  st.write("### Product Category Distribution")
87
+ fig, ax = plt.subplots(figsize=(10*0.7, 6*0.7))
88
  sns.countplot(x='Category', data=df, palette='viridis', ax=ax)
89
  ax.set_title("Product Category Distribution")
90
  ax.set_xlabel("Product Category")
 
100
 
101
  # Product Brand Distribution
102
  st.write("### Product Brand Distribution")
103
+ fig, ax = plt.subplots(figsize=(10*0.7, 6*0.7))
104
  sns.countplot(x='Brand', data=df, palette='cubehelix', ax=ax)
105
  ax.set_title("Product Brand Distribution")
106
  ax.set_xlabel("Product Brand")
 
115
 
116
  # Price Distribution
117
  st.write("### Price Distribution")
118
+ fig, ax = plt.subplots(figsize=(10*0.7, 6*0.7))
119
  sns.histplot(df['Price'], kde=True, color='orange', ax=ax)
120
  ax.set_title("Product Price Distribution")
121
  ax.set_xlabel("Product Price")
 
133
  bins=[100, 500, 1000, 1500, 2000, 3000],
134
  labels=['Very Low', 'Low', 'Medium', 'High', 'Very High'])
135
 
136
+ fig, ax = plt.subplots(figsize=(10*0.7, 6*0.7))
137
  sns.countplot(x='ProductPriceBucket', data=df, palette='icefire', ax=ax)
138
  ax.set_title("Product Price Bucket Distribution")
139
  ax.set_xlabel("Price Bucket")
 
151
  st.write("### Age Distribution and Binning")
152
  df['CustomerAgeGroup'] = pd.qcut(df['CustomerAge'], q=4, labels=['Young', 'Middle-aged', 'Mature', 'Senior'])
153
 
154
+ fig, axs = plt.subplots(1, 2, figsize=(15*0.7, 6*0.7))
155
 
156
  # Age Group Distribution
157
  sns.countplot(x='CustomerAgeGroup', data=df, ax=axs[0], palette='magma')
 
177
 
178
  # Gender Distribution
179
  st.write("### Gender Distribution")
180
+ fig, axs = plt.subplots(figsize=(8*0.7, 8*0.7))
181
 
182
  df['CustomerGender'].value_counts().plot(kind='pie',
183
  colors=['lightblue', 'lightpink'],
 
199
 
200
  # Purchase Frequency Distribution
201
  st.write("### Purchase Frequency Distribution")
202
+ fig, axs = plt.subplots(1, 1, figsize=(10*0.7, 6*0.7))
203
  sns.histplot(df['PurchaseFrequency'], kde=True, color='purple', bins=30, ax=axs)
204
  axs.set_title("Purchase Frequency Distribution")
205
  axs.set_xlabel("Purchase Frequency")
 
210
 
211
  # Customer Satisfaction Distribution
212
  st.write("### Customer Satisfaction Distribution")
213
+ fig, axs = plt.subplots(1, 1, figsize=(10*0.7, 6*0.7))
214
  sns.histplot(df['CustomerSatisfaction'], kde=True, color=sns.color_palette("crest", n_colors=1)[0], ax=axs)
215
  axs.set_title("Customer Satisfaction Distribution")
216
  axs.set_xlabel("Customer Satisfaction")
 
226
  st.write("### Purchase Intent Distribution")
227
  purchase_intent_counts = df['PurchaseIntent'].value_counts()
228
 
229
+ fig, axs = plt.subplots(1, 1, figsize=(8*0.7, 6*0.7))
230
  wedges, texts, autotexts = axs.pie(purchase_intent_counts,
231
  labels=purchase_intent_counts.index,
232
  colors=sns.color_palette("coolwarm", n_colors=len(purchase_intent_counts)),
 
255
  df_filtered = df.drop(columns=columns_to_exclude)
256
 
257
  # Set up the subplots grid: 1 row and 3 columns
258
+ fig, axs = plt.subplots(1, 3, figsize=(18*0.7, 6*0.7))
259
  axs = axs.flatten() # Ensure a flat 1D array of axes for easy indexing (a 1x3 grid is already 1D)
260
 
261
  # Color palettes to cycle through for each subplot
 
292
  Purchase Intent: Examine how purchase intent varies across different price points. Are there price ranges where purchase intent is higher or lower? This could reveal price sensitivity or the effectiveness of pricing strategies.''')
293
 
294
  # Set up the subplots grid: 1 row and 3 columns
295
+ fig, axs = plt.subplots(1, 3, figsize=(18*0.7, 6*0.7))
296
  axs = axs.flatten() # Ensure a flat 1D array of axes for easy indexing (a 1x3 grid is already 1D)
297
 
298
  # Color palettes to cycle through for each subplot
 
324
 
325
  st.write("### PRODUCT VS BRANDS")
326
  # Create the plot
327
+ fig, ax = plt.subplots(figsize=(12*0.7, 8*0.7))
328
  sns.histplot(data=df, x='Category', hue='Brand', multiple="stack", palette='rocket', bins=20, ax=ax)
329
 
330
  # Add title and labels
 
453
  corr = df_numeric.corr()
454
 
455
  # Create the heatmap plot
456
+ fig, ax = plt.subplots(figsize=(20*0.7, 10*0.7))
457
  sns.heatmap(corr, annot=True, ax=ax, cmap='coolwarm')
458
 
459
  # Add title