trohith89 committed on
Commit
c7858e7
·
verified ·
1 Parent(s): 7399cd8

Update pages/2_Data_CLeaning_and_Preprocessing.py

Browse files
pages/2_Data_CLeaning_and_Preprocessing.py CHANGED
@@ -28,24 +28,32 @@ if df is not None:
28
  st.subheader("Shape of the Dataset:")
29
  st.write(df.shape)
30
 
31
- # Visualize Numeric Data
32
  numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns
33
  if len(numeric_columns) > 0:
34
  st.subheader("Histograms for Numeric Columns:")
35
- for col in numeric_columns:
36
- plt.figure(figsize=(10, 5))
37
- sns.histplot(df[col], bins=30, kde=True)
38
- plt.title(f'Histogram of {col}')
39
- st.pyplot(plt)
40
- plt.clf()
 
 
 
 
41
 
42
  st.subheader("Boxplots for Numeric Columns:")
43
- for col in numeric_columns:
44
- plt.figure(figsize=(10, 5))
45
- sns.boxplot(x=df[col])
46
- plt.title(f'Boxplot of {col}')
47
- st.pyplot(plt)
48
- plt.clf()
 
 
 
 
49
  else:
50
  st.warning("No numeric columns available for visualization.")
51
 
@@ -59,7 +67,7 @@ if df is not None:
59
  st.write(df[selected_cat_col].value_counts())
60
 
61
  plt.figure(figsize=(10, 5))
62
- sns.countplot(x=selected_cat_col, data=df)
63
  plt.title(f'Bar Plot of {selected_cat_col}')
64
  st.pyplot(plt)
65
  plt.clf()
@@ -110,7 +118,7 @@ st.markdown(
110
  /* Styling the content to ensure text visibility */
111
  .stMarkdown {{
112
  color: white; /* White text to ensure visibility */
113
- font-size: 50px; /* Adjust font size for better readability */
114
  }}
115
  </style>
116
  """,
 
28
  st.subheader("Shape of the Dataset:")
29
  st.write(df.shape)
30
 
31
+ # Visualize Numeric Data (Histograms and Boxplots in subplots)
32
  numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns
33
  if len(numeric_columns) > 0:
34
  st.subheader("Histograms for Numeric Columns:")
35
+ # Create a subplot for all histograms
36
+ fig, axs = plt.subplots(len(numeric_columns), 1, figsize=(10, 5 * len(numeric_columns)))
37
+
38
+ color_palettes_hist = ['Set1', 'Set2', 'Set3', 'Paired', 'Pastel1'] # Different color palettes for histograms
39
+ for i, col in enumerate(numeric_columns):
40
+ palette = sns.color_palette(color_palettes_hist[i % len(color_palettes_hist)]) # Ensure different palette for each plot
41
+ sns.histplot(df[col], bins=30, kde=True, color=palette[0], ax=axs[i]) # Apply the color palette
42
+ axs[i].set_title(f'Histogram of {col}')
43
+ st.pyplot(fig)
44
+ plt.clf()
45
 
46
  st.subheader("Boxplots for Numeric Columns:")
47
+ # Create a subplot for all boxplots
48
+ fig, axs = plt.subplots(len(numeric_columns), 1, figsize=(10, 5 * len(numeric_columns)))
49
+
50
+ color_palettes_box = ['coolwarm', 'Blues', 'viridis', 'cubehelix', 'crest'] # Different color palettes for boxplots
51
+ for i, col in enumerate(numeric_columns):
52
+ palette = sns.color_palette(color_palettes_box[i % len(color_palettes_box)]) # Ensure different palette for each plot
53
+ sns.boxplot(x=df[col], ax=axs[i], palette=palette)
54
+ axs[i].set_title(f'Boxplot of {col}')
55
+ st.pyplot(fig)
56
+ plt.clf()
57
  else:
58
  st.warning("No numeric columns available for visualization.")
59
 
 
67
  st.write(df[selected_cat_col].value_counts())
68
 
69
  plt.figure(figsize=(10, 5))
70
+ sns.countplot(x=selected_cat_col, data=df, palette='coolwarm') # Unique palette for categorical data
71
  plt.title(f'Bar Plot of {selected_cat_col}')
72
  st.pyplot(plt)
73
  plt.clf()
 
118
  /* Styling the content to ensure text visibility */
119
  .stMarkdown {{
120
  color: white; /* White text to ensure visibility */
121
+ font-size: 30px; /* Adjust font size for better readability */
122
  }}
123
  </style>
124
  """,