trohith89 committed on
Commit
7d15fee
·
verified ·
1 Parent(s): a04898b

Update pages/2_Data_CLeaning_and_Preprocessing.py

Browse files
pages/2_Data_CLeaning_and_Preprocessing.py CHANGED
@@ -3,8 +3,6 @@ import pandas as pd
3
  import os
4
  from io import StringIO
5
  import sys
6
- import streamlit as st
7
- import pandas as pd
8
  import plotly.graph_objects as go
9
  import plotly.express as px
10
  from plotly.subplots import make_subplots
@@ -35,12 +33,12 @@ if df is not None:
35
  st.write(df.shape)
36
 
37
  # Visualize Numeric Data
38
- numeric_columns = data.select_dtypes(include=['float64', 'int64']).columns
39
  if len(numeric_columns) > 0:
40
  st.subheader("Histograms for Numeric Columns:")
41
  fig = make_subplots(rows=len(numeric_columns), cols=1, subplot_titles=numeric_columns)
42
  for i, col in enumerate(numeric_columns):
43
- hist = px.histogram(data, x=col, nbins=30, title=f'Histogram of {col}')
44
  fig.add_trace(hist.data[0], row=i + 1, col=1)
45
 
46
  fig.update_layout(height=500 * len(numeric_columns), title_text="Histograms for Numeric Columns")
@@ -49,7 +47,7 @@ if df is not None:
49
  st.subheader("Boxplots for Numeric Columns:")
50
  fig = make_subplots(rows=len(numeric_columns), cols=1, subplot_titles=numeric_columns)
51
  for i, col in enumerate(numeric_columns):
52
- boxplot = px.box(data, y=col, title=f'Boxplot of {col}')
53
  fig.add_trace(boxplot.data[0], row=i + 1, col=1)
54
 
55
  fig.update_layout(height=500 * len(numeric_columns), title_text="Boxplots for Numeric Columns")
@@ -58,15 +56,15 @@ if df is not None:
58
  st.warning("No numeric columns available for visualization.")
59
 
60
  # Visualize Categorical Data
61
- categorical_columns = data.select_dtypes(include=['object', 'category']).columns
62
  if len(categorical_columns) > 0:
63
  st.subheader("Bar Plots for Categorical Columns:")
64
  selected_cat_col = st.selectbox("Select a Categorical Column", categorical_columns)
65
 
66
  st.write(f"Value Counts for '{selected_cat_col}':")
67
- st.write(data[selected_cat_col].value_counts())
68
 
69
- fig = px.bar(data, x=selected_cat_col, title=f'Bar Plot of {selected_cat_col}', color=selected_cat_col)
70
  st.plotly_chart(fig)
71
  else:
72
  st.warning("No categorical columns available for visualization.")
@@ -74,12 +72,12 @@ if df is not None:
74
  # Correlation Matrix for Numeric Columns
75
  if len(numeric_columns) > 1:
76
  st.subheader("Correlation Matrix:")
77
- corr_matrix = data[numeric_columns].corr()
78
  fig = px.imshow(corr_matrix, title="Correlation Matrix", color_continuous_scale='coolwarm')
79
  st.plotly_chart(fig)
80
 
81
  st.subheader("Cleaned Dataset:")
82
- cleaned_data = data.drop_duplicates()
83
  st.write(cleaned_data)
84
 
85
  cleaned_csv = cleaned_data.to_csv(index=False).encode('utf-8')
@@ -121,10 +119,10 @@ st.markdown(
121
  }}
122
  /* Styling the content to ensure text visibility */
123
  .stMarkdown {{
124
- color: black; /* White text to ensure visibility */
125
- font-size: 30px; /* Adjust font size for better readability */
126
  }}
127
  </style>
128
  """,
129
  unsafe_allow_html=True
130
- )
 
3
  import os
4
  from io import StringIO
5
  import sys
 
 
6
  import plotly.graph_objects as go
7
  import plotly.express as px
8
  from plotly.subplots import make_subplots
 
33
  st.write(df.shape)
34
 
35
  # Visualize Numeric Data
36
+ numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns
37
  if len(numeric_columns) > 0:
38
  st.subheader("Histograms for Numeric Columns:")
39
  fig = make_subplots(rows=len(numeric_columns), cols=1, subplot_titles=numeric_columns)
40
  for i, col in enumerate(numeric_columns):
41
+ hist = px.histogram(df, x=col, nbins=30, title=f'Histogram of {col}')
42
  fig.add_trace(hist.data[0], row=i + 1, col=1)
43
 
44
  fig.update_layout(height=500 * len(numeric_columns), title_text="Histograms for Numeric Columns")
 
47
  st.subheader("Boxplots for Numeric Columns:")
48
  fig = make_subplots(rows=len(numeric_columns), cols=1, subplot_titles=numeric_columns)
49
  for i, col in enumerate(numeric_columns):
50
+ boxplot = px.box(df, y=col, title=f'Boxplot of {col}')
51
  fig.add_trace(boxplot.data[0], row=i + 1, col=1)
52
 
53
  fig.update_layout(height=500 * len(numeric_columns), title_text="Boxplots for Numeric Columns")
 
56
  st.warning("No numeric columns available for visualization.")
57
 
58
  # Visualize Categorical Data
59
+ categorical_columns = df.select_dtypes(include=['object', 'category']).columns
60
  if len(categorical_columns) > 0:
61
  st.subheader("Bar Plots for Categorical Columns:")
62
  selected_cat_col = st.selectbox("Select a Categorical Column", categorical_columns)
63
 
64
  st.write(f"Value Counts for '{selected_cat_col}':")
65
+ st.write(df[selected_cat_col].value_counts())
66
 
67
+ fig = px.bar(df, x=selected_cat_col, title=f'Bar Plot of {selected_cat_col}', color=selected_cat_col)
68
  st.plotly_chart(fig)
69
  else:
70
  st.warning("No categorical columns available for visualization.")
 
72
  # Correlation Matrix for Numeric Columns
73
  if len(numeric_columns) > 1:
74
  st.subheader("Correlation Matrix:")
75
+ corr_matrix = df[numeric_columns].corr()
76
  fig = px.imshow(corr_matrix, title="Correlation Matrix", color_continuous_scale='coolwarm')
77
  st.plotly_chart(fig)
78
 
79
  st.subheader("Cleaned Dataset:")
80
+ cleaned_data = df.drop_duplicates()
81
  st.write(cleaned_data)
82
 
83
  cleaned_csv = cleaned_data.to_csv(index=False).encode('utf-8')
 
119
  }}
120
  /* Styling the content to ensure text visibility */
121
  .stMarkdown {{
122
+ color: white; /* White text to ensure visibility */
123
+ font-size: 100px; /* Adjust font size for better readability */
124
  }}
125
  </style>
126
  """,
127
  unsafe_allow_html=True
128
+ )