wweavishayaknin committed on
Commit
7cf7b1f
·
verified ·
1 Parent(s): 4ae9a20

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -157
app.py CHANGED
@@ -1,162 +1,108 @@
1
  import streamlit as st
2
- import plotly.express as px
3
  import pandas as pd
4
- import os
5
- import warnings
6
-
7
- warnings.filterwarnings('ignore')
8
-
9
- st.set_page_config(page_title="Superstore!!!", page_icon=":bar_chart:", layout="wide")
10
-
11
- st.title(" :bar_chart: Sample SuperStore EDA")
12
- st.markdown('<style>div.block-container{padding-top:1rem;}</style>', unsafe_allow_html=True)
13
-
14
- fl = st.file_uploader(":file_folder: Upload a file", type=(["csv", "txt", "xlsx", "xls"]))
15
- if fl is not None:
16
- filename = fl.name
17
- st.write(filename)
18
- df = pd.read_csv(filename, encoding="ISO-8859-1", sep='\t')
19
-
20
-
21
- col1, col2 = st.columns((2))
22
- df["Order Date"] = pd.to_datetime(df["Order Date"])
23
-
24
- # Getting the min and max date
25
- startDate = pd.to_datetime(df["Order Date"]).min()
26
- endDate = pd.to_datetime(df["Order Date"]).max()
27
-
28
- with col1:
29
- date1 = pd.to_datetime(st.date_input("Start Date", startDate))
30
-
31
- with col2:
32
- date2 = pd.to_datetime(st.date_input("End Date", endDate))
33
-
34
- df = df[(df["Order Date"] >= date1) & (df["Order Date"] <= date2)].copy()
35
 
36
- st.sidebar.header("Choose your filter: ")
37
- # Create for Region
38
- region = st.sidebar.multiselect("Pick your Region", df["Region"].unique())
39
- if not region:
40
- df2 = df.copy()
41
- else:
42
- df2 = df[df["Region"].isin(region)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- # Create for State
45
- state = st.sidebar.multiselect("Pick the State", df2["State"].unique())
46
- if not state:
47
- df3 = df2.copy()
48
- else:
49
- df3 = df2[df2["State"].isin(state)]
50
-
51
- # Create for City
52
- city = st.sidebar.multiselect("Pick the City", df3["City"].unique())
53
-
54
- # Filter the data based on Region, State and City
55
-
56
- if not region and not state and not city:
57
- filtered_df = df
58
- elif not state and not city:
59
- filtered_df = df[df["Region"].isin(region)]
60
- elif not region and not city:
61
- filtered_df = df[df["State"].isin(state)]
62
- elif state and city:
63
- filtered_df = df3[df["State"].isin(state) & df3["City"].isin(city)]
64
- elif region and city:
65
- filtered_df = df3[df["Region"].isin(region) & df3["City"].isin(city)]
66
- elif region and state:
67
- filtered_df = df3[df["Region"].isin(region) & df3["State"].isin(state)]
68
- elif city:
69
- filtered_df = df3[df3["City"].isin(city)]
70
  else:
71
- filtered_df = df3[df3["Region"].isin(region) & df3["State"].isin(state) & df3["City"].isin(city)]
72
-
73
- category_df = filtered_df.groupby(by=["Category"], as_index=False)["Sales"].sum()
74
-
75
- with col1:
76
- st.subheader("Category wise Sales")
77
- fig = px.bar(category_df, x="Category", y="Sales", text=['${:,.2f}'.format(x) for x in category_df["Sales"]],
78
- template="seaborn")
79
- st.plotly_chart(fig, use_container_width=True, height=200)
80
-
81
- with col2:
82
- st.subheader("Region wise Sales")
83
- fig = px.pie(filtered_df, values="Sales", names="Region", hole=0.5)
84
- fig.update_traces(text=filtered_df["Region"], textposition="outside")
85
- st.plotly_chart(fig, use_container_width=True)
86
-
87
- cl1, cl2 = st.columns((2))
88
- with cl1:
89
- with st.expander("Category_ViewData"):
90
- st.write(category_df.style.background_gradient(cmap="Blues"))
91
- csv = category_df.to_csv(index=False).encode('utf-8')
92
- st.download_button("Download Data", data=csv, file_name="Category.csv", mime="text/csv",
93
- help='Click here to download the data as a CSV file')
94
-
95
- with cl2:
96
- with st.expander("Region_ViewData"):
97
- region = filtered_df.groupby(by="Region", as_index=False)["Sales"].sum()
98
- st.write(region.style.background_gradient(cmap="Oranges"))
99
- csv = region.to_csv(index=False).encode('utf-8')
100
- st.download_button("Download Data", data=csv, file_name="Region.csv", mime="text/csv",
101
- help='Click here to download the data as a CSV file')
102
-
103
- filtered_df["month_year"] = filtered_df["Order Date"].dt.to_period("M")
104
- st.subheader('Time Series Analysis')
105
-
106
- linechart = pd.DataFrame(
107
- filtered_df.groupby(filtered_df["month_year"].dt.strftime("%Y : %b"))["Sales"].sum()).reset_index()
108
- fig2 = px.line(linechart, x="month_year", y="Sales", labels={"Sales": "Amount"}, height=500, width=1000,
109
- template="gridon")
110
- st.plotly_chart(fig2, use_container_width=True)
111
-
112
- with st.expander("View Data of TimeSeries:"):
113
- st.write(linechart.T.style.background_gradient(cmap="Blues"))
114
- csv = linechart.to_csv(index=False).encode("utf-8")
115
- st.download_button('Download Data', data=csv, file_name="TimeSeries.csv", mime='text/csv')
116
-
117
- # Create a treem based on Region, category, sub-Category
118
- st.subheader("Hierarchical view of Sales using TreeMap")
119
- fig3 = px.treemap(filtered_df, path=["Region", "Category", "Sub-Category"], values="Sales", hover_data=["Sales"],
120
- color="Sub-Category")
121
- fig3.update_layout(width=800, height=650)
122
- st.plotly_chart(fig3, use_container_width=True)
123
-
124
- chart1, chart2 = st.columns((2))
125
- with chart1:
126
- st.subheader('Segment wise Sales')
127
- fig = px.pie(filtered_df, values="Sales", names="Segment", template="plotly_dark")
128
- fig.update_traces(text=filtered_df["Segment"], textposition="inside")
129
- st.plotly_chart(fig, use_container_width=True)
130
-
131
- with chart2:
132
- st.subheader('Category wise Sales')
133
- fig = px.pie(filtered_df, values="Sales", names="Category", template="gridon")
134
- fig.update_traces(text=filtered_df["Category"], textposition="inside")
135
- st.plotly_chart(fig, use_container_width=True)
136
-
137
- import plotly.figure_factory as ff
138
-
139
- st.subheader(":point_right: Month wise Sub-Category Sales Summary")
140
- with st.expander("Summary_Table"):
141
- df_sample = df[0:5][["Region", "State", "City", "Category", "Sales", "Profit", "Quantity"]]
142
- fig = ff.create_table(df_sample, colorscale="Cividis")
143
- st.plotly_chart(fig, use_container_width=True)
144
-
145
- st.markdown("Month wise sub-Category Table")
146
- filtered_df["month"] = filtered_df["Order Date"].dt.month_name()
147
- sub_category_Year = pd.pivot_table(data=filtered_df, values="Sales", index=["Sub-Category"], columns="month")
148
- st.write(sub_category_Year.style.background_gradient(cmap="Blues"))
149
-
150
- # Create a scatter plot
151
- data1 = px.scatter(filtered_df, x="Sales", y="Profit", size="Quantity")
152
- data1['layout'].update(title="Relationship between Sales and Profits using Scatter Plot.",
153
- titlefont=dict(size=20), xaxis=dict(title="Sales", titlefont=dict(size=19)),
154
- yaxis=dict(title="Profit", titlefont=dict(size=19)))
155
- st.plotly_chart(data1, use_container_width=True)
156
-
157
- with st.expander("View Data"):
158
- st.write(filtered_df.iloc[:500, 1:20:2].style.background_gradient(cmap="Oranges"))
159
-
160
- # Download orginal DataSet
161
- csv = df.to_csv(index=False).encode('utf-8')
162
- st.download_button('Download Data', data=csv, file_name="Data.csv", mime="text/csv")
 
"""Streamlit dashboard for exploring an uploaded Excel workbook.

The script lets the user upload an ``.xlsx``/``.xls`` file, pick a sheet,
and then renders:

* a preview (``head``) and summary statistics (``describe``) of the sheet,
* one chart of a chosen column (bar, pie, line or histogram),
* an optional correlation heatmap of the numeric columns,
* an optional interactive Plotly scatter plot of two numeric columns.

Streamlit re-executes this file from top to bottom on every widget
interaction, so nothing here persists between runs.
"""
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px


def _render_figure(fig):
    """Send a Matplotlib figure to Streamlit, then close it.

    pyplot keeps a reference to every figure it creates; because the script
    is re-run on each interaction, unclosed figures would accumulate.
    """
    st.pyplot(fig)
    plt.close(fig)


# Set page title (must be the first Streamlit command in the script)
st.set_page_config(page_title="Interactive BI Dashboard for Excel", layout="wide")
st.title("Interactive Business Intelligence Dashboard for Excel")

# Step 1: Upload Dataset
st.sidebar.header('Upload your Excel file')
uploaded_file = st.sidebar.file_uploader("Choose an Excel file", type=["xlsx", "xls"])

# Step 2: Data Processing
if uploaded_file is not None:
    # Open the workbook once; the same handle provides both the sheet names
    # and the sheet contents, so the upload is not parsed a second time.
    xls = pd.ExcelFile(uploaded_file)

    # Show sheet names for easy sheet selection
    sheet_names = xls.sheet_names
    selected_sheet = st.sidebar.selectbox("Select a Sheet", sheet_names)

    # Read data from the selected sheet
    data = xls.parse(selected_sheet)

    # An empty sheet has nothing to describe or plot; stop this run early
    # instead of letting describe()/plotting raise further down.
    if data.empty:
        st.warning(f"The sheet '{selected_sheet}' contains no data.")
        st.stop()

    # Display dataset overview
    st.write(f"### Dataset Overview - {selected_sheet}")
    st.write(data.head())  # Show first few rows

    # Display dataset statistics
    st.write("### Dataset Statistics")
    st.write(data.describe())

    # Column names offered in the sidebar selectors
    columns = data.columns.tolist()

    # Step 3: Dynamic Data Exploration and Visualization
    st.sidebar.header('Data Exploration')

    # Select feature for analysis
    column_name = st.sidebar.selectbox("Select a column to analyze", columns)

    # Numeric columns drive the heatmap and the scatter plot
    numeric_columns = data.select_dtypes(include='number').columns.tolist()
    # NOTE: this selector is kept so the sidebar layout is unchanged, but its
    # value is not consumed; the heatmap below always uses every numeric column.
    st.sidebar.selectbox("Select column for Correlation Heatmap", numeric_columns)

    # Sidebar options for visualizations
    chart_type = st.sidebar.selectbox(
        "Choose a Chart Type",
        ['Bar Chart', 'Pie Chart', 'Line Chart', 'Histogram'],
    )

    # Step 4: Interactive Plotting and Visualizations
    if chart_type == 'Bar Chart':
        st.write(f"### Bar Chart of {column_name}")
        fig, ax = plt.subplots(figsize=(10, 6))
        data[column_name].value_counts().plot(kind='bar', ax=ax, color='skyblue')
        ax.set_title(f'Bar Chart of {column_name}')
        _render_figure(fig)

    elif chart_type == 'Pie Chart':
        st.write(f"### Pie Chart of {column_name}")
        fig, ax = plt.subplots(figsize=(8, 8))
        data[column_name].value_counts().plot(
            kind='pie',
            autopct='%1.1f%%',
            ax=ax,
            colors=sns.color_palette("Set3", len(data[column_name].unique())),
        )
        ax.set_title(f'Pie Chart of {column_name}')
        _render_figure(fig)

    elif chart_type == 'Line Chart':
        if 'Date' in data.columns:
            st.write("### Line Chart (Time Series Data)")
            if column_name == 'Date':
                # 'Date' becomes the x-axis, so it cannot also be the plotted
                # series (indexing by it after set_index would raise KeyError).
                st.warning("Please select a column other than 'Date' to plot over time.")
            else:
                # Work on a copy so the shared frame keeps its 'Date' column
                # for the sections rendered further down.
                series = data[['Date', column_name]].copy()
                series['Date'] = pd.to_datetime(series['Date'])
                st.line_chart(series.set_index('Date')[column_name])
        else:
            st.write("### Line Chart (No Date Column)")
            st.warning(
                "Your dataset doesn't contain a Date column. Please upload a dataset with a Date column for time-series analysis.")

    elif chart_type == 'Histogram':
        st.write(f"### Histogram of {column_name}")
        fig, ax = plt.subplots(figsize=(10, 6))
        # A density estimate only makes sense for numeric data; categorical
        # columns still get their count histogram, just without the KDE curve.
        with_kde = pd.api.types.is_numeric_dtype(data[column_name])
        sns.histplot(data[column_name], kde=with_kde, color='orange', ax=ax)
        ax.set_title(f'Histogram of {column_name}')
        _render_figure(fig)

    # Step 5: Correlation Heatmap for Numeric Features
    if st.sidebar.checkbox('Show Correlation Heatmap'):
        st.write("### Correlation Heatmap")
        if numeric_columns:
            corr_matrix = data[numeric_columns].corr()
            fig, ax = plt.subplots(figsize=(12, 8))
            sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', ax=ax, linewidths=0.5)
            ax.set_title("Correlation Matrix of Numeric Features")
            _render_figure(fig)
        else:
            # Without numeric columns the correlation matrix is empty and
            # cannot be drawn.
            st.warning("No numeric columns available for a correlation heatmap.")

    # Step 6: Interactive Plot with Plotly (for better interactivity)
    st.sidebar.header("Advanced Interactive Visualization with Plotly")
    if st.sidebar.checkbox('Show Interactive Scatter Plot'):
        st.write("### Interactive Scatter Plot")
        if len(numeric_columns) >= 2:
            x_axis = st.sidebar.selectbox("Select X-axis for Scatter Plot", numeric_columns)
            # Default Y to the second numeric column so the first render is
            # not a column plotted against itself.
            y_axis = st.sidebar.selectbox("Select Y-axis for Scatter Plot", numeric_columns, index=1)
            scatter_fig = px.scatter(data, x=x_axis, y=y_axis, title=f"Scatter Plot: {x_axis} vs {y_axis}")
            st.plotly_chart(scatter_fig)
        else:
            st.warning("Not enough numeric columns to create a scatter plot.")

else:
    st.write("Please upload an Excel file to get started.")