Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -1,162 +1,108 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
import plotly.express as px
|
| 3 |
import pandas as pd
|
| 4 |
-
import
|
| 5 |
-
import
|
| 6 |
-
|
| 7 |
-
warnings.filterwarnings('ignore')
|
| 8 |
-
|
| 9 |
-
st.set_page_config(page_title="Superstore!!!", page_icon=":bar_chart:", layout="wide")
|
| 10 |
-
|
| 11 |
-
st.title(" :bar_chart: Sample SuperStore EDA")
|
| 12 |
-
st.markdown('<style>div.block-container{padding-top:1rem;}</style>', unsafe_allow_html=True)
|
| 13 |
-
|
| 14 |
-
fl = st.file_uploader(":file_folder: Upload a file", type=(["csv", "txt", "xlsx", "xls"]))
|
| 15 |
-
if fl is not None:
|
| 16 |
-
filename = fl.name
|
| 17 |
-
st.write(filename)
|
| 18 |
-
df = pd.read_csv(filename, encoding="ISO-8859-1", sep='\t')
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
col1, col2 = st.columns((2))
|
| 22 |
-
df["Order Date"] = pd.to_datetime(df["Order Date"])
|
| 23 |
-
|
| 24 |
-
# Getting the min and max date
|
| 25 |
-
startDate = pd.to_datetime(df["Order Date"]).min()
|
| 26 |
-
endDate = pd.to_datetime(df["Order Date"]).max()
|
| 27 |
-
|
| 28 |
-
with col1:
|
| 29 |
-
date1 = pd.to_datetime(st.date_input("Start Date", startDate))
|
| 30 |
-
|
| 31 |
-
with col2:
|
| 32 |
-
date2 = pd.to_datetime(st.date_input("End Date", endDate))
|
| 33 |
-
|
| 34 |
-
df = df[(df["Order Date"] >= date1) & (df["Order Date"] <= date2)].copy()
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
# Create for State
|
| 45 |
-
state = st.sidebar.multiselect("Pick the State", df2["State"].unique())
|
| 46 |
-
if not state:
|
| 47 |
-
df3 = df2.copy()
|
| 48 |
-
else:
|
| 49 |
-
df3 = df2[df2["State"].isin(state)]
|
| 50 |
-
|
| 51 |
-
# Create for City
|
| 52 |
-
city = st.sidebar.multiselect("Pick the City", df3["City"].unique())
|
| 53 |
-
|
| 54 |
-
# Filter the data based on Region, State and City
|
| 55 |
-
|
| 56 |
-
if not region and not state and not city:
|
| 57 |
-
filtered_df = df
|
| 58 |
-
elif not state and not city:
|
| 59 |
-
filtered_df = df[df["Region"].isin(region)]
|
| 60 |
-
elif not region and not city:
|
| 61 |
-
filtered_df = df[df["State"].isin(state)]
|
| 62 |
-
elif state and city:
|
| 63 |
-
filtered_df = df3[df["State"].isin(state) & df3["City"].isin(city)]
|
| 64 |
-
elif region and city:
|
| 65 |
-
filtered_df = df3[df["Region"].isin(region) & df3["City"].isin(city)]
|
| 66 |
-
elif region and state:
|
| 67 |
-
filtered_df = df3[df["Region"].isin(region) & df3["State"].isin(state)]
|
| 68 |
-
elif city:
|
| 69 |
-
filtered_df = df3[df3["City"].isin(city)]
|
| 70 |
else:
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
category_df = filtered_df.groupby(by=["Category"], as_index=False)["Sales"].sum()
|
| 74 |
-
|
| 75 |
-
with col1:
|
| 76 |
-
st.subheader("Category wise Sales")
|
| 77 |
-
fig = px.bar(category_df, x="Category", y="Sales", text=['${:,.2f}'.format(x) for x in category_df["Sales"]],
|
| 78 |
-
template="seaborn")
|
| 79 |
-
st.plotly_chart(fig, use_container_width=True, height=200)
|
| 80 |
-
|
| 81 |
-
with col2:
|
| 82 |
-
st.subheader("Region wise Sales")
|
| 83 |
-
fig = px.pie(filtered_df, values="Sales", names="Region", hole=0.5)
|
| 84 |
-
fig.update_traces(text=filtered_df["Region"], textposition="outside")
|
| 85 |
-
st.plotly_chart(fig, use_container_width=True)
|
| 86 |
-
|
| 87 |
-
cl1, cl2 = st.columns((2))
|
| 88 |
-
with cl1:
|
| 89 |
-
with st.expander("Category_ViewData"):
|
| 90 |
-
st.write(category_df.style.background_gradient(cmap="Blues"))
|
| 91 |
-
csv = category_df.to_csv(index=False).encode('utf-8')
|
| 92 |
-
st.download_button("Download Data", data=csv, file_name="Category.csv", mime="text/csv",
|
| 93 |
-
help='Click here to download the data as a CSV file')
|
| 94 |
-
|
| 95 |
-
with cl2:
|
| 96 |
-
with st.expander("Region_ViewData"):
|
| 97 |
-
region = filtered_df.groupby(by="Region", as_index=False)["Sales"].sum()
|
| 98 |
-
st.write(region.style.background_gradient(cmap="Oranges"))
|
| 99 |
-
csv = region.to_csv(index=False).encode('utf-8')
|
| 100 |
-
st.download_button("Download Data", data=csv, file_name="Region.csv", mime="text/csv",
|
| 101 |
-
help='Click here to download the data as a CSV file')
|
| 102 |
-
|
| 103 |
-
filtered_df["month_year"] = filtered_df["Order Date"].dt.to_period("M")
|
| 104 |
-
st.subheader('Time Series Analysis')
|
| 105 |
-
|
| 106 |
-
linechart = pd.DataFrame(
|
| 107 |
-
filtered_df.groupby(filtered_df["month_year"].dt.strftime("%Y : %b"))["Sales"].sum()).reset_index()
|
| 108 |
-
fig2 = px.line(linechart, x="month_year", y="Sales", labels={"Sales": "Amount"}, height=500, width=1000,
|
| 109 |
-
template="gridon")
|
| 110 |
-
st.plotly_chart(fig2, use_container_width=True)
|
| 111 |
-
|
| 112 |
-
with st.expander("View Data of TimeSeries:"):
|
| 113 |
-
st.write(linechart.T.style.background_gradient(cmap="Blues"))
|
| 114 |
-
csv = linechart.to_csv(index=False).encode("utf-8")
|
| 115 |
-
st.download_button('Download Data', data=csv, file_name="TimeSeries.csv", mime='text/csv')
|
| 116 |
-
|
| 117 |
-
# Create a treemap based on Region, Category, Sub-Category
|
| 118 |
-
st.subheader("Hierarchical view of Sales using TreeMap")
|
| 119 |
-
fig3 = px.treemap(filtered_df, path=["Region", "Category", "Sub-Category"], values="Sales", hover_data=["Sales"],
|
| 120 |
-
color="Sub-Category")
|
| 121 |
-
fig3.update_layout(width=800, height=650)
|
| 122 |
-
st.plotly_chart(fig3, use_container_width=True)
|
| 123 |
-
|
| 124 |
-
chart1, chart2 = st.columns((2))
|
| 125 |
-
with chart1:
|
| 126 |
-
st.subheader('Segment wise Sales')
|
| 127 |
-
fig = px.pie(filtered_df, values="Sales", names="Segment", template="plotly_dark")
|
| 128 |
-
fig.update_traces(text=filtered_df["Segment"], textposition="inside")
|
| 129 |
-
st.plotly_chart(fig, use_container_width=True)
|
| 130 |
-
|
| 131 |
-
with chart2:
|
| 132 |
-
st.subheader('Category wise Sales')
|
| 133 |
-
fig = px.pie(filtered_df, values="Sales", names="Category", template="gridon")
|
| 134 |
-
fig.update_traces(text=filtered_df["Category"], textposition="inside")
|
| 135 |
-
st.plotly_chart(fig, use_container_width=True)
|
| 136 |
-
|
| 137 |
-
import plotly.figure_factory as ff
|
| 138 |
-
|
| 139 |
-
st.subheader(":point_right: Month wise Sub-Category Sales Summary")
|
| 140 |
-
with st.expander("Summary_Table"):
|
| 141 |
-
df_sample = df[0:5][["Region", "State", "City", "Category", "Sales", "Profit", "Quantity"]]
|
| 142 |
-
fig = ff.create_table(df_sample, colorscale="Cividis")
|
| 143 |
-
st.plotly_chart(fig, use_container_width=True)
|
| 144 |
-
|
| 145 |
-
st.markdown("Month wise sub-Category Table")
|
| 146 |
-
filtered_df["month"] = filtered_df["Order Date"].dt.month_name()
|
| 147 |
-
sub_category_Year = pd.pivot_table(data=filtered_df, values="Sales", index=["Sub-Category"], columns="month")
|
| 148 |
-
st.write(sub_category_Year.style.background_gradient(cmap="Blues"))
|
| 149 |
-
|
| 150 |
-
# Create a scatter plot
|
| 151 |
-
data1 = px.scatter(filtered_df, x="Sales", y="Profit", size="Quantity")
|
| 152 |
-
data1['layout'].update(title="Relationship between Sales and Profits using Scatter Plot.",
|
| 153 |
-
titlefont=dict(size=20), xaxis=dict(title="Sales", titlefont=dict(size=19)),
|
| 154 |
-
yaxis=dict(title="Profit", titlefont=dict(size=19)))
|
| 155 |
-
st.plotly_chart(data1, use_container_width=True)
|
| 156 |
-
|
| 157 |
-
with st.expander("View Data"):
|
| 158 |
-
st.write(filtered_df.iloc[:500, 1:20:2].style.background_gradient(cmap="Oranges"))
|
| 159 |
-
|
| 160 |
-
# Download original DataSet
|
| 161 |
-
csv = df.to_csv(index=False).encode('utf-8')
|
| 162 |
-
st.download_button('Download Data', data=csv, file_name="Data.csv", mime="text/csv")
|
|
|
|
| 1 |
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px


def _render_figure(fig):
    """Draw a Matplotlib figure in the app, then close it.

    Figures created with ``plt.subplots`` stay registered with pyplot until
    they are closed; closing after rendering keeps them from piling up
    across Streamlit reruns.
    """
    st.pyplot(fig)
    plt.close(fig)


# Set page title
st.set_page_config(page_title="Interactive BI Dashboard for Excel", layout="wide")
st.title("Interactive Business Intelligence Dashboard for Excel")

# Step 1: Upload Dataset
st.sidebar.header('Upload your Excel file')
uploaded_file = st.sidebar.file_uploader("Choose an Excel file", type=["xlsx", "xls"])

# Step 2: Data Processing
if uploaded_file is not None:
    # Open the workbook once; the same handle is reused to read the sheet.
    xls = pd.ExcelFile(uploaded_file)

    # Show sheet names for easy sheet selection
    sheet_names = xls.sheet_names
    selected_sheet = st.sidebar.selectbox("Select a Sheet", sheet_names)

    # Read data from the selected sheet (via the open workbook, not the raw
    # upload buffer, so the file is not parsed a second time).
    data = pd.read_excel(xls, sheet_name=selected_sheet)

    # Display dataset overview
    st.write(f"### Dataset Overview - {selected_sheet}")

    # An empty sheet has nothing to describe or plot; stop this run cleanly
    # instead of failing further down.
    if data.empty:
        st.warning("The selected sheet contains no data.")
        st.stop()

    st.write(data.head())  # Show first few rows

    # Display dataset statistics
    st.write("### Dataset Statistics")
    st.write(data.describe())

    # Column names offered for selection
    columns = data.columns.tolist()

    # Step 3: Dynamic Data Exploration and Visualization
    st.sidebar.header('Data Exploration')

    # Select feature for analysis
    column_name = st.sidebar.selectbox("Select a column to analyze", columns)

    # Numeric columns drive the correlation heatmap and the scatter plot
    numeric_columns = data.select_dtypes(include='number').columns.tolist()
    # NOTE(review): this selection is not consumed anywhere below; the heatmap
    # always covers every numeric column. Kept so the sidebar is unchanged.
    correlation_column = st.sidebar.selectbox("Select column for Correlation Heatmap", numeric_columns)

    # Sidebar options for visualizations
    chart_type = st.sidebar.selectbox("Choose a Chart Type", ['Bar Chart', 'Pie Chart', 'Line Chart', 'Histogram'])

    # Step 4: Interactive Plotting and Visualizations
    if chart_type == 'Bar Chart':
        st.write(f"### Bar Chart of {column_name}")
        fig, ax = plt.subplots(figsize=(10, 6))
        data[column_name].value_counts().plot(kind='bar', ax=ax, color='skyblue')
        ax.set_title(f'Bar Chart of {column_name}')
        _render_figure(fig)

    elif chart_type == 'Pie Chart':
        st.write(f"### Pie Chart of {column_name}")
        fig, ax = plt.subplots(figsize=(8, 8))
        # Size the palette from the counts actually plotted: value_counts()
        # drops missing values, whereas unique() would count NaN as a slice.
        counts = data[column_name].value_counts()
        counts.plot(kind='pie', autopct='%1.1f%%', ax=ax,
                    colors=sns.color_palette("Set3", len(counts)))
        ax.set_title(f'Pie Chart of {column_name}')
        _render_figure(fig)

    elif chart_type == 'Line Chart':
        if 'Date' not in data.columns:
            st.write("### Line Chart (No Date Column)")
            st.warning(
                "Your dataset doesn't contain a Date column. Please upload a dataset with a Date column for time-series analysis.")
        elif column_name == 'Date':
            # Date is the x-axis here, so it cannot also be the plotted value.
            st.write("### Line Chart (Time Series Data)")
            st.warning("Please select a column other than Date to plot over time.")
        else:
            st.write("### Line Chart (Time Series Data)")
            # Build a date-indexed series on a copy so `data` keeps its Date
            # column for the heatmap and scatter plot below.
            series = (
                data[['Date', column_name]]
                .assign(Date=pd.to_datetime(data['Date']))
                .set_index('Date')[column_name]
            )
            st.line_chart(series)

    elif chart_type == 'Histogram':
        st.write(f"### Histogram of {column_name}")
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(data[column_name], kde=True, color='orange', ax=ax)
        ax.set_title(f'Histogram of {column_name}')
        _render_figure(fig)

    # Step 5: Correlation Heatmap for Numeric Features
    if st.sidebar.checkbox('Show Correlation Heatmap'):
        st.write("### Correlation Heatmap")
        if not numeric_columns:
            # An empty correlation matrix cannot be drawn as a heatmap.
            st.warning("No numeric columns available for a correlation heatmap.")
        else:
            corr_matrix = data[numeric_columns].corr()
            fig, ax = plt.subplots(figsize=(12, 8))
            sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', ax=ax, linewidths=0.5)
            ax.set_title("Correlation Matrix of Numeric Features")
            _render_figure(fig)

    # Step 6: Interactive Plot with Plotly (for better interactivity)
    st.sidebar.header("Advanced Interactive Visualization with Plotly")
    if st.sidebar.checkbox('Show Interactive Scatter Plot'):
        st.write("### Interactive Scatter Plot")
        if len(numeric_columns) >= 2:
            x_axis = st.sidebar.selectbox("Select X-axis for Scatter Plot", numeric_columns)
            # Default Y to the second numeric column so the initial plot is
            # not a column plotted against itself.
            y_axis = st.sidebar.selectbox("Select Y-axis for Scatter Plot", numeric_columns, index=1)
            scatter_fig = px.scatter(data, x=x_axis, y=y_axis, title=f"Scatter Plot: {x_axis} vs {y_axis}")
            st.plotly_chart(scatter_fig)
        else:
            st.warning("Not enough numeric columns to create a scatter plot.")

else:
    st.write("Please upload an Excel file to get started.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|