akazmi's picture
Update app.py
887e6fa verified
raw
history blame
6.46 kB
import streamlit as st
import pandas as pd
import plotly.express as px
# Title of the App
st.title('Sales Data Visualization App')
# Display Sample Data Format
st.subheader('Sample Data Format:')
st.write("""
The data should be in the following format with the listed columns:
| Customer Name | Date | City | Country | State | Product Name | Product Attribute 1 | Product Attribute 2 | Product Attribute 3 | Product Attribute 4 | Net Sales Value | Margin Amount | Cost |
|----------------|------------|--------------|-------------|-------------|-----------------|---------------------|---------------------|---------------------|---------------------|------------------|---------------|-------|
| John Doe | 2024-01-01 | New York | USA | NY | Product A | Attribute 1A | Attribute 2A | Attribute 3A | Attribute 4A | 1000 | 300 | 700 |
| Jane Smith | 2024-01-02 | Los Angeles | USA | CA | Product B | Attribute 1B | Attribute 2B | Attribute 3B | Attribute 4B | 1500 | 400 | 1100 |
| Bob Johnson | 2024-02-15 | Chicago | USA | IL | Product A | Attribute 1A | Attribute 2A | Attribute 3A | Attribute 4A | 1200 | 350 | 850 |
| Alice Williams | 2024-03-10 | Miami | USA | FL | Product C | Attribute 1C | Attribute 2C | Attribute 3C | Attribute 4C | 2000 | 500 | 1500 |
| Charlie Brown | 2024-04-05 | Houston | USA | TX | Product B | Attribute 1B | Attribute 2B | Attribute 3B | Attribute 4B | 1800 | 450 | 1350 |
### Description of Columns:
- **Customer Name**: The name of the customer (e.g., John Doe).
- **Date**: The date of the sale, formatted as `YYYY-MM-DD` (e.g., `2024-01-01`).
- **City**: The city where the sale took place (e.g., New York).
- **Country**: The country where the sale took place (e.g., USA).
- **State**: The state where the sale took place (e.g., NY).
- **Product Name**: The name of the product (e.g., Product A).
- **Product Attribute 1**: Additional product attribute (e.g., Attribute 1A).
- **Product Attribute 2**: Additional product attribute (e.g., Attribute 2A).
- **Product Attribute 3**: Additional product attribute (e.g., Attribute 3A).
- **Product Attribute 4**: Additional product attribute (e.g., Attribute 4A).
- **Net Sales Value**: The net sales value for the transaction (e.g., `1000`).
- **Margin Amount**: The margin for the transaction (e.g., `300`).
- **Cost**: The cost of the product sold (e.g., `700`).
""")
# Upload File
uploaded_file = st.file_uploader("Upload your Excel file", type=["xlsx"])
if uploaded_file:
# Load Data
df = pd.read_excel(uploaded_file, sheet_name=0)
# Display the first few rows of the dataframe to understand its structure
st.write("Preview of the uploaded data:")
st.dataframe(df.head())
# Data Preprocessing
# Convert 'Date' to datetime
df['Date'] = pd.to_datetime(df['Date'])
# Extract year and month for further analysis
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
# Clean whitespace and ensure consistent case in 'Customer Name' and 'Product Name'
df['Customer Name'] = df['Customer Name'].str.strip().str.lower()
df['Product Name'] = df['Product Name'].str.strip().str.lower()
# Sidebar Filters
st.sidebar.header("Filter Options")
# Text Input for Customer and Product
customer_query = st.sidebar.text_input('Enter Customer Name (partial or full):').strip().lower()
product_query = st.sidebar.text_input('Enter Product Name (partial or full):').strip().lower()
# Date Range Selection
start_date = st.sidebar.date_input('Start Date:', df['Date'].min())
end_date = st.sidebar.date_input('End Date:', df['Date'].max())
# Filter Data by Date Range
filtered_df = df[
(df['Date'] >= pd.to_datetime(start_date)) &
(df['Date'] <= pd.to_datetime(end_date))
]
# Filter Data by Customer Name
if customer_query:
filtered_df = filtered_df[filtered_df['Customer Name'].str.contains(customer_query, case=False, na=False)]
# Filter Data by Product Name
if product_query:
filtered_df = filtered_df[filtered_df['Product Name'].str.contains(product_query, case=False, na=False)]
# Display Filtered Data
st.write(f"Filtered Data: {len(filtered_df)} records found.")
st.dataframe(filtered_df)
# Key Metrics: High Margin Product and Highest Sales Month
if not filtered_df.empty:
st.subheader("Key Metrics")
high_margin_product = filtered_df.loc[filtered_df['Margin Amount'].idxmax(), 'Product Name']
highest_sales_month = filtered_df.loc[filtered_df['Net Sales Value'].idxmax(), 'Month']
st.metric("High Margin Product", high_margin_product)
st.metric("Highest Sales Month", highest_sales_month)
# Visualizations
st.subheader("Sales Trend Over Time")
trend = filtered_df.groupby('Date')['Net Sales Value'].sum().reset_index()
fig_trend = px.line(trend, x='Date', y='Net Sales Value', title='Sales Over Time')
st.plotly_chart(fig_trend)
st.subheader("Top 10 Products by Sales")
top_products = filtered_df.groupby('Product Name')['Net Sales Value'].sum().nlargest(10).reset_index()
fig_top_products = px.bar(top_products, x='Product Name', y='Net Sales Value', title='Top 10 Products')
st.plotly_chart(fig_top_products)
st.subheader("Sales Distribution by Region")
region_sales = filtered_df.groupby('City')['Net Sales Value'].sum().reset_index()
fig_region = px.pie(region_sales, names='City', values='Net Sales Value', title='Sales by City')
st.plotly_chart(fig_region)
st.subheader("Monthly Sales Heatmap")
heatmap_data = filtered_df.pivot_table(values='Net Sales Value', index='Month', columns='Year', aggfunc='sum')
fig_heatmap = px.imshow(heatmap_data, labels=dict(x="Year", y="Month", color="Sales"))
st.plotly_chart(fig_heatmap)
else:
st.write("No data available for the selected filters.")
else:
st.write("Upload a file to begin.")