akazmi committed on
Commit
22ac20e
·
verified ·
1 Parent(s): 1562dda

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -123
app.py CHANGED
@@ -1,131 +1,149 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import plotly.express as px
 
 
 
4
 
5
  # Title of the App
6
  st.title('Sales Data Visualization App')
7
 
8
- # Display Sample Data Format
9
- st.subheader('Sample Data Format:')
10
- st.write("""
11
- The data should be in the following format with the listed columns:
12
-
13
- | Customer Name | Date | City | Country | State | Product Name | Product Attribute 1 | Product Attribute 2 | Product Attribute 3 | Product Attribute 4 | Net Sales Value | Margin Amount | Cost |
14
- |----------------|------------|--------------|-------------|-------------|-----------------|---------------------|---------------------|---------------------|---------------------|------------------|---------------|-------|
15
- | John Doe | 2024-01-01 | New York | USA | NY | Product A | Attribute 1A | Attribute 2A | Attribute 3A | Attribute 4A | 1000 | 300 | 700 |
16
- | Jane Smith | 2024-01-02 | Los Angeles | USA | CA | Product B | Attribute 1B | Attribute 2B | Attribute 3B | Attribute 4B | 1500 | 400 | 1100 |
17
- | Bob Johnson | 2024-02-15 | Chicago | USA | IL | Product A | Attribute 1A | Attribute 2A | Attribute 3A | Attribute 4A | 1200 | 350 | 850 |
18
- | Alice Williams | 2024-03-10 | Miami | USA | FL | Product C | Attribute 1C | Attribute 2C | Attribute 3C | Attribute 4C | 2000 | 500 | 1500 |
19
- | Charlie Brown | 2024-04-05 | Houston | USA | TX | Product B | Attribute 1B | Attribute 2B | Attribute 3B | Attribute 4B | 1800 | 450 | 1350 |
20
-
21
- ### Description of Columns:
22
- - **Customer Name**: The name of the customer (e.g., John Doe).
23
- - **Date**: The date of the sale, formatted as `YYYY-MM-DD` (e.g., `2024-01-01`).
24
- - **City**: The city where the sale took place (e.g., New York).
25
- - **Country**: The country where the sale took place (e.g., USA).
26
- - **State**: The state where the sale took place (e.g., NY).
27
- - **Product Name**: The name of the product (e.g., Product A).
28
- - **Product Attribute 1**: Additional product attribute (e.g., Attribute 1A).
29
- - **Product Attribute 2**: Additional product attribute (e.g., Attribute 2A).
30
- - **Product Attribute 3**: Additional product attribute (e.g., Attribute 3A).
31
- - **Product Attribute 4**: Additional product attribute (e.g., Attribute 4A).
32
- - **Net Sales Value**: The net sales value for the transaction (e.g., `1000`).
33
- - **Margin Amount**: The margin for the transaction (e.g., `300`).
34
- - **Cost**: The cost of the product sold (e.g., `700`).
35
- """)
36
-
37
- # Upload File
38
- uploaded_file = st.file_uploader("Upload your Excel file", type=["xlsx"])
39
-
40
- if uploaded_file:
41
- # Load Data
42
- df = pd.read_excel(uploaded_file, sheet_name=0)
43
-
44
- # Display the first few rows of the dataframe to understand its structure
45
- st.write("Preview of the uploaded data:")
46
- st.dataframe(df.head())
47
-
48
- # Data Preprocessing
49
- # Convert 'Date' to datetime
50
- df['Date'] = pd.to_datetime(df['Date'])
51
-
52
- # Extract year and month for further analysis
53
- df['Year'] = df['Date'].dt.year
54
- df['Month'] = df['Date'].dt.month
55
-
56
- # Clean whitespace and ensure consistent case in 'Customer Name' and 'Product Name'
57
- df['Customer Name'] = df['Customer Name'].str.strip().str.lower()
58
- df['Product Name'] = df['Product Name'].str.strip().str.lower()
59
- df['City'] = df['City'].str.strip().str.lower()
60
- df['State'] = df['State'].str.strip().str.lower()
61
- df['Country'] = df['Country'].str.strip().str.lower()
62
-
63
- # Sidebar Filters
64
- st.sidebar.header("Filter Options")
65
-
66
- # Text Input for Customer and Product
67
- customer_query = st.sidebar.text_input('Enter Customer Name (partial or full):').strip().lower()
68
- product_query = st.sidebar.text_input('Enter Product Name (partial or full):').strip().lower()
69
- city_query = st.sidebar.text_input('Enter City (partial or full):').strip().lower()
70
- state_query = st.sidebar.text_input('Enter State (partial or full):').strip().lower()
71
- country_query = st.sidebar.text_input('Enter Country (partial or full):').strip().lower()
72
-
73
- # Date Range Selection
74
- start_date = st.sidebar.date_input('Start Date:', df['Date'].min())
75
- end_date = st.sidebar.date_input('End Date:', df['Date'].max())
76
-
77
- # Filter Data by Date Range
78
- filtered_df = df[
79
- (df['Date'] >= pd.to_datetime(start_date)) &
80
- (df['Date'] <= pd.to_datetime(end_date))
81
- ]
82
-
83
- # Filter Data by Customer Name
84
- if customer_query:
85
- filtered_df = filtered_df[filtered_df['Customer Name'].str.contains(customer_query, case=False, na=False)]
86
-
87
- # Filter Data by Product Name
88
- if product_query:
89
- filtered_df = filtered_df[filtered_df['Product Name'].str.contains(product_query, case=False, na=False)]
90
-
91
- # Filter Data by City
92
- if city_query:
93
- filtered_df = filtered_df[filtered_df['City'].str.contains(city_query, case=False, na=False)]
94
-
95
- # Filter Data by State
96
- if state_query:
97
- filtered_df = filtered_df[filtered_df['State'].str.contains(state_query, case=False, na=False)]
98
-
99
- # Filter Data by Country
100
- if country_query:
101
- filtered_df = filtered_df[filtered_df['Country'].str.contains(country_query, case=False, na=False)]
102
-
103
- # Display Filtered Data
104
- st.write(f"Filtered Data: {len(filtered_df)} records found.")
105
- st.dataframe(filtered_df)
106
-
107
- # Visualizations
108
- if not filtered_df.empty:
109
- st.subheader("Sales Trend Over Time")
110
- trend = filtered_df.groupby('Date')['Net Sales Value'].sum().reset_index()
111
- fig_trend = px.line(trend, x='Date', y='Net Sales Value', title='Sales Over Time')
112
- st.plotly_chart(fig_trend)
113
-
114
- st.subheader("Top 10 Products by Sales")
115
- top_products = filtered_df.groupby('Product Name')['Net Sales Value'].sum().nlargest(10).reset_index()
116
- fig_top_products = px.bar(top_products, x='Product Name', y='Net Sales Value', title='Top 10 Products')
117
- st.plotly_chart(fig_top_products)
118
-
119
- st.subheader("Sales Distribution by Region")
120
- region_sales = filtered_df.groupby('City')['Net Sales Value'].sum().reset_index()
121
- fig_region = px.pie(region_sales, names='City', values='Net Sales Value', title='Sales by City')
122
- st.plotly_chart(fig_region)
123
-
124
- st.subheader("Monthly Sales Heatmap")
125
- heatmap_data = filtered_df.pivot_table(values='Net Sales Value', index='Month', columns='Year', aggfunc='sum')
126
- fig_heatmap = px.imshow(heatmap_data, labels=dict(x="Year", y="Month", color="Sales"))
127
- st.plotly_chart(fig_heatmap)
128
- else:
129
- st.write("No data available for the selected filters.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  else:
131
- st.write("Upload a file to begin.")
 
1
  import streamlit as st
2
  import pandas as pd
3
  import plotly.express as px
4
+ import plotly.graph_objects as go
5
+ import random
6
+ from datetime import datetime, timedelta
7
 
8
  # Title of the App
9
  st.title('Sales Data Visualization App')
10
 
11
+ # Sample Data Generation (You can remove this part when uploading your own file)
12
def generate_sample_data(n_records=100):
    """Build a random sales DataFrame used to demo the dashboard.

    Args:
        n_records: Number of rows to generate. Defaults to 100, the size
            the function previously hard-coded.

    Returns:
        A pandas DataFrame with the columns 'Customer Name', 'Product Name',
        'City', 'State', 'Country', 'Date', 'Net Sales Value' and 'Cost'.
        'Date' values fall 1 to 365 days before today, at midnight.
    """
    customers = ['Customer A', 'Customer B', 'Customer C', 'Customer D']
    products = ['Product 1', 'Product 2', 'Product 3', 'Product 4']
    cities = ['City 1', 'City 2', 'City 3', 'City 4']
    states = ['State 1', 'State 2', 'State 3', 'State 4']
    countries = ['Country 1', 'Country 2', 'Country 3', 'Country 4']

    # Anchor on midnight: a time-of-day component would make rows on the
    # latest day compare greater than a date-only "end date" bound and
    # silently drop out of date-range filters.
    today = datetime.today().replace(hour=0, minute=0, second=0, microsecond=0)

    data = []
    for _ in range(n_records):
        data.append([
            random.choice(customers),
            random.choice(products),
            random.choice(cities),
            random.choice(states),
            random.choice(countries),
            today - timedelta(days=random.randint(1, 365)),
            random.randint(1000, 10000),  # Net Sales Value
            random.randint(500, 7000),  # Cost
        ])

    columns = ['Customer Name', 'Product Name', 'City', 'State', 'Country', 'Date', 'Net Sales Value', 'Cost']
    return pd.DataFrame(data, columns=columns)
34
+
35
# Load sample data
df = generate_sample_data()

# Show the first rows so the user can see the structure of the data
st.write("Preview of the sample data:")
st.dataframe(df.head())

# Data Preprocessing: make sure 'Date' is datetime and derive Year / Month
df['Date'] = pd.to_datetime(df['Date'])
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month

# Sidebar Filters
st.sidebar.header("Filter Options")

# Free-text filters; input is trimmed and lower-cased before matching
customer_query = st.sidebar.text_input('Enter Customer Name (partial or full):').strip().lower()
product_query = st.sidebar.text_input('Enter Product Name (partial or full):').strip().lower()
city_query = st.sidebar.text_input('Enter City (partial or full):').strip().lower()
state_query = st.sidebar.text_input('Enter State (partial or full):').strip().lower()
country_query = st.sidebar.text_input('Enter Country (partial or full):').strip().lower()

# Date Range Selection (defaults span the whole dataset)
start_date = st.sidebar.date_input('Start Date:', df['Date'].min())
end_date = st.sidebar.date_input('End Date:', df['Date'].max())

# Filter Data by Date Range.
# The end bound is "before the start of the next day" so that rows carrying
# a time of day on the selected end date are still included.
range_start = pd.to_datetime(start_date)
range_end = pd.to_datetime(end_date) + pd.Timedelta(days=1)
filtered_df = df[(df['Date'] >= range_start) & (df['Date'] < range_end)]

# Apply each non-empty text filter as a case-insensitive substring match.
# regex=False: the query is user input, so characters such as "(" or "*"
# must be matched literally instead of being compiled as a pattern.
text_filters = {
    'Customer Name': customer_query,
    'Product Name': product_query,
    'City': city_query,
    'State': state_query,
    'Country': country_query,
}
for column, query in text_filters.items():
    if query:
        matches = filtered_df[column].str.contains(query, case=False, na=False, regex=False)
        filtered_df = filtered_df[matches]

# Display Filtered Data
st.write(f"Filtered Data: {len(filtered_df)} records found.")
st.dataframe(filtered_df)

if not filtered_df.empty:
    # KPI Metrics
    st.subheader("Key Financial Metrics")

    # Profit for the Year (Net Sales Value - Cost), computed per row
    profit_for_the_year = filtered_df['Net Sales Value'] - filtered_df['Cost']
    st.metric("Profit for the Year", f"${profit_for_the_year.sum():,.2f}")

    # Gross Margin is defined here by the same formula (Net Sales Value - Cost)
    gross_margin = profit_for_the_year
    st.metric("Gross Margin", f"${gross_margin.sum():,.2f}")

    # Total Sales
    total_sales = filtered_df['Net Sales Value'].sum()
    st.metric("Total Sales", f"${total_sales:,.2f}")

    # Matrix View (like Power BI Matrix): sales per (Year, Customer) x Product
    st.subheader("Matrix View of Financial Data")
    matrix_data = filtered_df.pivot_table(
        values='Net Sales Value',
        index=['Year', 'Customer Name'],
        columns=['Product Name'],
        aggfunc='sum',
        fill_value=0,
    )
    st.dataframe(matrix_data)

    # Visualization 1: Sales by Customer (Bar chart)
    st.subheader("Sales by Customer")
    sales_by_customer = filtered_df.groupby('Customer Name')['Net Sales Value'].sum().reset_index()
    fig_customer = px.bar(sales_by_customer, x='Customer Name', y='Net Sales Value', title='Sales by Customer')
    st.plotly_chart(fig_customer)

    # Visualization 2: Sales by Product (Bar chart)
    st.subheader("Sales by Product")
    sales_by_product = filtered_df.groupby('Product Name')['Net Sales Value'].sum().reset_index()
    fig_product = px.bar(sales_by_product, x='Product Name', y='Net Sales Value', title='Sales by Product')
    st.plotly_chart(fig_product)

    # Visualization 3: Year-over-Year Sales Trend (Line chart)
    st.subheader("Year-over-Year Sales Trend")
    year_trend = filtered_df.groupby(['Year'])['Net Sales Value'].sum().reset_index()
    fig_year_trend = px.line(year_trend, x='Year', y='Net Sales Value', title='Year-over-Year Sales Trend')
    st.plotly_chart(fig_year_trend)

    # Visualization 4: Profit and Loss Overview (Simple Table).
    # Built as a fresh frame: there is no 'Net Sales Value - Cost' column to
    # select, so the profit column comes from the per-row series above.
    st.subheader("Profit and Loss Overview")
    pnl_data = pd.DataFrame({
        'Customer': filtered_df['Customer Name'],
        'Product': filtered_df['Product Name'],
        'Sales': filtered_df['Net Sales Value'],
        'Cost': filtered_df['Cost'],
        'Profit': profit_for_the_year,
    })
    st.dataframe(pnl_data)

    # Visualization 5: Gross Margin Distribution (Pie chart).
    # Aggregate the margin itself (not sales) per product.
    st.subheader("Gross Margin Distribution")
    margin_data = (
        gross_margin.groupby(filtered_df['Product Name'])
        .sum()
        .rename('Gross Margin')
        .reset_index()
    )
    # A pie slice cannot represent a zero or negative share, so loss-making
    # products are left out of this chart.
    margin_data = margin_data[margin_data['Gross Margin'] > 0]
    fig_margin = px.pie(margin_data, names='Product Name', values='Gross Margin', title='Gross Margin by Product')
    st.plotly_chart(fig_margin)

else:
    st.write("No data available for the selected filters.")