akazmi committed on
Commit
9c11cee
·
verified ·
1 Parent(s): 369491d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -22
app.py CHANGED
@@ -12,39 +12,64 @@ if uploaded_file:
12
  # Load Data
13
  df = pd.read_excel(uploaded_file, sheet_name=0)
14
 
 
 
 
 
15
  # Data Preprocessing
16
- df['INVOICE_DATE'] = pd.to_datetime(df['INVOICE_DATE'])
17
- df['YEAR'] = df['INVOICE_DATE'].dt.year
18
- df['MONTH'] = df['INVOICE_DATE'].dt.month
 
 
 
19
 
20
- # Clean whitespace and ensure consistent case in 'CUSTOMER_NAME' and 'DESCRIPTION'
21
- df['CUSTOMER_NAME'] = df['CUSTOMER_NAME'].str.strip().str.lower()
22
- df['DESCRIPTION'] = df['DESCRIPTION'].str.strip().str.lower()
 
 
 
23
 
24
  # Sidebar Filters
25
  st.sidebar.header("Filter Options")
26
 
27
- # Text Input for Customer and Product
28
  customer_query = st.sidebar.text_input('Enter Customer Name (partial or full):').strip().lower()
29
  product_query = st.sidebar.text_input('Enter Product Name (partial or full):').strip().lower()
 
 
 
30
 
31
  # Date Range Selection
32
- start_date = st.sidebar.date_input('Start Date:', df['INVOICE_DATE'].min())
33
- end_date = st.sidebar.date_input('End Date:', df['INVOICE_DATE'].max())
34
 
35
  # Filter Data by Date Range
36
  filtered_df = df[
37
- (df['INVOICE_DATE'] >= pd.to_datetime(start_date)) &
38
- (df['INVOICE_DATE'] <= pd.to_datetime(end_date))
39
  ]
40
 
41
  # Filter Data by Customer Name
42
  if customer_query:
43
- filtered_df = filtered_df[filtered_df['CUSTOMER_NAME'].str.contains(customer_query, case=False, na=False)]
44
 
45
  # Filter Data by Product Name
46
  if product_query:
47
- filtered_df = filtered_df[filtered_df['DESCRIPTION'].str.contains(product_query, case=False, na=False)]
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  # Display Filtered Data
50
  st.write(f"Filtered Data: {len(filtered_df)} records found.")
@@ -53,24 +78,34 @@ if uploaded_file:
53
  # Visualizations
54
  if not filtered_df.empty:
55
  st.subheader("Sales Trend Over Time")
56
- trend = filtered_df.groupby('INVOICE_DATE')['NSV'].sum().reset_index()
57
- fig_trend = px.line(trend, x='INVOICE_DATE', y='NSV', title='Sales Over Time')
58
  st.plotly_chart(fig_trend)
59
 
60
  st.subheader("Top 10 Products by Sales")
61
- top_products = filtered_df.groupby('DESCRIPTION')['NSV'].sum().nlargest(10).reset_index()
62
- fig_top_products = px.bar(top_products, x='DESCRIPTION', y='NSV', title='Top 10 Products')
63
  st.plotly_chart(fig_top_products)
64
 
65
- st.subheader("Sales Distribution by Region")
66
- region_sales = filtered_df.groupby('REIGON')['NSV'].sum().reset_index()
67
- fig_region = px.pie(region_sales, names='REIGON', values='NSV', title='Sales by Region')
68
- st.plotly_chart(fig_region)
 
 
 
 
 
69
 
70
  st.subheader("Monthly Sales Heatmap")
71
- heatmap_data = filtered_df.pivot_table(values='NSV', index='MONTH', columns='YEAR', aggfunc='sum')
72
  fig_heatmap = px.imshow(heatmap_data, labels=dict(x="Year", y="Month", color="Sales"))
73
  st.plotly_chart(fig_heatmap)
 
 
 
 
 
74
  else:
75
  st.write("No data available for the selected filters.")
76
  else:
 
12
  # Load Data
13
  df = pd.read_excel(uploaded_file, sheet_name=0)
14
 
15
+ # Display the first few rows of the dataframe to understand its structure
16
+ st.write("Preview of the uploaded data:")
17
+ st.dataframe(df.head())
18
+
19
  # Data Preprocessing
20
+ # Convert 'Date' to datetime
21
+ df['Date'] = pd.to_datetime(df['Date'])
22
+
23
+ # Extract year and month for further analysis
24
+ df['Year'] = df['Date'].dt.year
25
+ df['Month'] = df['Date'].dt.month
26
 
27
+ # Clean whitespace and ensure consistent case in 'Customer Name', 'Product Name', 'City', 'Country', and 'State'
28
+ df['Customer Name'] = df['Customer Name'].str.strip().str.lower()
29
+ df['Product Name'] = df['Product Name'].str.strip().str.lower()
30
+ df['City'] = df['City'].str.strip().str.lower()
31
+ df['Country'] = df['Country'].str.strip().str.lower()
32
+ df['State'] = df['State'].str.strip().str.lower()
33
 
34
  # Sidebar Filters
35
  st.sidebar.header("Filter Options")
36
 
37
+ # Text Input for Customer, Product, City, Country, and State
38
  customer_query = st.sidebar.text_input('Enter Customer Name (partial or full):').strip().lower()
39
  product_query = st.sidebar.text_input('Enter Product Name (partial or full):').strip().lower()
40
+ city_query = st.sidebar.text_input('Enter City (partial or full):').strip().lower()
41
+ country_query = st.sidebar.text_input('Enter Country (partial or full):').strip().lower()
42
+ state_query = st.sidebar.text_input('Enter State (partial or full):').strip().lower()
43
 
44
  # Date Range Selection
45
+ start_date = st.sidebar.date_input('Start Date:', df['Date'].min())
46
+ end_date = st.sidebar.date_input('End Date:', df['Date'].max())
47
 
48
  # Filter Data by Date Range
49
  filtered_df = df[
50
+ (df['Date'] >= pd.to_datetime(start_date)) &
51
+ (df['Date'] <= pd.to_datetime(end_date))
52
  ]
53
 
54
  # Filter Data by Customer Name
55
  if customer_query:
56
+ filtered_df = filtered_df[filtered_df['Customer Name'].str.contains(customer_query, case=False, na=False)]
57
 
58
  # Filter Data by Product Name
59
  if product_query:
60
+ filtered_df = filtered_df[filtered_df['Product Name'].str.contains(product_query, case=False, na=False)]
61
+
62
+ # Filter Data by City
63
+ if city_query:
64
+ filtered_df = filtered_df[filtered_df['City'].str.contains(city_query, case=False, na=False)]
65
+
66
+ # Filter Data by Country
67
+ if country_query:
68
+ filtered_df = filtered_df[filtered_df['Country'].str.contains(country_query, case=False, na=False)]
69
+
70
+ # Filter Data by State
71
+ if state_query:
72
+ filtered_df = filtered_df[filtered_df['State'].str.contains(state_query, case=False, na=False)]
73
 
74
  # Display Filtered Data
75
  st.write(f"Filtered Data: {len(filtered_df)} records found.")
 
78
  # Visualizations
79
  if not filtered_df.empty:
80
  st.subheader("Sales Trend Over Time")
81
+ trend = filtered_df.groupby('Date')['Net Sales Value'].sum().reset_index()
82
+ fig_trend = px.line(trend, x='Date', y='Net Sales Value', title='Sales Over Time')
83
  st.plotly_chart(fig_trend)
84
 
85
  st.subheader("Top 10 Products by Sales")
86
+ top_products = filtered_df.groupby('Product Name')['Net Sales Value'].sum().nlargest(10).reset_index()
87
+ fig_top_products = px.bar(top_products, x='Product Name', y='Net Sales Value', title='Top 10 Products')
88
  st.plotly_chart(fig_top_products)
89
 
90
+ st.subheader("Sales Distribution by Country")
91
+ country_sales = filtered_df.groupby('Country')['Net Sales Value'].sum().reset_index()
92
+ fig_country = px.pie(country_sales, names='Country', values='Net Sales Value', title='Sales by Country')
93
+ st.plotly_chart(fig_country)
94
+
95
+ st.subheader("Sales by City")
96
+ city_sales = filtered_df.groupby('City')['Net Sales Value'].sum().reset_index()
97
+ fig_city = px.bar(city_sales, x='City', y='Net Sales Value', title='Sales by City')
98
+ st.plotly_chart(fig_city)
99
 
100
  st.subheader("Monthly Sales Heatmap")
101
+ heatmap_data = filtered_df.pivot_table(values='Net Sales Value', index='Month', columns='Year', aggfunc='sum')
102
  fig_heatmap = px.imshow(heatmap_data, labels=dict(x="Year", y="Month", color="Sales"))
103
  st.plotly_chart(fig_heatmap)
104
+
105
+ st.subheader("Margin vs. Cost")
106
+ margin_cost = filtered_df.groupby('Product Name')[['Margin Amount', 'Cost']].sum().reset_index()
107
+ fig_margin_cost = px.scatter(margin_cost, x='Cost', y='Margin Amount', color='Product Name', title='Margin vs. Cost')
108
+ st.plotly_chart(fig_margin_cost)
109
  else:
110
  st.write("No data available for the selected filters.")
111
  else: