PD03 committed on
Commit
ec9c243
·
verified ·
1 Parent(s): a930538

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +166 -187
app.py CHANGED
@@ -1,5 +1,4 @@
1
  # app.py
2
-
3
  import streamlit as st
4
  import pandas as pd
5
  import numpy as np
@@ -8,31 +7,28 @@ import os
8
  import warnings
9
  warnings.filterwarnings('ignore')
10
 
11
- # --- Page Configuration ---
12
- st.set_page_config(
13
- page_title="SAP Sales KPI Dashboard",
14
- page_icon="📊",
15
- layout="wide"
16
- )
17
-
18
- # --- Custom CSS ---
19
  st.markdown("""
20
  <style>
21
- .main-header { font-size: 2.8rem; font-weight: bold; color: #1f4e79; text-align: center; }
22
- .kpi-card { background: #f8f9fa; padding: 1.5rem; border-radius: 10px; border-left: 5px solid #667eea; margin-bottom: 1rem; }
23
- .kpi-value { font-size: 2.5rem; font-weight: bold; color: #1f4e79; }
24
- .kpi-label { font-size: 1rem; color: #555; }
 
 
 
 
 
25
  </style>
26
  """, unsafe_allow_html=True)
27
 
28
- # --- Kaggle API Setup & Data Loading ---
29
  @st.cache_data(ttl=3600)
30
  def load_kaggle_sap_data():
31
  try:
32
- # Check for secrets
33
  if 'KAGGLE_USERNAME' not in st.secrets or 'KAGGLE_KEY' not in st.secrets:
34
  return "Kaggle credentials not found in Streamlit secrets."
35
-
36
  os.environ['KAGGLE_USERNAME'] = st.secrets['KAGGLE_USERNAME']
37
  os.environ['KAGGLE_KEY'] = st.secrets['KAGGLE_KEY']
38
 
@@ -40,236 +36,219 @@ def load_kaggle_sap_data():
40
  dataset_name = "mustafakeser4/sap-dataset-bigquery-dataset"
41
  download_path = "./kaggle_data"
42
 
43
- # Download only if files don't exist
44
  if not os.path.exists(os.path.join(download_path, 'vbak.csv')):
45
- with st.spinner("Downloading dataset from Kaggle... This may take a moment."):
46
  kaggle.api.authenticate()
47
  kaggle.api.dataset_download_files(dataset_name, path=download_path, unzip=True)
48
 
49
- # Load tables
50
  tables = {}
51
- for name, filename in {'vbak': 'vbak.csv', 'vbap': 'vbap.csv', 'kna1': 'kna1.csv', 'makt': 'makt.csv'}.items():
52
- file_path = os.path.join(download_path, filename)
53
- if os.path.exists(file_path):
54
- tables[name] = pd.read_csv(file_path, low_memory=False)
55
- else:
56
- return f"Expected file missing: {filename}"
57
  return tables
58
-
59
  except Exception as e:
60
  return f"Error during Kaggle data loading: {e}"
61
 
62
- # --- Data Processing & Analytics (FIXED) ---
63
  @st.cache_data
64
  def create_sales_analytics(_tables):
65
  try:
66
- # copies
67
  vbak = _tables['vbak'].copy()
68
  vbap = _tables['vbap'].copy()
69
  kna1 = _tables['kna1'].copy()
70
  makt = _tables['makt'].copy()
71
 
72
- # normalize col names
73
  for df in [vbak, vbap, kna1, makt]:
74
  df.columns = [c.upper().strip() for c in df.columns]
75
 
76
- # keep keys as strings (prevents leading-zero loss)
77
- def _as_str(df, cols):
78
  for c in cols:
79
  if c in df.columns:
80
  df[c] = df[c].astype(str).str.strip()
81
 
82
- _as_str(vbak, ['VBELN', 'KUNNR', 'VKORG', 'VTWEG'])
83
- _as_str(vbap, ['VBELN', 'MATNR'])
84
- _as_str(kna1, ['KUNNR'])
85
- _as_str(makt, ['MATNR'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
- # language filter for product text (English)
 
 
 
 
 
 
 
 
88
  makt_en = makt[makt['SPRAS'].eq('E')] if 'SPRAS' in makt.columns else makt
 
 
 
 
 
 
 
89
 
90
- # --- CRITICAL: use item-level NETWR from VBAP and parse robustly ---
91
- if 'NETWR' not in vbap.columns:
92
- return "Expected NETWR in VBAP but didn't find it."
93
-
94
- # clean currency-like strings: remove anything not digit/.,-
95
- netwr_raw = vbap['NETWR'].astype(str).str.replace(r'[^\d,.\-]', '', regex=True)
96
-
97
- # handle European decimals like "1.234,56" β†’ "1234.56" else remove commas as thousands
98
- vbap['NETWR'] = np.where(
99
- netwr_raw.str.contains(',') & netwr_raw.str.contains(r'\.'),
100
- netwr_raw.str.replace('.', '', regex=False).str.replace(',', '.', regex=False),
101
- netwr_raw.str.replace(',', '', regex=False)
102
- )
103
- vbap['NETWR'] = pd.to_numeric(vbap['NETWR'], errors='coerce').fillna(0.0)
104
-
105
- # build narrow tables to avoid duplicate columns / accidental overwrites
106
- vbak_small = vbak[['VBELN', 'KUNNR', 'VKORG', 'VTWEG', 'ERDAT']].drop_duplicates('VBELN')
107
- vbap_small = vbap[['VBELN', 'MATNR', 'NETWR']]
108
- kna1_small = (
109
- kna1[['KUNNR', 'NAME1', 'LAND1']]
110
- if {'KUNNR', 'NAME1', 'LAND1'}.issubset(kna1.columns)
111
- else pd.DataFrame(columns=['KUNNR', 'NAME1', 'LAND1'])
112
- )
113
- makt_small = (
114
- makt_en[['MATNR', 'MAKTX']].drop_duplicates('MATNR')
115
- if {'MATNR', 'MAKTX'}.issubset(makt_en.columns)
116
- else pd.DataFrame(columns=['MATNR', 'MAKTX'])
117
- )
118
-
119
- # enrich items with header fields, then customer & material text
120
- sales = (
121
- vbap_small
122
- .merge(vbak_small, on='VBELN', how='inner')
123
- .merge(kna1_small, on='KUNNR', how='left')
124
- .merge(makt_small, on='MATNR', how='left')
125
- )
126
-
127
- # dates
128
  if 'ERDAT' in sales.columns:
129
  sales['ERDAT'] = pd.to_datetime(sales['ERDAT'], errors='coerce')
130
 
131
- # ensure expected analysis columns exist
132
- for col in ['NETWR', 'LAND1', 'VTWEG', 'NAME1', 'MAKTX', 'VBELN', 'KUNNR']:
133
  if col not in sales.columns:
134
- sales[col] = np.nan if col != 'NETWR' else 0.0
 
 
135
 
 
 
136
  return sales
137
-
138
  except Exception as e:
139
  return f"Error processing sales data: {e}"
140
 
141
- # --- UI Components ---
142
- def create_kpi_card(title, value, format_type="currency"):
143
- """Simple KPI card with direct formatting"""
144
- if format_type == "currency":
145
- formatted_value = f"${value:,.0f}" if isinstance(value, (int, float)) else "$0"
146
- else: # number
147
- formatted_value = f"{value:,.0f}" if isinstance(value, (int, float)) else "0"
148
-
149
- st.markdown(f"""
150
- <div class="kpi-card">
151
- <div class="kpi-value">{formatted_value}</div>
152
- <div class="kpi-label">{title}</div>
153
- </div>
154
- """, unsafe_allow_html=True)
155
-
156
- # --- Main App Logic ---
157
- st.markdown('<h1 class="main-header">🎯 SAP Sales KPI Dashboard</h1>', unsafe_allow_html=True)
158
-
159
- # Cache clearing button
160
- if st.sidebar.button("🔄 Clear Cache & Rerun"):
161
- st.cache_data.clear()
162
- st.rerun()
163
-
164
- st.sidebar.title("Dashboard Controls")
165
-
166
- # Load data and handle errors
167
- raw_tables = load_kaggle_sap_data()
168
- if isinstance(raw_tables, str):
169
- st.error(raw_tables)
170
  st.stop()
171
 
172
- sales_df = create_sales_analytics(raw_tables)
173
  if isinstance(sales_df, str):
174
  st.error(sales_df)
175
  st.stop()
176
 
177
- st.success(f"✅ Loaded and processed {len(sales_df):,} real SAP sales records!")
178
-
179
- # --- Sidebar Filters ---
180
- st.sidebar.header("Filters")
181
-
182
- top_n_countries = st.sidebar.slider("Top N Countries to Display", 5, 20, 10)
183
-
184
- unique_countries = sorted(sales_df['LAND1'].dropna().unique())
185
- selected_region = st.sidebar.multiselect(
186
- "Select Region (Country)",
187
- options=unique_countries,
188
- default=unique_countries
189
- )
190
-
191
- filtered_df = sales_df[sales_df['LAND1'].isin(selected_region)].copy()
192
-
193
- # --- Main KPIs ---
194
- st.subheader("Sales KPIs from Real SAP Data")
195
- col1, col2, col3, col4 = st.columns(4)
196
- with col1:
197
- create_kpi_card("Total Revenue", float(filtered_df['NETWR'].sum()))
198
- with col2:
199
- create_kpi_card("Active Customers", int(filtered_df['KUNNR'].nunique()), format_type="number")
200
- with col3:
201
- avg_order_value = float(filtered_df.loc[filtered_df['NETWR'] > 0, 'NETWR'].mean() or 0.0)
202
- create_kpi_card("Avg Order Value", avg_order_value)
203
- with col4:
204
- create_kpi_card("Sales Orders", int(filtered_df['VBELN'].nunique()), format_type="number")
205
-
206
- # --- Analytics Tabs ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  tab1, tab2, tab3, tab4 = st.tabs(["👥 Top Customers", "🌍 Regional Analysis", "📈 Distribution Channels", "🛍️ Top Products"])
208
 
209
  with tab1:
210
  st.subheader("Top 10 Customers by Revenue")
211
- customer_summary = (
212
- filtered_df.dropna(subset=['NAME1'])
213
- .groupby('NAME1', as_index=False)['NETWR'].sum()
214
- .nlargest(10, 'NETWR')
215
- )
216
- if not customer_summary.empty:
217
- fig = px.bar(
218
- customer_summary, x='NETWR', y='NAME1', orientation='h',
219
- labels={'NETWR': 'Revenue ($)', 'NAME1': 'Customer'},
220
- color='NETWR', color_continuous_scale='Blues'
221
- )
222
- st.plotly_chart(fig.update_layout(yaxis={'categoryorder': 'total ascending'}), use_container_width=True)
223
  else:
224
- st.info("No customer data to display for the selected filters.")
 
 
225
 
226
  with tab2:
227
  st.subheader("Revenue by Country")
228
- regional_summary = (
229
- filtered_df.dropna(subset=['LAND1'])
230
- .groupby('LAND1', as_index=False)['NETWR'].sum()
231
- .nlargest(top_n_countries, 'NETWR')
232
- )
233
- if not regional_summary.empty:
234
- fig = px.pie(regional_summary, values='NETWR', names='LAND1', title=f"Top {top_n_countries} Countries by Revenue")
235
- st.plotly_chart(fig, use_container_width=True)
236
  else:
237
- st.info("No country data to display for the selected filters.")
 
 
238
 
239
  with tab3:
240
  st.subheader("Revenue by Distribution Channel")
241
- channel_summary = (
242
- filtered_df.dropna(subset=['VTWEG'])
243
- .groupby('VTWEG', as_index=False)['NETWR'].sum()
244
- )
245
  channel_summary['VTWEG'] = channel_summary['VTWEG'].astype(str)
246
- if not channel_summary.empty:
247
- fig = px.bar(
248
- channel_summary, x='VTWEG', y='NETWR',
249
- title="Total Revenue by Distribution Channel",
250
- labels={'NETWR': 'Total Revenue ($)', 'VTWEG': 'Distribution Channel'},
251
- color='NETWR', color_continuous_scale='Plasma'
252
- )
253
- st.plotly_chart(fig, use_container_width=True)
254
  else:
255
- st.info("No distribution channel data to display for the selected filters.")
 
 
 
 
256
 
257
  with tab4:
258
  st.subheader("Top 10 Products by Revenue")
259
- product_summary = (
260
- filtered_df.dropna(subset=['MAKTX'])
261
- .groupby('MAKTX', as_index=False)['NETWR'].sum()
262
- .nlargest(10, 'NETWR')
263
- )
264
- if not product_summary.empty:
265
- fig = px.bar(
266
- product_summary, x='NETWR', y='MAKTX', orientation='h',
267
- labels={'NETWR': 'Revenue ($)', 'MAKTX': 'Product'},
268
- color='NETWR', color_continuous_scale='Greens'
269
- )
270
- st.plotly_chart(fig.update_layout(yaxis={'categoryorder': 'total ascending'}), use_container_width=True)
271
  else:
272
- st.info("No product data to display for the selected filters.")
 
 
273
 
274
  st.markdown("---")
275
- st.markdown("<p style='text-align: center;'>Built with Streamlit • 100% Real SAP ERP Data from Kaggle</p>", unsafe_allow_html=True)
 
1
  # app.py
 
2
  import streamlit as st
3
  import pandas as pd
4
  import numpy as np
 
7
  import warnings
8
  warnings.filterwarnings('ignore')
9
 
10
+ # ---------- Page & Styles ----------
11
+ st.set_page_config(page_title="SAP Sales KPI Dashboard", page_icon="📊", layout="wide")
 
 
 
 
 
 
12
  st.markdown("""
13
  <style>
14
+ /* hide default sidebar entirely */
15
+ [data-testid="stSidebar"] { display: none; }
16
+ .main-header { font-size: 2.2rem; font-weight: 800; color: #1f4e79; text-align: left; margin-bottom: .25rem; }
17
+ .subtle { color:#6b7280; margin-bottom:1.25rem; }
18
+ .filter-card { background:#f8f9fa; padding: .9rem 1rem; border-radius:12px; border:1px solid #edf2f7; }
19
+ .kpi-card { background: #ffffff; padding: 1.25rem; border-radius: 14px; border:1px solid #e5e7eb; box-shadow: 0 1px 2px rgba(0,0,0,.03); }
20
+ .kpi-value { font-size: 2.1rem; font-weight: 800; color: #1f4e79; line-height:1; }
21
+ .kpi-label { font-size: .95rem; color: #6b7280; }
22
+ .block-container { padding-top: 1.2rem; }
23
  </style>
24
  """, unsafe_allow_html=True)
25
 
26
+ # ---------- Kaggle load ----------
27
  @st.cache_data(ttl=3600)
28
  def load_kaggle_sap_data():
29
  try:
 
30
  if 'KAGGLE_USERNAME' not in st.secrets or 'KAGGLE_KEY' not in st.secrets:
31
  return "Kaggle credentials not found in Streamlit secrets."
 
32
  os.environ['KAGGLE_USERNAME'] = st.secrets['KAGGLE_USERNAME']
33
  os.environ['KAGGLE_KEY'] = st.secrets['KAGGLE_KEY']
34
 
 
36
  dataset_name = "mustafakeser4/sap-dataset-bigquery-dataset"
37
  download_path = "./kaggle_data"
38
 
 
39
  if not os.path.exists(os.path.join(download_path, 'vbak.csv')):
40
+ with st.spinner("Downloading dataset from Kaggle..."):
41
  kaggle.api.authenticate()
42
  kaggle.api.dataset_download_files(dataset_name, path=download_path, unzip=True)
43
 
44
+ needed = {'vbak': 'vbak.csv', 'vbap': 'vbap.csv', 'kna1': 'kna1.csv', 'makt': 'makt.csv'}
45
  tables = {}
46
+ for k, fn in needed.items():
47
+ fp = os.path.join(download_path, fn)
48
+ if not os.path.exists(fp):
49
+ return f"Expected file missing: {fn}"
50
+ tables[k] = pd.read_csv(fp, low_memory=False)
 
51
  return tables
 
52
  except Exception as e:
53
  return f"Error during Kaggle data loading: {e}"
54
 
55
+ # ---------- Processing (robust revenue + safe merges) ----------
56
  @st.cache_data
57
  def create_sales_analytics(_tables):
58
  try:
 
59
  vbak = _tables['vbak'].copy()
60
  vbap = _tables['vbap'].copy()
61
  kna1 = _tables['kna1'].copy()
62
  makt = _tables['makt'].copy()
63
 
64
+ # normalize column names
65
  for df in [vbak, vbap, kna1, makt]:
66
  df.columns = [c.upper().strip() for c in df.columns]
67
 
68
+ # keep SAP keys as strings (avoid leading-zero loss)
69
+ def as_str(df, cols):
70
  for c in cols:
71
  if c in df.columns:
72
  df[c] = df[c].astype(str).str.strip()
73
 
74
+ as_str(vbak, ['VBELN','KUNNR','VKORG','VTWEG'])
75
+ as_str(vbap, ['VBELN','MATNR'])
76
+ as_str(kna1, ['KUNNR'])
77
+ as_str(makt, ['MATNR'])
78
+
79
+ # choose numeric helper
80
+ def pick_numeric(df, cols):
81
+ for c in cols:
82
+ if c in df.columns:
83
+ s = pd.to_numeric(df[c], errors='coerce')
84
+ if s.notna().sum() > 0 and s.abs().sum() > 0:
85
+ return s
86
+ return pd.Series(0.0, index=df.index)
87
+
88
+ # Build item-level REVENUE
89
+ # primary: NETWR at item level (VBAP)
90
+ netwr_item = pick_numeric(vbap, ['NETWR'])
91
+
92
+ # fallback: price * qty using common SAP columns
93
+ price = pick_numeric(vbap, ['NETPR', 'KBETR', 'NETPR_I'])
94
+ qty = pick_numeric(vbap, ['KWMENG', 'KTMNG', 'MENGE'])
95
+ fallback_rev = (price.fillna(0) * qty.fillna(0)).fillna(0)
96
 
97
+ vbap['REVENUE'] = np.where(netwr_item > 0, netwr_item, fallback_rev).astype(float)
98
+
99
+ # header fields (include currency if present)
100
+ keep_vbak = ['VBELN','KUNNR','VKORG','VTWEG','ERDAT'] + (['WAERK'] if 'WAERK' in vbak.columns else [])
101
+ vbak_small = vbak[keep_vbak].drop_duplicates('VBELN')
102
+ vbap_small = vbap[['VBELN','MATNR','REVENUE']]
103
+ kna1_small = kna1[['KUNNR','NAME1','LAND1']] if {'KUNNR','NAME1','LAND1'}.issubset(kna1.columns) else pd.DataFrame(columns=['KUNNR','NAME1','LAND1'])
104
+
105
+ # product text in English
106
  makt_en = makt[makt['SPRAS'].eq('E')] if 'SPRAS' in makt.columns else makt
107
+ makt_small = makt_en[['MATNR','MAKTX']].drop_duplicates('MATNR') if {'MATNR','MAKTX'}.issubset(makt_en.columns) else pd.DataFrame(columns=['MATNR','MAKTX'])
108
+
109
+ # final sales table
110
+ sales = (vbap_small
111
+ .merge(vbak_small, on='VBELN', how='inner')
112
+ .merge(kna1_small, on='KUNNR', how='left')
113
+ .merge(makt_small, on='MATNR', how='left'))
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  if 'ERDAT' in sales.columns:
116
  sales['ERDAT'] = pd.to_datetime(sales['ERDAT'], errors='coerce')
117
 
118
+ # ensure columns exist
119
+ for col in ['REVENUE','LAND1','VTWEG','NAME1','MAKTX','VBELN','KUNNR','VKORG']:
120
  if col not in sales.columns:
121
+ sales[col] = np.nan if col != 'REVENUE' else 0.0
122
+ if 'WAERK' not in sales.columns:
123
+ sales['WAERK'] = 'N/A'
124
 
125
+ # drop obvious junk rows
126
+ sales = sales.replace([np.inf, -np.inf], np.nan).dropna(subset=['REVENUE'])
127
  return sales
 
128
  except Exception as e:
129
  return f"Error processing sales data: {e}"
130
 
131
+ # ---------- App ----------
132
+ st.markdown('<div class="main-header">🎯 SAP Sales KPI Dashboard</div><div class="subtle">Real SAP ERP sample data (Kaggle)</div>', unsafe_allow_html=True)
133
+
134
+ tables = load_kaggle_sap_data()
135
+ if isinstance(tables, str):
136
+ st.error(tables)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  st.stop()
138
 
139
+ sales_df = create_sales_analytics(tables)
140
  if isinstance(sales_df, str):
141
  st.error(sales_df)
142
  st.stop()
143
 
144
+ # ---------- Filter Bar (no sidebar) ----------
145
+ with st.container():
146
+ st.markdown('<div class="filter-card">', unsafe_allow_html=True)
147
+ c1, c2, c3, c4 = st.columns([1.2, 1.2, 3, 0.9])
148
+
149
+ # currency filter
150
+ currencies = [c for c in sales_df['WAERK'].dropna().unique().tolist() if c != 'N/A']
151
+ default_cur = sales_df['WAERK'].mode().iat[0] if len(sales_df) and sales_df['WAERK'].notna().any() else 'N/A'
152
+ with c1:
153
+ currency = st.selectbox("Currency", options=(['All'] + sorted(currencies)) if currencies else ['All'], index=0 if currencies else 0)
154
+
155
+ # Top N
156
+ with c2:
157
+ top_n_countries = st.slider("Top N Countries", 5, 20, 10)
158
+
159
+ # Region multiselect inside expander to keep tidy
160
+ with c3:
161
+ with st.expander("Region (Country) – click to choose", expanded=False):
162
+ all_countries = sorted(sales_df['LAND1'].dropna().unique().tolist())
163
+ # buttons to select/clear
164
+ b1, b2 = st.columns([1,1])
165
+ if 'selected_countries' not in st.session_state:
166
+ st.session_state.selected_countries = all_countries
167
+ with b1:
168
+ if st.button("Select All"):
169
+ st.session_state.selected_countries = all_countries
170
+ with b2:
171
+ if st.button("Clear"):
172
+ st.session_state.selected_countries = []
173
+ selected_region = st.multiselect("Countries", options=all_countries, default=st.session_state.selected_countries, key="countries_ms")
174
+
175
+ with c4:
176
+ if st.button("🔄 Clear Cache"):
177
+ st.cache_data.clear()
178
+ st.rerun()
179
+ st.markdown('</div>', unsafe_allow_html=True)
180
+
181
+ # apply filters
182
+ filtered_df = sales_df.copy()
183
+ if currency and currency != 'All':
184
+ filtered_df = filtered_df[filtered_df['WAERK'] == currency]
185
+ if 'countries_ms' in st.session_state:
186
+ filtered_df = filtered_df[filtered_df['LAND1'].isin(st.session_state.countries_ms)]
187
+
188
+ st.success(f"✅ Loaded and processed {len(filtered_df):,} sales line-items after filters.")
189
+
190
+ # ---------- KPIs ----------
191
+ st.subheader("Sales KPIs")
192
+ k1,k2,k3,k4 = st.columns(4)
193
+ with k1: st.markdown(f'<div class="kpi-card"><div class="kpi-value">${float(filtered_df["REVENUE"].sum()):,.0f}</div><div class="kpi-label">Total Revenue</div></div>', unsafe_allow_html=True)
194
+ with k2: st.markdown(f'<div class="kpi-card"><div class="kpi-value">{int(filtered_df["KUNNR"].nunique())}</div><div class="kpi-label">Active Customers</div></div>', unsafe_allow_html=True)
195
+ with k3:
196
+ aov = float(filtered_df.loc[filtered_df['REVENUE']>0,'REVENUE'].mean() or 0.0)
197
+ st.markdown(f'<div class="kpi-card"><div class="kpi-value">${aov:,.0f}</div><div class="kpi-label">Avg Order Value (item)</div></div>', unsafe_allow_html=True)
198
+ with k4: st.markdown(f'<div class="kpi-card"><div class="kpi-value">{int(filtered_df["VBELN"].nunique())}</div><div class="kpi-label">Sales Orders</div></div>', unsafe_allow_html=True)
199
+
200
+ # ---------- Tabs ----------
201
  tab1, tab2, tab3, tab4 = st.tabs(["👥 Top Customers", "🌍 Regional Analysis", "📈 Distribution Channels", "🛍️ Top Products"])
202
 
203
  with tab1:
204
  st.subheader("Top 10 Customers by Revenue")
205
+ customer_summary = (filtered_df.dropna(subset=['NAME1'])
206
+ .groupby('NAME1', as_index=False)['REVENUE'].sum()
207
+ .nlargest(10, 'REVENUE'))
208
+ if customer_summary.empty:
209
+ st.info("No customer data to display.")
 
 
 
 
 
 
 
210
  else:
211
+ fig = px.bar(customer_summary, x='REVENUE', y='NAME1', orientation='h',
212
+ labels={'REVENUE':'Revenue','NAME1':'Customer'}, color='REVENUE')
213
+ st.plotly_chart(fig.update_layout(yaxis={'categoryorder':'total ascending'}), use_container_width=True)
214
 
215
  with tab2:
216
  st.subheader("Revenue by Country")
217
+ regional_summary = (filtered_df.dropna(subset=['LAND1'])
218
+ .groupby('LAND1', as_index=False)['REVENUE'].sum()
219
+ .nlargest(top_n_countries, 'REVENUE'))
220
+ if regional_summary.empty:
221
+ st.info("No country data to display.")
 
 
 
222
  else:
223
+ fig = px.pie(regional_summary, values='REVENUE', names='LAND1',
224
+ title=f"Top {top_n_countries} Countries by Revenue")
225
+ st.plotly_chart(fig, use_container_width=True)
226
 
227
  with tab3:
228
  st.subheader("Revenue by Distribution Channel")
229
+ channel_summary = (filtered_df.dropna(subset=['VTWEG'])
230
+ .groupby('VTWEG', as_index=False)['REVENUE'].sum())
 
 
231
  channel_summary['VTWEG'] = channel_summary['VTWEG'].astype(str)
232
+ if channel_summary.empty:
233
+ st.info("No distribution channel data to display.")
 
 
 
 
 
 
234
  else:
235
+ fig = px.bar(channel_summary, x='VTWEG', y='REVENUE',
236
+ title="Total Revenue by Distribution Channel",
237
+ labels={'REVENUE':'Total Revenue','VTWEG':'Distribution Channel'},
238
+ color='REVENUE')
239
+ st.plotly_chart(fig, use_container_width=True)
240
 
241
  with tab4:
242
  st.subheader("Top 10 Products by Revenue")
243
+ product_summary = (filtered_df.dropna(subset=['MAKTX'])
244
+ .groupby('MAKTX', as_index=False)['REVENUE'].sum()
245
+ .nlargest(10, 'REVENUE'))
246
+ if product_summary.empty:
247
+ st.info("No product data to display.")
 
 
 
 
 
 
 
248
  else:
249
+ fig = px.bar(product_summary, x='REVENUE', y='MAKTX', orientation='h',
250
+ labels={'REVENUE':'Revenue','MAKTX':'Product'}, color='REVENUE')
251
+ st.plotly_chart(fig.update_layout(yaxis={'categoryorder':'total ascending'}), use_container_width=True)
252
 
253
  st.markdown("---")
254
+ st.markdown("<p style='text-align:center;'>Built with Streamlit • Real SAP ERP sample data (Kaggle)</p>", unsafe_allow_html=True)