PD03 committed on
Commit
f7dbc16
·
verified ·
1 Parent(s): f325816

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +239 -233
app.py CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
3
  import plotly.express as px
4
  import plotly.graph_objects as go
5
  import numpy as np
6
- from datetime import datetime
7
  import warnings
8
  warnings.filterwarnings('ignore')
9
 
@@ -48,120 +48,65 @@ st.markdown("""
48
  @st.cache_data
49
  def load_kaggle_sap_data():
50
  """
51
- Load real SAP dataset from Kaggle using corrected API
52
  """
53
  try:
54
- # Import kagglehub here to handle version differences
55
- import kagglehub
56
 
57
  dataset_name = "mustafakeser4/sap-dataset-bigquery-dataset"
 
58
 
59
- # Try the new API first, fall back to old if needed
60
- try:
61
- # For newer versions of kagglehub with KaggleDatasetAdapter
62
- try:
63
- from kagglehub import KaggleDatasetAdapter
64
-
65
- # Load tables using dataset_load method
66
- vbak = kagglehub.dataset_load(KaggleDatasetAdapter.PANDAS, dataset_name, "vbak.csv")
67
- vbap = kagglehub.dataset_load(KaggleDatasetAdapter.PANDAS, dataset_name, "vbap.csv")
68
- kna1 = kagglehub.dataset_load(KaggleDatasetAdapter.PANDAS, dataset_name, "kna1.csv")
69
- makt = kagglehub.dataset_load(KaggleDatasetAdapter.PANDAS, dataset_name, "makt.csv")
70
-
71
- except ImportError:
72
- # Fall back to dataset_download method for older versions
73
- path = kagglehub.dataset_download(dataset_name)
74
-
75
- vbak = pd.read_csv(f"{path}/vbak.csv")
76
- vbap = pd.read_csv(f"{path}/vbap.csv")
77
- kna1 = pd.read_csv(f"{path}/kna1.csv")
78
- makt = pd.read_csv(f"{path}/makt.csv")
79
-
80
- except Exception as e:
81
- st.error(f"Error with kagglehub API: {str(e)}")
82
- # Use alternative data loading method
83
- return load_alternative_data()
84
-
85
- # Limit data size for performance
86
- tables = {
87
- 'sales_orders': vbak.head(5000),
88
- 'sales_items': vbap.head(10000),
89
- 'customers': kna1.head(3000),
90
- 'material_texts': makt[makt.get('spras', makt.get('SPRAS', '')) == 'E'].head(3000)
91
- }
92
 
93
  return tables
94
 
95
- except ImportError:
96
- st.error("kagglehub not available. Using alternative data source.")
97
- return load_alternative_data()
98
  except Exception as e:
99
  st.error(f"Error loading Kaggle dataset: {str(e)}")
100
- return load_alternative_data()
101
-
102
- def load_alternative_data():
103
- """
104
- Load sample SAP-like data as fallback when Kaggle is not available
105
- """
106
- np.random.seed(42)
107
-
108
- # Generate realistic SAP sales data structure
109
- n_orders = 2000
110
- n_items = 5000
111
-
112
- # Sales Order Header (VBAK equivalent)
113
- sales_orders = pd.DataFrame({
114
- 'vbeln': [f'{100000000 + i:010d}' for i in range(n_orders)],
115
- 'kunnr': [f'{i%500 + 1:010d}' for i in range(n_orders)],
116
- 'vkorg': np.random.choice(['1000', '2000', '3000'], n_orders),
117
- 'vtweg': np.random.choice(['10', '20', '30'], n_orders),
118
- 'spart': np.random.choice(['01', '02', '03'], n_orders),
119
- 'erdat': pd.date_range('2022-01-01', periods=n_orders, freq='D'),
120
- 'waerk': 'USD'
121
- })
122
-
123
- # Sales Order Items (VBAP equivalent)
124
- sales_items = []
125
- for _, order in sales_orders.iterrows():
126
- items_count = np.random.randint(1, 6)
127
- for j in range(items_count):
128
- sales_items.append({
129
- 'vbeln': order['vbeln'],
130
- 'posnr': f'{(j+1)*10:06d}',
131
- 'matnr': f'MAT{np.random.randint(1, 1000):06d}',
132
- 'kwmeng': np.random.uniform(1, 100),
133
- 'netwr': np.random.uniform(100, 50000),
134
- 'werks': np.random.choice(['1000', '2000', '3000'])
135
- })
136
-
137
- sales_items_df = pd.DataFrame(sales_items)
138
-
139
- # Customer Master (KNA1 equivalent)
140
- customers = pd.DataFrame({
141
- 'kunnr': [f'{i+1:010d}' for i in range(500)],
142
- 'name1': [f'Customer Company {chr(65 + i%26)}{i:03d}' for i in range(500)],
143
- 'land1': np.random.choice(['US', 'DE', 'CN', 'IN', 'BR', 'FR', 'UK', 'JP'], 500),
144
- 'regio': [f'REG{i%10:02d}' for i in range(500)]
145
- })
146
-
147
- # Material Text (MAKT equivalent)
148
- materials = pd.DataFrame({
149
- 'matnr': [f'MAT{i:06d}' for i in range(1, 1001)],
150
- 'maktx': [f'Product {i:04d} - {np.random.choice(["Software", "Hardware", "Service", "Cloud", "Analytics"])}' for i in range(1, 1001)],
151
- 'spras': 'E'
152
- })
153
-
154
- return {
155
- 'sales_orders': sales_orders,
156
- 'sales_items': sales_items_df,
157
- 'customers': customers,
158
- 'material_texts': materials
159
- }
160
 
161
  def create_sales_analytics(tables):
162
  """
163
- Create sales analytics from SAP data
164
  """
 
 
 
165
  try:
166
  vbak = tables.get('sales_orders', pd.DataFrame())
167
  vbap = tables.get('sales_items', pd.DataFrame())
@@ -169,40 +114,56 @@ def create_sales_analytics(tables):
169
  makt = tables.get('material_texts', pd.DataFrame())
170
 
171
  if vbak.empty or vbap.empty:
 
172
  return None
173
 
174
- # Normalize column names to lowercase
175
- vbak.columns = vbak.columns.str.lower()
176
- vbap.columns = vbap.columns.str.lower()
177
  if not kna1.empty:
178
- kna1.columns = kna1.columns.str.lower()
179
  if not makt.empty:
180
- makt.columns = makt.columns.str.lower()
181
 
182
  # Join sales orders with items
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  sales_data = pd.merge(
184
- vbak[['vbeln', 'kunnr', 'vkorg', 'vtweg', 'spart', 'erdat', 'waerk']],
185
- vbap[['vbeln', 'posnr', 'matnr', 'kwmeng', 'netwr', 'werks']],
186
- on='vbeln',
187
  how='inner'
188
  )
189
 
190
- # Add customer info
191
- if not kna1.empty and 'name1' in kna1.columns:
192
- customer_info = kna1[['kunnr', 'name1', 'land1', 'regio']]
193
- sales_data = pd.merge(sales_data, customer_info, on='kunnr', how='left')
194
 
195
- # Add material descriptions
196
- if not makt.empty and 'maktx' in makt.columns:
197
- material_info = makt[['matnr', 'maktx']]
198
- sales_data = pd.merge(sales_data, material_info, on='matnr', how='left')
199
 
200
  # Clean data
201
- sales_data['netwr'] = pd.to_numeric(sales_data['netwr'], errors='coerce').fillna(0)
202
- sales_data['kwmeng'] = pd.to_numeric(sales_data['kwmeng'], errors='coerce').fillna(0)
203
 
204
- if 'erdat' in sales_data.columns:
205
- sales_data['erdat'] = pd.to_datetime(sales_data['erdat'], errors='coerce')
206
 
207
  return sales_data
208
 
@@ -233,61 +194,78 @@ def main():
233
  st.markdown("""
234
  <div style="text-align: center; margin-bottom: 2rem;">
235
  <p style="font-size: 1.2rem; color: #666;">
236
- SAP ERP Sales Data Analytics | Customer β€’ Regional β€’ Channel β€’ Product KPIs
237
  </p>
238
  </div>
239
  """, unsafe_allow_html=True)
240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  # Load data
242
- with st.spinner("Loading SAP dataset..."):
243
  tables = load_kaggle_sap_data()
244
 
245
  if not tables:
246
- st.error("Failed to load dataset")
 
247
  return
248
 
249
  # Process sales data
250
- with st.spinner("Processing sales analytics..."):
251
  sales_df = create_sales_analytics(tables)
252
 
253
  if sales_df is None or sales_df.empty:
254
- st.error("No sales data available for analysis")
255
  return
256
 
257
- # Data summary
258
- st.success(f"βœ… Loaded {len(sales_df):,} sales records")
259
 
260
- # Sidebar
261
- st.sidebar.header("πŸ“Š Dataset Information")
262
- st.sidebar.info(f"""
263
- **SAP Tables:**
264
  - VBAK: Sales Orders ({len(tables.get('sales_orders', []))})
265
  - VBAP: Sales Items ({len(tables.get('sales_items', []))})
266
  - KNA1: Customers ({len(tables.get('customers', []))})
267
  - MAKT: Materials ({len(tables.get('material_texts', []))})
268
 
269
  **Analysis Records:** {len(sales_df):,}
 
270
  """)
271
 
272
  # Main KPIs
273
- st.subheader("🎯 Key Performance Indicators")
274
 
275
  col1, col2, col3, col4 = st.columns(4)
276
 
277
  with col1:
278
- total_revenue = sales_df['netwr'].sum()
279
  create_kpi_card("Total Revenue", total_revenue, "currency")
280
 
281
  with col2:
282
- unique_customers = sales_df['kunnr'].nunique()
283
  create_kpi_card("Active Customers", unique_customers, "number")
284
 
285
  with col3:
286
- avg_order_value = sales_df['netwr'].mean()
287
  create_kpi_card("Avg Order Value", avg_order_value, "currency")
288
 
289
  with col4:
290
- total_orders = sales_df['vbeln'].nunique()
291
  create_kpi_card("Sales Orders", total_orders, "number")
292
 
293
  # Analytics Tabs
@@ -299,141 +277,169 @@ def main():
299
  ])
300
 
301
  with tab1:
302
- st.subheader("πŸ‘₯ Top 10 Customers by Revenue")
303
 
304
- if 'name1' in sales_df.columns:
305
- customer_sales = sales_df.groupby(['kunnr', 'name1'])['netwr'].sum().reset_index()
 
306
  else:
307
- customer_sales = sales_df.groupby('kunnr')['netwr'].sum().reset_index()
308
- customer_sales['name1'] = customer_sales['kunnr']
309
-
310
- top_customers = customer_sales.nlargest(10, 'netwr')
311
-
312
- fig = px.bar(
313
- top_customers,
314
- x='netwr',
315
- y='name1',
316
- orientation='h',
317
- title="Top 10 Customers by Revenue",
318
- labels={'netwr': 'Revenue ($)', 'name1': 'Customer'},
319
- color='netwr',
320
- color_continuous_scale='Blues'
321
- )
322
- fig.update_layout(height=500, yaxis={'categoryorder': 'total ascending'})
323
- st.plotly_chart(fig, use_container_width=True)
324
 
325
- # Customer table
326
- display_customers = top_customers.copy()
327
- display_customers['Revenue'] = display_customers['netwr'].apply(lambda x: f"${x:,.0f}")
328
- st.dataframe(display_customers[['kunnr', 'name1', 'Revenue']], use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
 
330
  with tab2:
331
- st.subheader("🌍 Regional Sales Analysis")
332
 
333
- if 'land1' in sales_df.columns:
334
- regional_sales = sales_df.groupby('land1')['netwr'].sum().reset_index()
335
- regional_sales = regional_sales.sort_values('netwr', ascending=False).head(10)
336
-
337
- col1, col2 = st.columns(2)
338
 
339
- with col1:
340
- fig = px.bar(
341
- regional_sales,
342
- x='netwr',
343
- y='land1',
344
- orientation='h',
345
- title="Revenue by Country",
346
- labels={'netwr': 'Revenue ($)', 'land1': 'Country'},
347
- color='netwr',
348
- color_continuous_scale='Viridis'
349
- )
350
- fig.update_layout(height=400, yaxis={'categoryorder': 'total ascending'})
351
- st.plotly_chart(fig, use_container_width=True)
352
-
353
- with col2:
354
- fig = px.pie(
355
- regional_sales,
356
- values='netwr',
357
- names='land1',
358
- title="Revenue Distribution by Country"
359
- )
360
- fig.update_layout(height=400)
361
- st.plotly_chart(fig, use_container_width=True)
 
 
 
362
  else:
363
- st.info("Regional data not available")
364
 
365
  with tab3:
366
- st.subheader("πŸ“ˆ Distribution Channel Performance")
367
 
368
- if 'vtweg' in sales_df.columns:
369
- channel_sales = sales_df.groupby('vtweg')['netwr'].sum().reset_index()
370
- channel_sales = channel_sales.sort_values('netwr', ascending=False)
371
 
372
  fig = px.bar(
373
  channel_sales,
374
- x='vtweg',
375
- y='netwr',
376
  title="Revenue by Distribution Channel",
377
- labels={'netwr': 'Revenue ($)', 'vtweg': 'Channel'},
378
- color='netwr',
379
  color_continuous_scale='Plasma'
380
  )
381
  fig.update_layout(height=400)
382
  st.plotly_chart(fig, use_container_width=True)
383
 
384
- # Organization analysis
385
- if 'vkorg' in sales_df.columns:
386
- org_sales = sales_df.groupby('vkorg')['netwr'].sum().reset_index()
387
- org_sales = org_sales.sort_values('netwr', ascending=False)
388
 
389
  st.subheader("πŸ“Š Sales Organization Performance")
390
  fig = px.bar(
391
  org_sales,
392
- x='vkorg',
393
- y='netwr',
394
  title="Revenue by Sales Organization",
395
- labels={'netwr': 'Revenue ($)', 'vkorg': 'Sales Org'},
396
- color='netwr',
397
  color_continuous_scale='Cividis'
398
  )
399
  fig.update_layout(height=400)
400
  st.plotly_chart(fig, use_container_width=True)
 
 
401
 
402
  with tab4:
403
- st.subheader("πŸ›οΈ Top 10 Products by Revenue")
404
 
405
- if 'maktx' in sales_df.columns:
406
- product_sales = sales_df.groupby(['matnr', 'maktx'])['netwr'].sum().reset_index()
 
407
  else:
408
- product_sales = sales_df.groupby('matnr')['netwr'].sum().reset_index()
409
- product_sales['maktx'] = product_sales['matnr']
410
-
411
- top_products = product_sales.nlargest(10, 'netwr')
412
-
413
- fig = px.bar(
414
- top_products,
415
- x='netwr',
416
- y='maktx',
417
- orientation='h',
418
- title="Top 10 Products by Revenue",
419
- labels={'netwr': 'Revenue ($)', 'maktx': 'Product'},
420
- color='netwr',
421
- color_continuous_scale='Set3'
422
- )
423
- fig.update_layout(height=500, yaxis={'categoryorder': 'total ascending'})
424
- st.plotly_chart(fig, use_container_width=True)
425
 
426
- # Product table
427
- display_products = top_products.copy()
428
- display_products['Revenue'] = display_products['netwr'].apply(lambda x: f"${x:,.0f}")
429
- st.dataframe(display_products[['matnr', 'maktx', 'Revenue']], use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
 
431
  # Footer
432
  st.markdown("---")
433
  st.markdown("""
434
  <div style="text-align: center; color: #666; margin-top: 2rem;">
435
- <p><strong>SAP Sales Analytics Dashboard</strong></p>
436
- <p>Built with Streamlit β€’ Data Source: Kaggle SAP Dataset</p>
 
437
  </div>
438
  """, unsafe_allow_html=True)
439
 
 
3
  import plotly.express as px
4
  import plotly.graph_objects as go
5
  import numpy as np
6
+ import os
7
  import warnings
8
  warnings.filterwarnings('ignore')
9
 
 
48
  @st.cache_data
49
  def load_kaggle_sap_data():
50
  """
51
+ Load real SAP dataset from Kaggle using proper Kaggle API
52
  """
53
  try:
54
+ # Import kaggle API
55
+ import kaggle
56
 
57
  dataset_name = "mustafakeser4/sap-dataset-bigquery-dataset"
58
+ download_path = "./kaggle_data"
59
 
60
+ # Download dataset using kaggle API
61
+ kaggle.api.authenticate()
62
+ kaggle.api.dataset_download_files(dataset_name, path=download_path, unzip=True)
63
+
64
+ # Load key SAP tables
65
+ tables = {}
66
+
67
+ # Sales Order Header (VBAK)
68
+ vbak_path = f"{download_path}/vbak.csv"
69
+ if os.path.exists(vbak_path):
70
+ vbak = pd.read_csv(vbak_path)
71
+ tables['sales_orders'] = vbak.head(5000) # Limit for performance
72
+
73
+ # Sales Order Items (VBAP)
74
+ vbap_path = f"{download_path}/vbap.csv"
75
+ if os.path.exists(vbap_path):
76
+ vbap = pd.read_csv(vbap_path)
77
+ tables['sales_items'] = vbap.head(10000)
78
+
79
+ # Customer Master (KNA1)
80
+ kna1_path = f"{download_path}/kna1.csv"
81
+ if os.path.exists(kna1_path):
82
+ kna1 = pd.read_csv(kna1_path)
83
+ tables['customers'] = kna1.head(3000)
84
+
85
+ # Material Descriptions (MAKT)
86
+ makt_path = f"{download_path}/makt.csv"
87
+ if os.path.exists(makt_path):
88
+ makt = pd.read_csv(makt_path)
89
+ # Filter for English descriptions only
90
+ makt_en = makt[makt.get('spras', makt.get('SPRAS', '')) == 'E']
91
+ tables['material_texts'] = makt_en.head(3000)
 
92
 
93
  return tables
94
 
 
 
 
95
  except Exception as e:
96
  st.error(f"Error loading Kaggle dataset: {str(e)}")
97
+ st.error("Please ensure you have:")
98
+ st.error("1. Kaggle API installed: `pip install kaggle`")
99
+ st.error("2. Kaggle credentials configured (kaggle.json file)")
100
+ st.error("3. Internet connection to download dataset")
101
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  def create_sales_analytics(tables):
104
  """
105
+ Create sales analytics from real SAP data
106
  """
107
+ if not tables:
108
+ return None
109
+
110
  try:
111
  vbak = tables.get('sales_orders', pd.DataFrame())
112
  vbap = tables.get('sales_items', pd.DataFrame())
 
114
  makt = tables.get('material_texts', pd.DataFrame())
115
 
116
  if vbak.empty or vbap.empty:
117
+ st.error("Required SAP tables (VBAK, VBAP) not found in dataset")
118
  return None
119
 
120
+ # Normalize column names
121
+ vbak.columns = vbak.columns.str.upper()
122
+ vbap.columns = vbap.columns.str.upper()
123
  if not kna1.empty:
124
+ kna1.columns = kna1.columns.str.upper()
125
  if not makt.empty:
126
+ makt.columns = makt.columns.str.upper()
127
 
128
  # Join sales orders with items
129
+ required_vbak_cols = ['VBELN', 'KUNNR', 'VKORG', 'VTWEG', 'SPART', 'ERDAT']
130
+ required_vbap_cols = ['VBELN', 'POSNR', 'MATNR', 'KWMENG', 'NETWR', 'WERKS']
131
+
132
+ # Check if required columns exist
133
+ missing_vbak = [col for col in required_vbak_cols if col not in vbak.columns]
134
+ missing_vbap = [col for col in required_vbap_cols if col not in vbap.columns]
135
+
136
+ if missing_vbak:
137
+ st.error(f"Missing columns in VBAK: {missing_vbak}")
138
+ return None
139
+ if missing_vbap:
140
+ st.error(f"Missing columns in VBAP: {missing_vbap}")
141
+ return None
142
+
143
+ # Join tables
144
  sales_data = pd.merge(
145
+ vbak[required_vbak_cols],
146
+ vbap[required_vbap_cols],
147
+ on='VBELN',
148
  how='inner'
149
  )
150
 
151
+ # Add customer info if available
152
+ if not kna1.empty and all(col in kna1.columns for col in ['KUNNR', 'NAME1', 'LAND1']):
153
+ customer_info = kna1[['KUNNR', 'NAME1', 'LAND1', 'REGIO']]
154
+ sales_data = pd.merge(sales_data, customer_info, on='KUNNR', how='left')
155
 
156
+ # Add material descriptions if available
157
+ if not makt.empty and all(col in makt.columns for col in ['MATNR', 'MAKTX']):
158
+ material_info = makt[['MATNR', 'MAKTX']]
159
+ sales_data = pd.merge(sales_data, material_info, on='MATNR', how='left')
160
 
161
  # Clean data
162
+ sales_data['NETWR'] = pd.to_numeric(sales_data['NETWR'], errors='coerce').fillna(0)
163
+ sales_data['KWMENG'] = pd.to_numeric(sales_data['KWMENG'], errors='coerce').fillna(0)
164
 
165
+ if 'ERDAT' in sales_data.columns:
166
+ sales_data['ERDAT'] = pd.to_datetime(sales_data['ERDAT'], errors='coerce')
167
 
168
  return sales_data
169
 
 
194
  st.markdown("""
195
  <div style="text-align: center; margin-bottom: 2rem;">
196
  <p style="font-size: 1.2rem; color: #666;">
197
+ Real SAP ERP Sales Data from Kaggle | Customer β€’ Regional β€’ Channel β€’ Product KPIs
198
  </p>
199
  </div>
200
  """, unsafe_allow_html=True)
201
 
202
+ # Instructions for setup
203
+ with st.expander("πŸ“‹ Setup Instructions", expanded=False):
204
+ st.markdown("""
205
+ **To use this dashboard, you need:**
206
+
207
+ 1. **Install Kaggle API**: `pip install kaggle`
208
+ 2. **Configure Kaggle credentials**:
209
+ - Go to Kaggle β†’ Account β†’ API β†’ Create New Token
210
+ - Download kaggle.json file
211
+ - Place it in ~/.kaggle/kaggle.json (Linux/Mac) or C:/Users/{username}/.kaggle/kaggle.json (Windows)
212
+ 3. **Set permissions**: `chmod 600 ~/.kaggle/kaggle.json`
213
+
214
+ **Dataset Source**: [mustafakeser4/sap-dataset-bigquery-dataset](https://www.kaggle.com/datasets/mustafakeser4/sap-dataset-bigquery-dataset)
215
+ """)
216
+
217
  # Load data
218
+ with st.spinner("Loading real SAP dataset from Kaggle..."):
219
  tables = load_kaggle_sap_data()
220
 
221
  if not tables:
222
+ st.error("❌ Failed to load SAP dataset from Kaggle")
223
+ st.info("Please check the setup instructions above and ensure your Kaggle API is properly configured.")
224
  return
225
 
226
  # Process sales data
227
+ with st.spinner("Processing sales analytics from real SAP data..."):
228
  sales_df = create_sales_analytics(tables)
229
 
230
  if sales_df is None or sales_df.empty:
231
+ st.error("❌ No valid sales data available for analysis")
232
  return
233
 
234
+ # Success message
235
+ st.success(f"βœ… Successfully loaded {len(sales_df):,} real SAP sales records from Kaggle!")
236
 
237
+ # Sidebar information
238
+ st.sidebar.header("πŸ“Š Real SAP Dataset Information")
239
+ st.sidebar.success(f"""
240
+ **Loaded SAP Tables:**
241
  - VBAK: Sales Orders ({len(tables.get('sales_orders', []))})
242
  - VBAP: Sales Items ({len(tables.get('sales_items', []))})
243
  - KNA1: Customers ({len(tables.get('customers', []))})
244
  - MAKT: Materials ({len(tables.get('material_texts', []))})
245
 
246
  **Analysis Records:** {len(sales_df):,}
247
+ **Data Source:** Kaggle - Real SAP ERP Data
248
  """)
249
 
250
  # Main KPIs
251
+ st.subheader("🎯 Sales KPIs from Real SAP Data")
252
 
253
  col1, col2, col3, col4 = st.columns(4)
254
 
255
  with col1:
256
+ total_revenue = sales_df['NETWR'].sum()
257
  create_kpi_card("Total Revenue", total_revenue, "currency")
258
 
259
  with col2:
260
+ unique_customers = sales_df['KUNNR'].nunique()
261
  create_kpi_card("Active Customers", unique_customers, "number")
262
 
263
  with col3:
264
+ avg_order_value = sales_df['NETWR'].mean()
265
  create_kpi_card("Avg Order Value", avg_order_value, "currency")
266
 
267
  with col4:
268
+ total_orders = sales_df['VBELN'].nunique()
269
  create_kpi_card("Sales Orders", total_orders, "number")
270
 
271
  # Analytics Tabs
 
277
  ])
278
 
279
  with tab1:
280
+ st.subheader("πŸ‘₯ Top 10 Customers by Revenue (Real SAP Data)")
281
 
282
+ if 'NAME1' in sales_df.columns:
283
+ customer_sales = sales_df.groupby(['KUNNR', 'NAME1'])['NETWR'].sum().reset_index()
284
+ customer_display_col = 'NAME1'
285
  else:
286
+ customer_sales = sales_df.groupby('KUNNR')['NETWR'].sum().reset_index()
287
+ customer_sales['NAME1'] = customer_sales['KUNNR']
288
+ customer_display_col = 'KUNNR'
289
+
290
+ top_customers = customer_sales.nlargest(10, 'NETWR')
 
 
 
 
 
 
 
 
 
 
 
 
291
 
292
+ if not top_customers.empty:
293
+ fig = px.bar(
294
+ top_customers,
295
+ x='NETWR',
296
+ y=customer_display_col,
297
+ orientation='h',
298
+ title="Top 10 Customers by Revenue",
299
+ labels={'NETWR': 'Revenue ($)', customer_display_col: 'Customer'},
300
+ color='NETWR',
301
+ color_continuous_scale='Blues'
302
+ )
303
+ fig.update_layout(height=500, yaxis={'categoryorder': 'total ascending'})
304
+ st.plotly_chart(fig, use_container_width=True)
305
+
306
+ # Customer table
307
+ display_customers = top_customers.copy()
308
+ display_customers['Revenue'] = display_customers['NETWR'].apply(lambda x: f"${x:,.0f}")
309
+ st.dataframe(display_customers[['KUNNR', customer_display_col, 'Revenue']], use_container_width=True)
310
+ else:
311
+ st.warning("No customer data available")
312
 
313
  with tab2:
314
+ st.subheader("🌍 Regional Sales Analysis (Real SAP Data)")
315
 
316
+ if 'LAND1' in sales_df.columns:
317
+ regional_sales = sales_df.groupby('LAND1')['NETWR'].sum().reset_index()
318
+ regional_sales = regional_sales.sort_values('NETWR', ascending=False).head(10)
 
 
319
 
320
+ if not regional_sales.empty:
321
+ col1, col2 = st.columns(2)
322
+
323
+ with col1:
324
+ fig = px.bar(
325
+ regional_sales,
326
+ x='NETWR',
327
+ y='LAND1',
328
+ orientation='h',
329
+ title="Revenue by Country",
330
+ labels={'NETWR': 'Revenue ($)', 'LAND1': 'Country'},
331
+ color='NETWR',
332
+ color_continuous_scale='Viridis'
333
+ )
334
+ fig.update_layout(height=400, yaxis={'categoryorder': 'total ascending'})
335
+ st.plotly_chart(fig, use_container_width=True)
336
+
337
+ with col2:
338
+ fig = px.pie(
339
+ regional_sales,
340
+ values='NETWR',
341
+ names='LAND1',
342
+ title="Revenue Distribution by Country"
343
+ )
344
+ fig.update_layout(height=400)
345
+ st.plotly_chart(fig, use_container_width=True)
346
  else:
347
+ st.warning("Regional data (LAND1) not available in the loaded dataset")
348
 
349
  with tab3:
350
+ st.subheader("πŸ“ˆ Distribution Channel Performance (Real SAP Data)")
351
 
352
+ if 'VTWEG' in sales_df.columns:
353
+ channel_sales = sales_df.groupby('VTWEG')['NETWR'].sum().reset_index()
354
+ channel_sales = channel_sales.sort_values('NETWR', ascending=False)
355
 
356
  fig = px.bar(
357
  channel_sales,
358
+ x='VTWEG',
359
+ y='NETWR',
360
  title="Revenue by Distribution Channel",
361
+ labels={'NETWR': 'Revenue ($)', 'VTWEG': 'Distribution Channel'},
362
+ color='NETWR',
363
  color_continuous_scale='Plasma'
364
  )
365
  fig.update_layout(height=400)
366
  st.plotly_chart(fig, use_container_width=True)
367
 
368
+ # Sales organization analysis
369
+ if 'VKORG' in sales_df.columns:
370
+ org_sales = sales_df.groupby('VKORG')['NETWR'].sum().reset_index()
371
+ org_sales = org_sales.sort_values('NETWR', ascending=False)
372
 
373
  st.subheader("πŸ“Š Sales Organization Performance")
374
  fig = px.bar(
375
  org_sales,
376
+ x='VKORG',
377
+ y='NETWR',
378
  title="Revenue by Sales Organization",
379
+ labels={'NETWR': 'Revenue ($)', 'VKORG': 'Sales Organization'},
380
+ color='NETWR',
381
  color_continuous_scale='Cividis'
382
  )
383
  fig.update_layout(height=400)
384
  st.plotly_chart(fig, use_container_width=True)
385
+ else:
386
+ st.warning("Distribution channel data (VTWEG) not available")
387
 
388
  with tab4:
389
+ st.subheader("πŸ›οΈ Top 10 Products by Revenue (Real SAP Data)")
390
 
391
+ if 'MAKTX' in sales_df.columns:
392
+ product_sales = sales_df.groupby(['MATNR', 'MAKTX'])['NETWR'].sum().reset_index()
393
+ product_display_col = 'MAKTX'
394
  else:
395
+ product_sales = sales_df.groupby('MATNR')['NETWR'].sum().reset_index()
396
+ product_sales['MAKTX'] = product_sales['MATNR']
397
+ product_display_col = 'MATNR'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
 
399
+ top_products = product_sales.nlargest(10, 'NETWR')
400
+
401
+ if not top_products.empty:
402
+ fig = px.bar(
403
+ top_products,
404
+ x='NETWR',
405
+ y=product_display_col,
406
+ orientation='h',
407
+ title="Top 10 Products by Revenue",
408
+ labels={'NETWR': 'Revenue ($)', product_display_col: 'Product'},
409
+ color='NETWR',
410
+ color_continuous_scale='Set3'
411
+ )
412
+ fig.update_layout(height=500, yaxis={'categoryorder': 'total ascending'})
413
+ st.plotly_chart(fig, use_container_width=True)
414
+
415
+ # Product table
416
+ display_products = top_products.copy()
417
+ display_products['Revenue'] = display_products['NETWR'].apply(lambda x: f"${x:,.0f}")
418
+ st.dataframe(display_products[['MATNR', product_display_col, 'Revenue']], use_container_width=True)
419
+ else:
420
+ st.warning("No product data available")
421
+
422
+ # Raw data viewer
423
+ with st.expander("πŸ” View Raw SAP Data", expanded=False):
424
+ st.subheader("Raw Sales Data Sample")
425
+ st.dataframe(sales_df.head(100), use_container_width=True)
426
+
427
+ # Download option
428
+ csv = sales_df.to_csv(index=False)
429
+ st.download_button(
430
+ label="πŸ“₯ Download Real SAP Sales Data (CSV)",
431
+ data=csv,
432
+ file_name="real_sap_sales_data.csv",
433
+ mime="text/csv"
434
+ )
435
 
436
  # Footer
437
  st.markdown("---")
438
  st.markdown("""
439
  <div style="text-align: center; color: #666; margin-top: 2rem;">
440
+ <p><strong>Real SAP Sales Analytics Dashboard</strong></p>
441
+ <p>Data Source: <a href="https://www.kaggle.com/datasets/mustafakeser4/sap-dataset-bigquery-dataset" target="_blank">Kaggle SAP Dataset</a></p>
442
+ <p>Built with Streamlit β€’ No Synthetic Data β€’ 100% Real SAP ERP Data</p>
443
  </div>
444
  """, unsafe_allow_html=True)
445