PD03 committed on
Commit
fcccb84
·
verified ·
1 Parent(s): 5b3b35c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +414 -0
app.py ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import duckdb
4
+ import plotly.express as px
5
+ import plotly.graph_objects as go
6
+ from plotly.subplots import make_subplots
7
+ from datasets import load_dataset
8
+ import numpy as np
9
+ import openai
10
+ from datetime import datetime, timedelta
11
+ import os
12
+
13
# Page-level configuration: browser tab title and icon, wide layout, and a
# sidebar that starts expanded.
st.set_page_config(
    page_title="SAP SALT Analytics Dashboard",
    # The icon literal was mis-encoded text (UTF-8 bytes decoded with a Greek
    # code page); restored to the bar-chart emoji it encodes.
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS injected once at import time. It defines three classes:
#   .main-header - centered page title
#   .metric-card - grey card with a blue left border
#   .insight-box - light-blue box with a teal left border
st.markdown("""
<style>
    .main-header {
        font-size: 2.5rem;
        color: #1f77b4;
        text-align: center;
        margin-bottom: 2rem;
    }
    .metric-card {
        background-color: #f0f2f6;
        padding: 1rem;
        border-radius: 0.5rem;
        border-left: 4px solid #1f77b4;
    }
    .insight-box {
        background-color: #e8f4f8;
        padding: 1rem;
        border-radius: 0.5rem;
        border-left: 4px solid #17a2b8;
        margin: 1rem 0;
    }
</style>
""", unsafe_allow_html=True)
45
+
46
# Columns that the dashboard's KPIs, charts and SQL queries read by name.
_REQUIRED_COLUMNS = (
    'Customer',
    'NetValue',
    'OrderDate',
    'SalesOffice',
    'SalesGroup',
    'ShippingPoint',
    'Plant',
    'Country',
)


def _build_demo_sales_data(n_records=10000, seed=42):
    """Return a reproducible synthetic sales table with the dashboard's columns.

    Args:
        n_records: Number of rows to generate.
        seed: Seed for the local random generator, so repeated calls yield
            the same table.

    Returns:
        A DataFrame with one row per synthetic sales document.
    """
    # A local Generator keeps the seeding from leaking into NumPy's global
    # random state.
    rng = np.random.default_rng(seed)

    return pd.DataFrame({
        'SalesDocument': [f'SD{i:06d}' for i in range(1, n_records + 1)],
        'Customer': [f'CUST{i:04d}' for i in rng.integers(1, 500, n_records)],
        'NetValue': rng.lognormal(7, 1, n_records),
        'OrderDate': pd.date_range(start='2023-01-01', end='2024-12-31', periods=n_records),
        'SalesOffice': rng.choice(['DE-001', 'US-002', 'UK-003', 'FR-004', 'IT-005'], n_records),
        'SalesGroup': rng.choice(['Group-A', 'Group-B', 'Group-C', 'Group-D'], n_records),
        'CustomerPaymentTerms': rng.choice(['NET30', 'NET60', 'COD', 'PREPAID'], n_records),
        'ShippingCondition': rng.choice(['STANDARD', 'EXPRESS', 'OVERNIGHT'], n_records),
        'ShippingPoint': rng.choice(['SP-001', 'SP-002', 'SP-003', 'SP-004'], n_records),
        'Plant': rng.choice(['P-001', 'P-002', 'P-003', 'P-004', 'P-005'], n_records),
        'Country': rng.choice(['Germany', 'USA', 'UK', 'France', 'Italy'], n_records),
        'Region': rng.choice(['Europe', 'North America'], n_records),
        'Quantity': rng.integers(1, 100, n_records),
        'UnitPrice': rng.uniform(10, 1000, n_records),
    })


@st.cache_data
def load_salt_data():
    """Load the SAP SALT joined table, or synthetic demo data as a fallback.

    The "joined_table" configuration of the "SAP/SALT" dataset is loaded and
    converted to pandas. If the result is empty or lacks any column listed in
    ``_REQUIRED_COLUMNS``, a generated demo table is used instead so that the
    dashboard's queries have the columns they reference.

    Returns:
        A DataFrame whose ``OrderDate`` column is datetime and whose
        ``NetValue`` column is numeric (unparseable values become NaN).
        An empty DataFrame is returned if loading or conversion raises;
        the error is shown with ``st.error``.
    """
    try:
        # Load the joined table which combines all four tables
        dataset = load_dataset("SAP/SALT", "joined_table", split="train")
        df = dataset.to_pandas()

        # Fall back on the actual column names, not just the column count:
        # every query downstream addresses these columns by name.
        missing = [col for col in _REQUIRED_COLUMNS if col not in df.columns]
        if df.empty or missing:
            st.warning(
                "Loaded dataset is empty or lacks columns this dashboard needs "
                f"(missing: {', '.join(missing) or 'none'}); "
                "showing generated demo data instead."
            )
            df = _build_demo_sales_data()

        # Ensure proper data types (both columns are guaranteed present here)
        df['OrderDate'] = pd.to_datetime(df['OrderDate'])
        df['NetValue'] = pd.to_numeric(df['NetValue'], errors='coerce')

        return df
    except Exception as e:
        # Top-level boundary: surface the problem in the UI and let the caller
        # detect failure through the empty frame.
        # NOTE(review): the decorator caches return values, so this empty
        # fallback is presumably cached too until the cache is cleared - confirm.
        st.error(f"Error loading dataset: {str(e)}")
        return pd.DataFrame()
89
+
90
@st.cache_resource
def init_duckdb(df):
    """Open an in-memory DuckDB connection exposing ``df`` as ``sales_data``.

    Args:
        df: DataFrame to register with the connection.

    Returns:
        The DuckDB connection on which ``df`` is registered under the name
        ``sales_data``.
    """
    connection = duckdb.connect(':memory:')
    # Registering makes the frame addressable by name in SQL.
    connection.register('sales_data', df)
    return connection
96
+
97
def generate_ai_insights(data_summary, openai_key=None):
    """Return markdown-formatted business insights for the dashboard.

    Args:
        data_summary: Text summary of the sales data; embedded verbatim in the
            prompt sent to the model.
        openai_key: OpenAI API key. When missing, empty or whitespace-only,
            no request is made and a canned demo text is returned.

    Returns:
        A markdown string: the demo text, the model's answer under an
        "AI-Powered Insights" heading, or an "AI Insights Error" message if
        the request raised or came back without content. Never raises for
        API failures.
    """
    # Normalize first so a blank or whitespace-only sidebar entry is not sent
    # to the API as if it were a key.
    api_key = (openai_key or "").strip()
    if not api_key:
        return (
            "\n"
            "🤖 **AI-Powered Insights** (Demo Mode - Add OpenAI API key for real insights):\n"
            "\n"
            "• **Revenue Optimization**: Focus on high-performing sales offices and expand successful strategies\n"
            "• **Customer Retention**: Implement targeted campaigns for customers with longer order intervals\n"
            "• **Operational Efficiency**: Optimize shipping routes and consolidate operations at high-volume plants\n"
            "• **Market Expansion**: Leverage successful regional strategies in underperforming areas\n"
        )

    try:
        client = openai.OpenAI(api_key=api_key)

        prompt = (
            "\n"
            "Based on this SAP sales data analysis:\n"
            f"{data_summary}\n"
            "\n"
            "Provide 4 specific, actionable business recommendations covering:\n"
            "1. Revenue growth opportunities\n"
            "2. Customer retention strategies\n"
            "3. Operational efficiency improvements\n"
            "4. Market expansion possibilities\n"
            "\n"
            "Format as bullet points with specific insights and metrics where applicable.\n"
        )

        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=500,
            temperature=0.7
        )

        content = response.choices[0].message.content
        if not content:
            # Avoid rendering the literal text "None" under the heading.
            return "🤖 **AI Insights Error**: the model returned an empty response"

        return f"🤖 **AI-Powered Insights**:\n\n{content}"

    except Exception as e:
        # Deliberate best-effort: the dashboard shows the failure text in
        # place of insights rather than aborting the page render.
        return f"🤖 **AI Insights Error**: {str(e)}"
136
+
137
def create_revenue_chart(conn):
    """Plot monthly revenue as a line against monthly order count as bars.

    Args:
        conn: Connection whose ``execute(sql).df()`` returns a DataFrame and
            which can resolve the ``sales_data`` relation.

    Returns:
        A Plotly figure with revenue on the primary y-axis and order count on
        the secondary y-axis. If the query yields no rows, a figure holding
        only a "No data available" annotation.
    """
    # Rows lacking an order date or a net value are left out of the monthly
    # aggregation.
    monthly = conn.execute("""
        SELECT
            DATE_TRUNC('month', OrderDate) as Month,
            SUM(NetValue) as Revenue,
            COUNT(*) as OrderCount
        FROM sales_data
        WHERE OrderDate IS NOT NULL AND NetValue IS NOT NULL
        GROUP BY Month
        ORDER BY Month
    """).df()

    if monthly.empty:
        placeholder = go.Figure()
        return placeholder.add_annotation(text="No data available", showarrow=False)

    revenue_line = go.Scatter(
        x=monthly['Month'],
        y=monthly['Revenue'],
        mode='lines+markers',
        name='Revenue',
        line=dict(color='#1f77b4'),
    )
    order_bars = go.Bar(
        x=monthly['Month'],
        y=monthly['OrderCount'],
        name='Order Count',
        opacity=0.6,
        marker_color='#ff7f0e',
    )

    # One subplot cell with two y-axes so both measures share the month axis.
    chart = make_subplots(specs=[[{"secondary_y": True}]])
    chart.add_trace(revenue_line, secondary_y=False)
    chart.add_trace(order_bars, secondary_y=True)

    chart.update_xaxes(title_text="Month")
    chart.update_yaxes(title_text="Revenue (€)", secondary_y=False)
    chart.update_yaxes(title_text="Order Count", secondary_y=True)
    chart.update_layout(title_text="Revenue Trends & Order Volume")

    return chart
174
+
175
def create_sales_office_chart(conn):
    """Bar chart of total revenue per sales office, colored by average order value.

    Args:
        conn: Connection whose ``execute(sql).df()`` returns a DataFrame and
            which can resolve the ``sales_data`` relation.

    Returns:
        A Plotly bar figure ordered by revenue (highest first), or a figure
        holding only a "No data available" annotation when the query yields
        no rows.
    """
    sql = """
        SELECT
            SalesOffice,
            SUM(NetValue) as Revenue,
            COUNT(*) as Orders,
            AVG(NetValue) as AvgOrderValue
        FROM sales_data
        WHERE NetValue IS NOT NULL
        GROUP BY SalesOffice
        ORDER BY Revenue DESC
    """
    per_office = conn.execute(sql).df()

    if not per_office.empty:
        chart = px.bar(
            per_office,
            x='SalesOffice',
            y='Revenue',
            title='Revenue by Sales Office',
            color='AvgOrderValue',
            color_continuous_scale='Blues',
        )
        chart.update_layout(xaxis_title="Sales Office", yaxis_title="Revenue (€)")
        return chart

    # Nothing to plot: hand back an annotated empty figure instead.
    return go.Figure().add_annotation(text="No data available", showarrow=False)
200
+
201
def create_customer_analysis_chart(conn):
    """Scatter the 20 highest-revenue customers by order count and order size.

    Args:
        conn: Connection whose ``execute(sql).df()`` returns a DataFrame and
            which can resolve the ``sales_data`` relation.

    Returns:
        A Plotly scatter figure with order count on x, average order value on
        y and marker size taken from total revenue. A figure holding only a
        "No data available" annotation when the query yields no rows.
    """
    sql = """
        SELECT
            Customer,
            SUM(NetValue) as TotalRevenue,
            COUNT(*) as OrderFrequency,
            AVG(NetValue) as AvgOrderValue
        FROM sales_data
        WHERE NetValue IS NOT NULL
        GROUP BY Customer
        ORDER BY TotalRevenue DESC
        LIMIT 20
    """
    top_customers = conn.execute(sql).df()

    if top_customers.empty:
        blank = go.Figure()
        return blank.add_annotation(text="No data available", showarrow=False)

    axis_labels = {
        'OrderFrequency': 'Number of Orders',
        'AvgOrderValue': 'Average Order Value (€)',
    }
    return px.scatter(
        top_customers,
        x='OrderFrequency',
        y='AvgOrderValue',
        size='TotalRevenue',
        hover_name='Customer',
        title='Customer Analysis: Order Frequency vs Average Order Value',
        labels=axis_labels,
    )
227
+
228
def create_geographic_chart(conn):
    """Pie chart showing how revenue is split across countries.

    Args:
        conn: Connection whose ``execute(sql).df()`` returns a DataFrame and
            which can resolve the ``sales_data`` relation.

    Returns:
        A Plotly pie figure with one slice per country, or a figure holding
        only a "No data available" annotation when the query yields no rows.
    """
    # Rows without a net value or without a country are excluded.
    by_country = conn.execute("""
        SELECT
            Country,
            SUM(NetValue) as Revenue,
            COUNT(*) as Orders
        FROM sales_data
        WHERE NetValue IS NOT NULL AND Country IS NOT NULL
        GROUP BY Country
        ORDER BY Revenue DESC
    """).df()

    if not by_country.empty:
        return px.pie(
            by_country,
            values='Revenue',
            names='Country',
            title='Revenue Distribution by Country',
        )

    return go.Figure().add_annotation(text="No data available", showarrow=False)
249
+
250
def _apply_sidebar_filters(df):
    """Render the date and sales-office sidebar widgets and apply them to ``df``.

    Args:
        df: The full, unfiltered sales DataFrame.

    Returns:
        The rows of ``df`` matching the current widget selections (``df``
        itself when no widget narrows it).
    """
    filtered = df

    # Date filter
    if 'OrderDate' in df.columns and not df['OrderDate'].isnull().all():
        min_date = df['OrderDate'].min().date()
        max_date = df['OrderDate'].max().date()
        date_range = st.sidebar.date_input(
            "Select Date Range",
            value=(min_date, max_date),
            min_value=min_date,
            max_value=max_date
        )
        # The widget yields fewer than two dates while the user is still
        # picking the range; only filter once both endpoints are present.
        if isinstance(date_range, (tuple, list)) and len(date_range) == 2:
            start, end = date_range
            # The untouched full range is not applied, so rows without an
            # order date stay in the totals exactly as before.
            if (start, end) != (min_date, max_date):
                order_day = pd.to_datetime(filtered['OrderDate']).dt.normalize()
                tz = order_day.dt.tz
                in_range = order_day.between(
                    pd.Timestamp(start, tz=tz), pd.Timestamp(end, tz=tz)
                )
                filtered = filtered[in_range]

    # Sales Office filter
    if 'SalesOffice' in df.columns:
        office_options = df['SalesOffice'].unique()
        sales_offices = st.sidebar.multiselect(
            "Sales Offices",
            options=office_options,
            default=office_options[:3]
        )
        filtered = filtered[filtered['SalesOffice'].isin(sales_offices)]

    return filtered


def _render_dashboard(df, conn, openai_key):
    """Draw KPIs, charts, detail tables and the insights box for ``df``.

    Args:
        df: The (already filtered) sales DataFrame used for the KPI figures.
        conn: DuckDB connection on which the same rows are registered as
            ``sales_data``; used by the charts and table queries.
        openai_key: Optional OpenAI API key forwarded to
            ``generate_ai_insights``.
    """
    import html  # local import: only needed for escaping the insights text

    # Key Metrics Row
    st.subheader("📈 Key Performance Indicators")

    col1, col2, col3, col4 = st.columns(4)

    # NOTE(review): the third argument of each st.metric call below is a
    # hard-coded delta string, not a value computed from the data.
    with col1:
        total_revenue = df['NetValue'].sum() if 'NetValue' in df.columns else 0
        st.metric("Total Revenue", f"€{total_revenue:,.0f}", "12.5%")

    with col2:
        total_orders = len(df)
        st.metric("Total Orders", f"{total_orders:,}", "8.2%")

    with col3:
        avg_order_value = df['NetValue'].mean() if 'NetValue' in df.columns else 0
        st.metric("Avg Order Value", f"€{avg_order_value:,.0f}", "3.1%")

    with col4:
        unique_customers = df['Customer'].nunique() if 'Customer' in df.columns else 0
        st.metric("Active Customers", f"{unique_customers:,}", "15.3%")

    # Charts Row 1
    st.subheader("📊 Revenue Analysis")
    col1, col2 = st.columns(2)

    with col1:
        revenue_chart = create_revenue_chart(conn)
        st.plotly_chart(revenue_chart, use_container_width=True)

    with col2:
        office_chart = create_sales_office_chart(conn)
        st.plotly_chart(office_chart, use_container_width=True)

    # Charts Row 2
    st.subheader("👥 Customer & Geographic Insights")
    col1, col2 = st.columns(2)

    with col1:
        customer_chart = create_customer_analysis_chart(conn)
        st.plotly_chart(customer_chart, use_container_width=True)

    with col2:
        geo_chart = create_geographic_chart(conn)
        st.plotly_chart(geo_chart, use_container_width=True)

    # Data Tables
    st.subheader("📋 Detailed Analytics")

    tab1, tab2, tab3 = st.tabs(["Top Customers", "Sales Performance", "Operational Metrics"])

    with tab1:
        query = """
            SELECT
                Customer,
                SUM(NetValue) as TotalRevenue,
                COUNT(*) as Orders,
                AVG(NetValue) as AvgOrderValue,
                MAX(OrderDate) as LastOrder
            FROM sales_data
            WHERE NetValue IS NOT NULL
            GROUP BY Customer
            ORDER BY TotalRevenue DESC
            LIMIT 10
        """
        top_customers = conn.execute(query).df()
        if not top_customers.empty:
            st.dataframe(top_customers, use_container_width=True)

    with tab2:
        query = """
            SELECT
                SalesOffice,
                SalesGroup,
                SUM(NetValue) as Revenue,
                COUNT(*) as Orders,
                AVG(NetValue) as AvgOrderValue
            FROM sales_data
            WHERE NetValue IS NOT NULL
            GROUP BY SalesOffice, SalesGroup
            ORDER BY Revenue DESC
        """
        sales_perf = conn.execute(query).df()
        if not sales_perf.empty:
            st.dataframe(sales_perf, use_container_width=True)

    with tab3:
        query = """
            SELECT
                ShippingPoint,
                Plant,
                COUNT(*) as Orders,
                AVG(NetValue) as AvgValue,
                COUNT(DISTINCT Customer) as UniqueCustomers
            FROM sales_data
            WHERE NetValue IS NOT NULL
            GROUP BY ShippingPoint, Plant
            ORDER BY Orders DESC
        """
        operational = conn.execute(query).df()
        if not operational.empty:
            st.dataframe(operational, use_container_width=True)

    # AI Insights Section
    st.subheader("🧠 AI-Powered Business Insights")

    # Both columns must exist, and the grouped result must be non-empty,
    # before idxmax can be asked for the best office.
    top_office = 'N/A'
    if 'SalesOffice' in df.columns and 'NetValue' in df.columns:
        office_revenue = df.groupby('SalesOffice')['NetValue'].sum()
        if not office_revenue.empty:
            top_office = office_revenue.idxmax()

    # Prepare data summary for AI
    data_summary = (
        "\n"
        f"Total Revenue: €{total_revenue:,.0f}\n"
        f"Total Orders: {total_orders:,}\n"
        f"Average Order Value: €{avg_order_value:,.0f}\n"
        f"Active Customers: {unique_customers:,}\n"
        f"Top Sales Office: {top_office}\n"
    )

    insights = generate_ai_insights(data_summary, openai_key)
    # The text may come from a model response or an exception message, and it
    # is placed inside raw HTML, so neutralize any markup it contains.
    safe_insights = html.escape(insights, quote=False)
    st.markdown(f'<div class="insight-box">{safe_insights}</div>', unsafe_allow_html=True)

    # Footer
    st.markdown("---")
    st.markdown("**Data Source**: SAP SALT Dataset | **Built with**: Streamlit + DuckDB + OpenAI")


def main():
    """Render the SAP SALT analytics dashboard.

    Loads the dataset, draws the sidebar controls, applies the selected date
    range and sales offices, and renders KPIs, charts, tables and insights
    for the matching rows. Returns early with a message if no data could be
    loaded or no rows match the filters.
    """
    # Header
    st.markdown('<h1 class="main-header">📊 SAP SALT Business Analytics Dashboard</h1>',
                unsafe_allow_html=True)

    # Load data
    with st.spinner("Loading SAP SALT dataset..."):
        df = load_salt_data()

    if df.empty:
        st.error("Failed to load data. Please check your connection.")
        return

    # Sidebar
    st.sidebar.header("🎛️ Dashboard Controls")

    # OpenAI API Key input
    openai_key = st.sidebar.text_input("OpenAI API Key (Optional)", type="password",
                                       help="Enter your OpenAI API key to enable AI-powered insights")

    filtered = _apply_sidebar_filters(df)
    if filtered.empty:
        st.warning("No records match the selected filters.")
        return

    # A short-lived connection is opened per run so the registered view
    # reflects this run's filter selection. Re-registering on the connection
    # cached by init_duckdb would change the data for every session sharing it.
    conn = duckdb.connect(':memory:')
    try:
        conn.register('sales_data', filtered)
        _render_dashboard(filtered, conn, openai_key)
    finally:
        conn.close()


if __name__ == "__main__":
    main()