PD03 committed on
Commit
73f080a
·
verified ·
1 Parent(s): 4303c4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -149
app.py CHANGED
@@ -9,24 +9,16 @@ warnings.filterwarnings('ignore')
9
  st.set_page_config(
10
  page_title="SAP Sales KPI Dashboard",
11
  page_icon="📊",
12
- layout="wide",
13
- initial_sidebar_state="expanded"
14
  )
15
 
16
  # --- Custom CSS ---
17
  st.markdown("""
18
  <style>
19
- .main-header {
20
- font-size: 2.8rem; font-weight: bold; color: #1f4e79;
21
- text-align: center; margin-bottom: 1rem;
22
- }
23
- .kpi-card {
24
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
25
- padding: 1.5rem; border-radius: 15px; color: white;
26
- margin: 0.5rem 0; box-shadow: 0 4px 6px rgba(0,0,0,0.1);
27
- }
28
- .kpi-value { font-size: 2.5rem; font-weight: bold; margin-bottom: 0.5rem; }
29
- .kpi-label { font-size: 1rem; opacity: 0.9; }
30
  </style>
31
  """, unsafe_allow_html=True)
32
 
@@ -34,185 +26,139 @@ st.markdown("""
34
  @st.cache_data(ttl=3600)
35
  def load_kaggle_sap_data():
36
  try:
 
37
  if 'KAGGLE_USERNAME' not in st.secrets or 'KAGGLE_KEY' not in st.secrets:
38
- st.error("Kaggle credentials not found in Streamlit secrets.")
39
- return None
40
 
41
  os.environ['KAGGLE_USERNAME'] = st.secrets['KAGGLE_USERNAME']
42
  os.environ['KAGGLE_KEY'] = st.secrets['KAGGLE_KEY']
43
-
44
  import kaggle
45
-
46
  dataset_name = "mustafakeser4/sap-dataset-bigquery-dataset"
47
  download_path = "./kaggle_data"
48
 
 
49
  if not os.path.exists(os.path.join(download_path, 'vbak.csv')):
50
- kaggle.api.authenticate()
51
- kaggle.api.dataset_download_files(dataset_name, path=download_path, unzip=True)
52
- st.success("Successfully downloaded dataset from Kaggle.")
53
- else:
54
- st.info("Using cached Kaggle dataset.")
55
 
 
56
  tables = {}
57
- required_files = {
58
- 'vbak': 'vbak.csv', 'vbap': 'vbap.csv',
59
- 'kna1': 'kna1.csv', 'makt': 'makt.csv'
60
- }
61
- for name, filename in required_files.items():
62
  file_path = os.path.join(download_path, filename)
63
  if os.path.exists(file_path):
64
  tables[name] = pd.read_csv(file_path, low_memory=False)
65
- else:
66
- tables[name] = pd.DataFrame()
67
-
68
  return tables
69
 
70
  except Exception as e:
71
- st.error(f"Error during Kaggle data loading: {e}")
72
- return None
73
 
74
  # --- Data Processing & Analytics ---
75
  @st.cache_data
76
  def create_sales_analytics(_tables):
77
- if not _tables: return None
78
-
79
  try:
80
  vbak = _tables['vbak'].copy()
81
  vbap = _tables['vbap'].copy()
82
  kna1 = _tables['kna1'].copy()
83
  makt = _tables['makt'].copy()
84
 
 
85
  for df in [vbak, vbap, kna1, makt]:
86
  df.columns = [col.upper() for col in df.columns]
87
 
88
  makt_en = makt[makt['SPRAS'] == 'E']
89
 
90
- sales_data = pd.merge(
91
- vbak[['VBELN', 'KUNNR', 'VKORG', 'VTWEG', 'ERDAT']],
92
- vbap[['VBELN', 'MATNR', 'NETWR']],
93
- on='VBELN', how='inner'
94
- )
95
- sales_data = sales_data.merge(kna1[['KUNNR', 'NAME1', 'LAND1']], on='KUNNR', how='left')
96
- sales_data = sales_data.merge(makt_en[['MATNR', 'MAKTX']], on='MATNR', how='left')
97
-
98
  sales_data['NETWR'] = pd.to_numeric(sales_data['NETWR'], errors='coerce').fillna(0)
99
  sales_data['ERDAT'] = pd.to_datetime(sales_data['ERDAT'], errors='coerce')
100
 
101
- # Limit the dataset size after merging for better performance
102
- return sales_data.head(20000)
103
 
104
  except Exception as e:
105
- st.error(f"Error processing sales data: {e}")
106
- return None
107
 
108
  # --- UI Components ---
109
- def create_kpi_card(title, value, format_type="currency"):
110
- if format_type == "currency": value_str = f"${value:,.0f}"
111
- elif format_type == "number": value_str = f"{value:,.0f}"
112
- else: value_str = str(value)
113
-
114
  st.markdown(f"""
115
  <div class="kpi-card">
116
- <div class="kpi-value">{value_str}</div>
117
  <div class="kpi-label">{title}</div>
118
  </div>
119
  """, unsafe_allow_html=True)
120
 
121
- def main():
122
- # --- Header ---
123
- st.markdown('<h1 class="main-header">🎯 SAP Sales KPI Dashboard</h1>', unsafe_allow_html=True)
124
- st.markdown("""
125
- <div style="text-align: center; margin-bottom: 2rem;">
126
- <p style="font-size: 1.2rem; color: #666;">
127
- Real SAP ERP Sales Data from Kaggle | Customer • Regional • Channel • Product KPIs
128
- </p>
129
- </div>
130
- """, unsafe_allow_html=True)
131
 
132
- # --- Setup Instructions ---
133
- with st.expander("🔑 Kaggle API Setup Instructions", expanded=True):
134
- st.info("This app requires Kaggle credentials. If it fails to load, please add your credentials to the secrets.")
135
- st.markdown("""
136
- 1. Go to **Kaggle > Account > API > Create New Token** to download `kaggle.json`.
137
- 2. In your Hugging Face Space, go to **Settings > Secrets**.
138
- 3. Add two secrets: `KAGGLE_USERNAME` and `KAGGLE_KEY` with the values from your file.
139
- 4. Restart the Space.
140
- """)
141
-
142
- # --- Data Loading and Processing ---
143
- tables = load_kaggle_sap_data()
144
- if not tables: return
145
-
146
- sales_df = create_sales_analytics(tables)
147
- if sales_df is None or sales_df.empty: return
148
-
149
- st.success(f"✅ Loaded and processed {len(sales_df):,} real SAP sales records!")
150
-
151
- # --- Main Dashboard ---
152
- st.sidebar.header("📊 Real SAP Dataset")
153
- st.sidebar.success(f"**Analysis Records:** {len(sales_df):,}\n\n**Data Source:** Kaggle")
154
-
155
- # --- KPIs ---
156
- st.subheader("Sales KPIs from Real SAP Data")
157
- col1, col2, col3, col4 = st.columns(4)
158
- with col1: create_kpi_card("Total Revenue", sales_df['NETWR'].sum())
159
- with col2: create_kpi_card("Active Customers", sales_df['KUNNR'].nunique(), "number")
160
- with col3: create_kpi_card("Avg Order Value", sales_df[sales_df['NETWR'] > 0]['NETWR'].mean())
161
- with col4: create_kpi_card("Sales Orders", sales_df['VBELN'].nunique(), "number")
162
-
163
- # --- Analytics Tabs ---
164
- tab1, tab2, tab3, tab4 = st.tabs(["👥 Top Customers", "🌍 Regional Analysis", "📈 Distribution Channels", "🛍️ Top Products"])
165
-
166
- # Customer Analysis
167
- with tab1:
168
- st.subheader("Top 10 Customers by Revenue")
169
- customer_summary = sales_df.groupby('NAME1')['NETWR'].sum().reset_index()
170
- top_customers = customer_summary.nlargest(10, 'NETWR')
171
- fig = px.bar(top_customers, x='NETWR', y='NAME1', orientation='h', title="Top 10 Customers", labels={'NETWR': 'Revenue ($)', 'NAME1': 'Customer'}, color='NETWR', color_continuous_scale='Blues')
172
- st.plotly_chart(fig.update_layout(yaxis={'categoryorder': 'total ascending'}), use_container_width=True)
173
-
174
- # Regional Analysis
175
- with tab2:
176
- st.subheader("Revenue by Country")
177
- # **FIXED: Create a clean summary DataFrame for the pie chart**
178
- regional_summary = sales_df.groupby('LAND1')['NETWR'].sum().reset_index()
179
- top_regions = regional_summary.nlargest(10, 'NETWR')
180
-
181
- fig = px.pie(top_regions,
182
- values='NETWR',
183
- names='LAND1',
184
- title="Top 10 Countries by Revenue")
185
- st.plotly_chart(fig, use_container_width=True)
186
-
187
- # Distribution Channel Analysis
188
- with tab3:
189
- st.subheader("Revenue by Distribution Channel")
190
- # **FIXED: Create a clean summary DataFrame for the bar chart**
191
- channel_summary = sales_df.groupby('VTWEG')['NETWR'].sum().reset_index()
192
-
193
- # Rename channel codes for clarity if they are numeric
194
- channel_summary['VTWEG'] = channel_summary['VTWEG'].astype(str)
195
-
196
- fig = px.bar(channel_summary,
197
- x='VTWEG',
198
- y='NETWR',
199
- title="Total Revenue by Distribution Channel",
200
- labels={'NETWR': 'Total Revenue ($)', 'VTWEG': 'Distribution Channel'},
201
- color='NETWR',
202
- color_continuous_scale='Plasma')
203
- st.plotly_chart(fig, use_container_width=True)
204
-
205
- # Product Analysis
206
- with tab4:
207
- st.subheader("Top 10 Products by Revenue")
208
- product_summary = sales_df.groupby('MAKTX')['NETWR'].sum().reset_index()
209
- top_products = product_summary.nlargest(10, 'NETWR')
210
- fig = px.bar(top_products, x='NETWR', y='MAKTX', orientation='h', title="Top 10 Products", labels={'NETWR': 'Revenue ($)', 'MAKTX': 'Product'}, color='NETWR', color_continuous_scale='Greens')
211
- st.plotly_chart(fig.update_layout(yaxis={'categoryorder': 'total ascending'}), use_container_width=True)
212
-
213
- # --- Footer ---
214
- st.markdown("---")
215
- st.markdown("<p style='text-align: center;'>Built with Streamlit • 100% Real SAP ERP Data from Kaggle</p>", unsafe_allow_html=True)
216
-
217
- if __name__ == "__main__":
218
- main()
 
9
  st.set_page_config(
10
  page_title="SAP Sales KPI Dashboard",
11
  page_icon="📊",
12
+ layout="wide"
 
13
  )
14
 
15
  # --- Custom CSS ---
16
  st.markdown("""
17
  <style>
18
+ .main-header { font-size: 2.8rem; font-weight: bold; color: #1f4e79; text-align: center; }
19
+ .kpi-card { background: #f8f9fa; padding: 1.5rem; border-radius: 10px; border-left: 5px solid #667eea; margin-bottom: 1rem; }
20
+ .kpi-value { font-size: 2.5rem; font-weight: bold; color: #1f4e79; }
21
+ .kpi-label { font-size: 1rem; color: #555; }
 
 
 
 
 
 
 
22
  </style>
23
  """, unsafe_allow_html=True)
24
 
 
26
  @st.cache_data(ttl=3600)
27
  def load_kaggle_sap_data():
28
  try:
29
+ # Check for secrets
30
  if 'KAGGLE_USERNAME' not in st.secrets or 'KAGGLE_KEY' not in st.secrets:
31
+ return "Kaggle credentials not found in Streamlit secrets."
 
32
 
33
  os.environ['KAGGLE_USERNAME'] = st.secrets['KAGGLE_USERNAME']
34
  os.environ['KAGGLE_KEY'] = st.secrets['KAGGLE_KEY']
35
+
36
  import kaggle
 
37
  dataset_name = "mustafakeser4/sap-dataset-bigquery-dataset"
38
  download_path = "./kaggle_data"
39
 
40
+ # Download only if files don't exist
41
  if not os.path.exists(os.path.join(download_path, 'vbak.csv')):
42
+ with st.spinner("Downloading dataset from Kaggle... This may take a moment."):
43
+ kaggle.api.authenticate()
44
+ kaggle.api.dataset_download_files(dataset_name, path=download_path, unzip=True)
 
 
45
 
46
+ # Load tables
47
  tables = {}
48
+ for name, filename in {'vbak': 'vbak.csv', 'vbap': 'vbap.csv', 'kna1': 'kna1.csv', 'makt': 'makt.csv'}.items():
 
 
 
 
49
  file_path = os.path.join(download_path, filename)
50
  if os.path.exists(file_path):
51
  tables[name] = pd.read_csv(file_path, low_memory=False)
 
 
 
52
  return tables
53
 
54
  except Exception as e:
55
+ return f"Error during Kaggle data loading: {e}"
 
56
 
57
  # --- Data Processing & Analytics ---
58
  @st.cache_data
59
  def create_sales_analytics(_tables):
 
 
60
  try:
61
  vbak = _tables['vbak'].copy()
62
  vbap = _tables['vbap'].copy()
63
  kna1 = _tables['kna1'].copy()
64
  makt = _tables['makt'].copy()
65
 
66
+ # Normalize column names
67
  for df in [vbak, vbap, kna1, makt]:
68
  df.columns = [col.upper() for col in df.columns]
69
 
70
  makt_en = makt[makt['SPRAS'] == 'E']
71
 
72
+ # Merge and process data
73
+ sales_data = pd.merge(vbak, vbap, on='VBELN', how='inner')
74
+ sales_data = sales_data.merge(kna1, on='KUNNR', how='left')
75
+ sales_data = sales_data.merge(makt_en, on='MATNR', how='left')
76
+
 
 
 
77
  sales_data['NETWR'] = pd.to_numeric(sales_data['NETWR'], errors='coerce').fillna(0)
78
  sales_data['ERDAT'] = pd.to_datetime(sales_data['ERDAT'], errors='coerce')
79
 
80
+ return sales_data
 
81
 
82
  except Exception as e:
83
+ return f"Error processing sales data: {e}"
 
84
 
85
  # --- UI Components ---
86
+ def create_kpi_card(title, value, format_str="${:,.0f}"):
 
 
 
 
87
  st.markdown(f"""
88
  <div class="kpi-card">
89
+ <div class="kpi-value">{value:{format_str.split('{')[1].split('}')[0]}}</div>
90
  <div class="kpi-label">{title}</div>
91
  </div>
92
  """, unsafe_allow_html=True)
93
 
94
+ # --- Main App Logic ---
95
+ st.markdown('<h1 class="main-header">🎯 SAP Sales KPI Dashboard</h1>', unsafe_allow_html=True)
 
 
 
 
 
 
 
 
96
 
97
+ # Cache clearing button
98
+ if st.sidebar.button("🔄 Clear Cache & Rerun"):
99
+ st.cache_data.clear()
100
+ st.rerun()
101
+
102
+ st.sidebar.title("Dashboard Controls")
103
+
104
+ # Load data and handle errors
105
+ raw_tables = load_kaggle_sap_data()
106
+ if isinstance(raw_tables, str):
107
+ st.error(raw_tables)
108
+ st.stop()
109
+
110
+ sales_df = create_sales_analytics(raw_tables)
111
+ if isinstance(sales_df, str):
112
+ st.error(sales_df)
113
+ st.stop()
114
+
115
+ st.success(f"✅ Loaded and processed {len(sales_df):,} real SAP sales records!")
116
+
117
+ # --- Sidebar Filters ---
118
+ st.sidebar.header("Filters")
119
+ selected_region = st.sidebar.multiselect(
120
+ "Select Region (Country)",
121
+ options=sales_df['LAND1'].unique(),
122
+ default=sales_df['LAND1'].unique()
123
+ )
124
+
125
+ filtered_df = sales_df[sales_df['LAND1'].isin(selected_region)]
126
+
127
+ # --- Main KPIs ---
128
+ st.subheader("Sales KPIs from Real SAP Data")
129
+ col1, col2, col3, col4 = st.columns(4)
130
+ with col1: create_kpi_card("Total Revenue", filtered_df['NETWR'].sum())
131
+ with col2: create_kpi_card("Active Customers", filtered_df['KUNNR'].nunique(), format_str="{:,.0f}")
132
+ with col3: create_kpi_card("Avg Order Value", filtered_df[filtered_df['NETWR'] > 0]['NETWR'].mean())
133
+ with col4: create_kpi_card("Sales Orders", filtered_df['VBELN'].nunique(), format_str="{:,.0f}")
134
+
135
+ # --- Analytics Tabs ---
136
+ tab1, tab2, tab3, tab4 = st.tabs(["👥 Top Customers", "🌍 Regional Analysis", "📈 Distribution Channels", "🛍️ Top Products"])
137
+
138
+ with tab1:
139
+ st.subheader("Top 10 Customers by Revenue")
140
+ customer_summary = filtered_df.groupby('NAME1')['NETWR'].sum().nlargest(10).reset_index()
141
+ fig = px.bar(customer_summary, x='NETWR', y='NAME1', orientation='h', labels={'NETWR': 'Revenue ($)', 'NAME1': 'Customer'}, color='NETWR', color_continuous_scale='Blues')
142
+ st.plotly_chart(fig.update_layout(yaxis={'categoryorder': 'total ascending'}), use_container_width=True)
143
+
144
+ with tab2:
145
+ st.subheader("Revenue by Country")
146
+ regional_summary = filtered_df.groupby('LAND1')['NETWR'].sum().nlargest(10).reset_index()
147
+ fig = px.pie(regional_summary, values='NETWR', names='LAND1', title="Top 10 Countries by Revenue")
148
+ st.plotly_chart(fig, use_container_width=True)
149
+
150
+ with tab3:
151
+ st.subheader("Revenue by Distribution Channel")
152
+ channel_summary = filtered_df.groupby('VTWEG')['NETWR'].sum().reset_index()
153
+ channel_summary['VTWEG'] = channel_summary['VTWEG'].astype(str)
154
+ fig = px.bar(channel_summary, x='VTWEG', y='NETWR', title="Total Revenue by Distribution Channel", labels={'NETWR': 'Total Revenue ($)', 'VTWEG': 'Distribution Channel'}, color='NETWR', color_continuous_scale='Plasma')
155
+ st.plotly_chart(fig, use_container_width=True)
156
+
157
+ with tab4:
158
+ st.subheader("Top 10 Products by Revenue")
159
+ product_summary = filtered_df.groupby('MAKTX')['NETWR'].sum().nlargest(10).reset_index()
160
+ fig = px.bar(product_summary, x='NETWR', y='MAKTX', orientation='h', labels={'NETWR': 'Revenue ($)', 'MAKTX': 'Product'}, color='NETWR', color_continuous_scale='Greens')
161
+ st.plotly_chart(fig.update_layout(yaxis={'categoryorder': 'total ascending'}), use_container_width=True)
162
+
163
+ st.markdown("---")
164
+ st.markdown("<p style='text-align: center;'>Built with Streamlit • 100% Real SAP ERP Data from Kaggle</p>", unsafe_allow_html=True)