nadish1210 committed on
Commit
bd4b1bd
·
verified ·
1 Parent(s): 65d92f3

Update backend.py

Browse files
Files changed (1) hide show
  1. backend.py +83 -321
backend.py CHANGED
@@ -1,360 +1,122 @@
1
  import pandas as pd
2
  import plotly.express as px
3
  from datetime import datetime
4
- import kaleido # Helps prevent some write_image issues
5
  import os
 
 
 
6
 
7
- # ====================== SALES INSIGHTS FUNCTION ======================
8
- def sales_insights(file_path: str, analysis_type: str = "region") -> tuple:
9
- """
10
- Main function to analyze sales data and generate charts.
11
 
12
- Args:
13
- file_path (str): Path to the uploaded CSV or Excel file.
14
- analysis_type (str): Type of analysis to perform.
15
- Options: "region", "month", "product", "profit", "top5_profit", "low5_sales".
16
-
17
- Returns:
18
- tuple: (plotly.graph_objects.Figure, chart_image_path, summary_excel_path)
19
-
20
- Raises:
21
- ValueError: If the file cannot be read or required columns are missing.
22
- """
23
  try:
24
- # Read the uploaded file
25
  if file_path.endswith('.csv'):
26
  df = pd.read_csv(file_path)
27
  else:
28
  df = pd.read_excel(file_path)
29
-
30
- except Exception as e:
31
- raise ValueError(f"Error reading file: {str(e)}")
32
-
33
- # Check required columns
34
- required_columns = ["Region", "Sales", "Product", "Profit", "Date"]
35
- missing_cols = [col for col in required_columns if col not in df.columns]
36
- if missing_cols:
37
- raise ValueError(f"Missing columns in file: {missing_cols}")
38
-
39
- # Ensure numeric columns are actually numeric
40
- for col in ["Sales", "Profit"]:
41
- df[col] = pd.to_numeric(df[col], errors='coerce')
42
-
43
- # Drop rows where critical numeric data is missing
44
- df = df.dropna(subset=["Sales", "Profit"])
45
 
46
- # ====================== ANALYSIS LOGIC ======================
47
- if analysis_type == "region":
48
- summary = df.groupby("Region")["Sales"].sum().reset_index()
49
- fig = px.bar(
50
- summary,
51
- x="Region",
52
- y="Sales",
53
- title="Sales by Region",
54
- text="Sales",
55
- color="Region"
56
- )
57
 
58
- elif analysis_type == "month":
59
- df["Date"] = pd.to_datetime(df["Date"], errors='coerce')
60
- df = df.dropna(subset=["Date"])
61
-
62
- # Create a proper monthly period for sorting, but use string for display
63
- df["Month_Period"] = df["Date"].dt.to_period("M")
64
- df["Month_Name"] = df["Date"].dt.strftime("%b %Y")
65
-
66
- summary = df.groupby(["Month_Period", "Month_Name"])["Sales"].sum().reset_index()
67
- # Sort chronologically by the Period object, then drop it
68
- summary = summary.sort_values("Month_Period")
69
-
70
- fig = px.line(
71
- summary,
72
- x="Month_Name",
73
- y="Sales",
74
- title="Monthly Sales Trend",
75
- markers=True
76
- )
77
 
78
- elif analysis_type == "product":
79
- summary = df.groupby("Product")["Sales"].sum().reset_index()
80
- fig = px.bar(
81
- summary,
82
- x="Product",
83
- y="Sales",
84
- title="Sales by Product",
85
- text="Sales",
86
- color="Product"
87
- )
88
 
89
- elif analysis_type == "profit":
90
- summary = df.groupby("Product")["Profit"].sum().reset_index()
91
- fig = px.bar(
92
- summary,
93
- x="Product",
94
- y="Profit",
95
- title="Profit by Product",
96
- text="Profit",
97
- color="Product"
98
- )
99
 
100
- elif analysis_type == "top5_profit":
101
- summary = (
102
- df.groupby("Product")["Profit"]
103
- .sum()
104
- .reset_index()
105
- .sort_values("Profit", ascending=False)
106
- .head(5)
107
- )
108
- fig = px.bar(
109
- summary,
110
- x="Product",
111
- y="Profit",
112
- title="Top 5 Products by Profit",
113
- text="Profit",
114
- color="Product"
115
- )
116
 
117
- elif analysis_type == "low5_sales":
118
- summary = (
119
- df.groupby("Product")["Sales"]
120
- .sum()
121
- .reset_index()
122
- .sort_values("Sales", ascending=True)
123
- .head(5)
124
- )
125
- fig = px.bar(
126
- summary,
127
- x="Product",
128
- y="Sales",
129
- title="Bottom 5 Products by Sales",
130
- text="Sales",
131
- color="Product"
132
- )
133
-
134
- else:
135
- # Default fallback
136
- summary = df.groupby("Product")["Sales"].sum().reset_index()
137
- fig = px.bar(
138
- summary,
139
- x="Product",
140
- y="Sales",
141
- title="Sales by Product",
142
- color="Product"
143
- )
144
-
145
- # Improve chart appearance
146
- fig.update_layout(
147
- xaxis_tickangle=-45,
148
- height=600,
149
- title_x=0.5,
150
- template="plotly_white"
151
- )
152
- fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
153
-
154
- import pandas as pd
155
- import plotly.express as px
156
- from datetime import datetime
157
- import os
158
 
159
- # ====================== SALES INSIGHTS FUNCTION ======================
160
- def sales_insights(file_path: str, analysis_type: str = "region") -> tuple:
161
- """
162
- Main function to analyze sales data and generate charts.
163
 
164
- Args:
165
- file_path (str): Path to the uploaded CSV or Excel file.
166
- analysis_type (str): Type of analysis: "region", "month", "product",
167
- "profit", "top5_profit", "low5_sales"
168
 
169
- Returns:
170
- tuple: (plotly Figure, chart_image_path, summary_excel_path)
171
- """
172
- try:
173
- # Read the file
174
- if file_path.endswith('.csv'):
175
- df = pd.read_csv(file_path)
176
  else:
177
- df = pd.read_excel(file_path)
178
-
179
- except Exception as e:
180
- raise ValueError(f"Error reading file: {str(e)}")
181
-
182
- # Check required columns
183
- required_columns = ["Region", "Sales", "Product", "Profit", "Date"]
184
- missing_cols = [col for col in required_columns if col not in df.columns]
185
- if missing_cols:
186
- raise ValueError(f"Missing required columns: {missing_cols}")
187
-
188
- # Convert numeric columns safely
189
- for col in ["Sales", "Profit"]:
190
- df[col] = pd.to_numeric(df[col], errors='coerce')
191
-
192
- # Drop rows with missing critical numeric data
193
- df = df.dropna(subset=["Sales", "Profit"]).copy()
194
-
195
- # ====================== ANALYSIS LOGIC ======================
196
- if analysis_type == "region":
197
- summary = df.groupby("Region", as_index=False)["Sales"].sum()
198
- fig = px.bar(
199
- summary,
200
- x="Region",
201
- y="Sales",
202
- title="Sales by Region",
203
- text="Sales",
204
- color="Region"
205
- )
206
 
207
- elif analysis_type == "month":
208
- df["Date"] = pd.to_datetime(df["Date"], errors='coerce')
209
- df = df.dropna(subset=["Date"]).copy()
210
-
211
- df["Month_Period"] = df["Date"].dt.to_period("M")
212
- df["Month_Name"] = df["Date"].dt.strftime("%b %Y")
213
-
214
- summary = df.groupby(["Month_Period", "Month_Name"], as_index=False)["Sales"].sum()
215
- summary = summary.sort_values("Month_Period")
216
-
217
- fig = px.line(
218
- summary,
219
- x="Month_Name",
220
- y="Sales",
221
- title="Monthly Sales Trend",
222
- markers=True,
223
- line_shape="linear"
224
- )
225
 
226
- elif analysis_type == "product":
227
- summary = df.groupby("Product", as_index=False)["Sales"].sum()
228
- fig = px.bar(
229
- summary, x="Product", y="Sales", title="Sales by Product",
230
- text="Sales", color="Product"
231
- )
232
 
233
- elif analysis_type == "profit":
234
- summary = df.groupby("Product", as_index=False)["Profit"].sum()
235
- fig = px.bar(
236
- summary, x="Product", y="Profit", title="Profit by Product",
237
- text="Profit", color="Product"
238
- )
239
-
240
- elif analysis_type == "top5_profit":
241
- summary = (
242
- df.groupby("Product", as_index=False)["Profit"]
243
- .sum()
244
- .sort_values("Profit", ascending=False)
245
- .head(5)
246
- )
247
- fig = px.bar(
248
- summary, x="Product", y="Profit", title="Top 5 Products by Profit",
249
- text="Profit", color="Product"
250
- )
251
-
252
- elif analysis_type == "low5_sales":
253
- summary = (
254
- df.groupby("Product", as_index=False)["Sales"]
255
- .sum()
256
- .sort_values("Sales", ascending=True)
257
- .head(5)
258
- )
259
- fig = px.bar(
260
- summary, x="Product", y="Sales", title="Bottom 5 Products by Sales",
261
- text="Sales", color="Product"
262
- )
263
-
264
- else:
265
- # Default fallback
266
- summary = df.groupby("Product", as_index=False)["Sales"].sum()
267
- fig = px.bar(
268
- summary, x="Product", y="Sales", title="Sales by Product",
269
- text="Sales", color="Product"
270
- )
271
-
272
- # Improve layout
273
- fig.update_layout(
274
- xaxis_tickangle=-45,
275
- height=600,
276
- title_x=0.5,
277
- template="plotly_white",
278
- margin=dict(l=40, r=40, t=60, b=100)
279
- )
280
- fig.update_traces(
281
- texttemplate='%{text:.2s}',
282
- textposition='outside',
283
- marker_line_color='white',
284
- marker_line_width=1
285
- )
286
-
287
- # ====================== SAVE OUTPUTS ======================
288
- output_chart_path = "output_chart.png"
289
- output_data_path = "output_data.xlsx"
290
-
291
- # Save chart with better error handling for HF Spaces
292
- try:
293
- fig.write_image(output_chart_path, width=1200, height=700, scale=2, engine="kaleido")
294
- except Exception as e:
295
- print(f"Warning: High-res image save failed: {e}")
296
  try:
297
- # Fallback without scale
298
- fig.write_image(output_chart_path, width=1200, height=700)
299
- except Exception as e2:
300
- print(f"Error saving chart: {e2}")
301
- # Ultimate fallback - save as static image
302
- fig.write_image(output_chart_path, width=1000, height=600)
 
 
 
 
 
 
 
 
 
 
 
 
303
 
304
- # Save summary data
305
- try:
306
- summary.to_excel(output_data_path, index=False)
307
  except Exception as e:
308
- print(f"Error saving summary Excel: {e}")
309
 
310
- return fig, output_chart_path, output_data_path
311
 
312
-
313
- # ====================== FEEDBACK FUNCTION ======================
314
  def save_feedback(name: str, comment: str, stars: int) -> str:
315
- """
316
- Save user feedback to feedback.xlsx with better robustness.
317
- """
318
  feedback_file = "feedback.xlsx"
319
-
320
- # Validate and sanitize inputs
321
  try:
322
- stars = int(stars)
323
- stars = max(1, min(5, stars))
324
- except (ValueError, TypeError):
325
  stars = 3
326
 
327
  new_entry = {
328
  "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
329
- "Name": str(name).strip()[:100] if name else "Anonymous", # Limit length
330
- "Comment": str(comment).strip()[:1000] if comment else "", # Limit length
331
  "Stars": stars
332
  }
333
 
334
- try:
335
- if os.path.exists(feedback_file):
336
- df = pd.read_excel(feedback_file)
337
- # Ensure all expected columns exist
338
- expected_cols = ["Timestamp", "Name", "Comment", "Stars"]
339
- for col in expected_cols:
340
- if col not in df.columns:
341
- df[col] = None
342
- df = df[expected_cols]
343
- else:
344
- df = pd.DataFrame(columns=["Timestamp", "Name", "Comment", "Stars"])
345
-
346
- except Exception as e:
347
- print(f"Feedback file read error: {e}")
348
- df = pd.DataFrame(columns=["Timestamp", "Name", "Comment", "Stars"])
349
-
350
- # Append new feedback
351
- new_df = pd.DataFrame([new_entry])
352
- df = pd.concat([df, new_df], ignore_index=True)
353
-
354
- # Save with error handling
355
- try:
356
- df.to_excel(feedback_file, index=False)
357
- return "✅ Thank you! Your feedback has been saved successfully."
358
- except Exception as e:
359
- print(f"Error writing feedback: {e}")
360
- return f"❌ Error saving feedback: {str(e)}"
 
1
  import pandas as pd
2
  import plotly.express as px
3
  from datetime import datetime
 
4
  import os
5
+ import time
6
+ from PIL import Image
7
+ import numpy as np
8
 
9
+ # Early import for kaleido
10
+ import kaleido
 
 
11
 
12
+ # ====================== SALES INSIGHTS ======================
13
def _summarize_sales(df, analysis_type):
    """Build the summary table and the matching Plotly figure.

    Args:
        df: Cleaned sales data containing at least the "Region", "Product",
            "Sales", "Profit" and "Date" columns.
        analysis_type: Requested analysis; unknown values fall back to
            sales by product.

    Returns:
        tuple: ``(summary, fig)`` where ``summary`` is the aggregated
        DataFrame that the chart is drawn from.
    """
    if analysis_type == "month":
        monthly = df.copy()
        monthly["Date"] = pd.to_datetime(monthly["Date"], errors="coerce")
        monthly = monthly.dropna(subset=["Date"])

        # Group on a real monthly Period so rows come out in calendar order.
        # Grouping on the "%b %Y" label sorts alphabetically ("Apr" < "Jan"),
        # which scrambles the trend line.
        monthly["Month_Period"] = monthly["Date"].dt.to_period("M")
        summary = monthly.groupby("Month_Period", as_index=False)["Sales"].sum()
        summary["Month_Name"] = summary["Month_Period"].dt.strftime("%b %Y")
        # Keep only the display columns so the exported workbook has the
        # same layout as before (Month_Name, Sales).
        summary = summary[["Month_Name", "Sales"]]

        fig = px.line(
            summary,
            x="Month_Name",
            y="Sales",
            title="Monthly Sales Trend",
            markers=True,
        )
        return summary, fig

    # (group column, value column, chart title, optional top/bottom-5 filter)
    bar_specs = {
        "region": ("Region", "Sales", "Sales by Region", None),
        "product": ("Product", "Sales", "Sales by Product", None),
        "profit": ("Product", "Profit", "Profit by Product", None),
        "top5_profit": ("Product", "Profit", "Top 5 Products by Profit", "largest"),
        "low5_sales": ("Product", "Sales", "Bottom 5 Products by Sales", "smallest"),
    }
    group_col, value_col, title, keep = bar_specs.get(
        analysis_type, bar_specs["product"]
    )

    summary = df.groupby(group_col, as_index=False)[value_col].sum()
    if keep == "largest":
        summary = summary.nlargest(5, value_col)
    elif keep == "smallest":
        summary = summary.nsmallest(5, value_col)

    fig = px.bar(
        summary,
        x=group_col,
        y=value_col,
        title=title,
        color=group_col,
        text=value_col,
    )
    return summary, fig


def _save_chart_image(fig, chart_path):
    """Write ``fig`` to ``chart_path``, degrading to a blank placeholder.

    Image export is best-effort: a failure here must not abort the analysis,
    but every failure is reported instead of being silently swallowed.
    """
    # First try a slightly upscaled render, then a plain one.
    for extra in ({"scale": 1.2}, {}):
        try:
            fig.write_image(chart_path, width=1100, height=650, **extra)
            return
        except Exception as exc:
            print(f"Warning: chart export failed: {exc}")

    # Last resort: a blank placeholder image at the same path.
    try:
        Image.new("RGB", (1100, 650), color="#f0f0f0").save(chart_path)
        print("Warning: Used blank image as fallback")
    except Exception as exc:
        print(f"Error: could not write fallback image: {exc}")


def sales_insights(file_path: str, analysis_type: str = "region"):
    """Analyze a sales file and produce a chart plus a summary workbook.

    Args:
        file_path: Path to the uploaded file. A ``.csv`` suffix (any letter
            case) is read with ``pd.read_csv``; everything else is passed to
            ``pd.read_excel``.
        analysis_type: One of "region", "month", "product", "profit",
            "top5_profit" or "low5_sales". Any other value falls back to
            sales by product.

    Returns:
        tuple: ``(fig, chart_path, data_path)`` - the Plotly figure, the path
        of the exported PNG ("output_chart.png") and the path of the summary
        workbook ("output_data.xlsx").

    Raises:
        ValueError: If the file cannot be read, required columns are missing,
            or any later analysis/export step fails. The original exception
            is attached as ``__cause__``.
    """
    try:
        if file_path.lower().endswith(".csv"):
            df = pd.read_csv(file_path)
        else:
            df = pd.read_excel(file_path)

        required = ["Region", "Sales", "Product", "Profit", "Date"]
        missing = [col for col in required if col not in df.columns]
        if missing:
            raise ValueError(f"Missing columns: {missing}")

        # Non-numeric cells become NaN and the affected rows are dropped.
        for col in ["Sales", "Profit"]:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        df = df.dropna(subset=["Sales", "Profit"]).copy()

        summary, fig = _summarize_sales(df, analysis_type)

        fig.update_layout(
            xaxis_tickangle=-45,
            height=600,
            title_x=0.5,
            template="plotly_white",
        )
        # "outside" is a bar-only text position; scatter/line traces reject
        # it, so restrict the label styling to bar traces.
        fig.update_traces(
            texttemplate="%{text:.2s}",
            textposition="outside",
            selector=dict(type="bar"),
        )

        chart_path = "output_chart.png"
        data_path = "output_data.xlsx"

        _save_chart_image(fig, chart_path)
        summary.to_excel(data_path, index=False)

        return fig, chart_path, data_path

    except Exception as e:
        # Callers only need to handle ValueError; keep the cause for debugging.
        raise ValueError(f"Analysis failed: {str(e)}") from e
92
 
 
93
 
94
+ # Feedback function: clamps the rating, trims the text fields, and appends one row to feedback.xlsx
 
95
def save_feedback(name: str, comment: str, stars: int) -> str:
    """Append one feedback entry to ``feedback.xlsx`` and report the outcome.

    Args:
        name: Reviewer name. Trimmed and cut to 100 characters; empty or
            whitespace-only values are stored as "Anonymous".
        comment: Free-text comment. Trimmed and cut to 800 characters.
        stars: Rating. Converted to ``int`` and clamped to 1..5; values that
            cannot be converted are stored as 3.

    Returns:
        str: A success message once the workbook has been written, otherwise
        a failure message after the attempts are exhausted.
    """
    feedback_file = "feedback.xlsx"
    columns = ["Timestamp", "Name", "Comment", "Stars"]
    max_attempts = 3

    try:
        stars = max(1, min(5, int(stars)))
    except (TypeError, ValueError, OverflowError):
        # None, non-numeric text, NaN or infinity: use a neutral rating.
        stars = 3

    # Strip before the emptiness check so "   " does not become a blank name.
    clean_name = str(name).strip()[:100] if name else ""
    clean_comment = str(comment).strip()[:800] if comment else ""

    new_entry = {
        "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "Name": clean_name or "Anonymous",
        "Comment": clean_comment,
        "Stars": stars,
    }

    for attempt in range(1, max_attempts + 1):
        try:
            row = pd.DataFrame([new_entry], columns=columns)
            if os.path.exists(feedback_file):
                existing = pd.read_excel(feedback_file)
                # Tolerate older or hand-edited workbooks: make sure every
                # expected column exists and keep a stable column order.
                for col in columns:
                    if col not in existing.columns:
                        existing[col] = None
                df = pd.concat([existing[columns], row], ignore_index=True)
            else:
                df = row

            df.to_excel(feedback_file, index=False)
            return "✅ Thank you! Your feedback has been saved successfully."
        except ImportError as exc:
            # A missing Excel engine will not fix itself; do not retry.
            print(f"Feedback save failed (missing dependency): {exc}")
            break
        except Exception as exc:
            print(f"Feedback save attempt {attempt} failed: {exc}")
            # Pause only between attempts, not after the final one.
            if attempt < max_attempts:
                time.sleep(0.4)

    return " Could not save feedback. Please try again later."