charlottegers committed on
Commit
85ea865
·
verified ·
1 Parent(s): b0b63ce

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -390
app.py DELETED
@@ -1,390 +0,0 @@
1
- import random
2
- import warnings
3
- from io import StringIO
4
-
5
- import gradio as gr
6
- import numpy as np
7
- import pandas as pd
8
- import plotly.express as px
9
-
10
# Process-wide runtime setup: silence all warnings and seed both RNG sources.
warnings.filterwarnings("ignore")
np.random.seed(2025)
random.seed(2025)

# Columns that clean_data coerces to numbers and fills with the column median.
NUMERIC_COLS = [
    "distance_km",
    "package_weight_kg",
    "delivery_time_hours",
    "expected_time_hours",
    "delivery_rating",
    "delivery_cost",
]

# Columns that clean_data normalises to lower-case text and fills with the mode.
CATEGORICAL_COLS = [
    "delivery_partner",
    "package_type",
    "vehicle_type",
    "delivery_mode",
    "region",
    "weather_condition",
    "delayed",
    "delivery_status",
]

# Every column an uploaded CSV must contain (checked after header normalisation).
REQUIRED_COLS = [
    "delivery_id",
    "delivery_partner",
    "package_type",
    "vehicle_type",
    "delivery_mode",
    "region",
    "weather_condition",
    "distance_km",
    "package_weight_kg",
    "delivery_time_hours",
    "expected_time_hours",
    "delayed",
    "delivery_status",
    "delivery_rating",
    "delivery_cost",
]
28
-
29
-
30
def _convert_time_column(series):
    """Convert a time column to numeric values.

    Numeric columns are passed through ``pd.to_numeric`` unchanged. Text
    columns are parsed value by value: a value that is already a valid
    number (for example ``"2.5"``) is kept as that number. Only values that
    cannot be parsed as a whole fall back to the fragment after the last
    ``"."`` (presumably the fractional tail of a timestamp-like rendering
    such as ``"1970-01-01 00:00:00.000000007"`` -- confirm against the data).

    Args:
        series: pandas Series holding the raw column.

    Returns:
        A numeric Series aligned with ``series``; unparseable values are NaN.
    """
    if pd.api.types.is_numeric_dtype(series):
        return pd.to_numeric(series, errors="coerce")

    text = series.astype(str).str.strip()
    # Parse the whole value first so decimal strings are not cut at the dot
    # (previously "12.75" was turned into 75).
    whole_value = pd.to_numeric(text, errors="coerce")
    last_fragment = pd.to_numeric(text.str.split(".").str[-1], errors="coerce")
    return whole_value.fillna(last_fragment)
35
-
36
-
37
def clean_data(file):
    """Load an uploaded CSV and return a cleaned DataFrame plus a status message.

    Steps: normalise header names, verify REQUIRED_COLS, drop duplicate rows,
    convert the two time columns, median-fill NUMERIC_COLS and mode-fill
    CATEGORICAL_COLS.

    Args:
        file: The upload. Either an object exposing the path as ``.name`` or
            a plain path string (which one arrives depends on how the upload
            component is configured), or ``None`` when nothing was uploaded.

    Returns:
        ``(df, report)`` on success, or ``(None, message)`` when there is no
        file, the file cannot be read as CSV, or required columns are missing.
    """
    if file is None:
        return None, "Please upload a CSV file first."

    path = getattr(file, "name", file)
    try:
        df = pd.read_csv(path)
    except (OSError, UnicodeDecodeError,
            pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # Report unreadable uploads the same way as other validation failures
        # instead of letting the exception escape into the UI callback.
        return None, f"Could not read the CSV file: {exc}"

    original_rows = len(df)
    df.columns = df.columns.str.strip().str.lower()

    missing_cols = [c for c in REQUIRED_COLS if c not in df.columns]
    if missing_cols:
        return None, f"Missing columns: {missing_cols}"

    df = df.drop_duplicates().copy()

    df["delivery_time_hours"] = _convert_time_column(df["delivery_time_hours"])
    df["expected_time_hours"] = _convert_time_column(df["expected_time_hours"])

    for col in NUMERIC_COLS:
        df[col] = pd.to_numeric(df[col], errors="coerce")
        df[col] = df[col].fillna(df[col].median())

    for col in CATEGORICAL_COLS:
        values = df[col].astype(str).str.strip().str.lower()
        # astype(str) turns missing cells into the text "nan"; mark those (and
        # blank cells) as missing BEFORE taking the mode, otherwise "nan" can
        # itself become the fill value.
        values = values.mask(values.isin(["nan", ""]))
        modes = values.mode()
        mode_value = modes.iloc[0] if not modes.empty else "unknown"
        df[col] = values.fillna(mode_value)

    report = (
        f"Data cleaned successfully. Original rows: {original_rows:,}. "
        f"Rows after duplicate removal: {len(df):,}. Missing values handled."
    )
    return df, report
68
-
69
-
70
def generate_synthetic_analysis(df, seed=2025):
    """Derive synthetic expected/actual delivery times and delay metrics.

    The input frame is copied, never modified. ``expected_time_hours`` and
    ``delivery_time_hours`` are overwritten with rule-based values, and the
    columns ``delay_hours``, ``calculated_delay``, ``delay_score``,
    ``performance_label`` and ``distance_category`` are added.

    Args:
        df: DataFrame with at least ``distance_km``, ``vehicle_type``,
            ``weather_condition``, ``delivery_mode`` and ``region``; or None.
        seed: Seed for the score noise. A fresh generator is created per call
            so repeated calls on the same data give identical scores (the
            dashboard view and the CSV export previously disagreed because
            both drew from the shared module-level generator).

    Returns:
        The enriched copy, or None when ``df`` is None.
    """
    if df is None:
        return None

    data = df.copy()
    rng = random.Random(seed)

    # Make text consistent so the lookup tables below match.
    for col in ["vehicle_type", "weather_condition", "delivery_mode", "region"]:
        data[col] = data[col].astype(str).str.strip().str.lower()

    # Expected time: travel time at 45 km/h plus additive difficulty offsets.
    vehicle_adjustment = {"bike": 1.2, "van": 0.5, "truck": 0.8, "ev van": 0.4}
    weather_adjustment = {
        "clear": 0.0, "cloudy": 0.2, "foggy": 0.6, "rainy": 0.8,
        "stormy": 1.2, "cold": 0.2, "hot": 0.2, "windy": 0.3,
    }
    mode_adjustment = {"same day": 0.3, "express": 0.2, "two day": 0.7, "standard": 0.5}
    region_adjustment = {"central": 0.6, "north": 0.3, "south": 0.3, "east": 0.4, "west": 0.4}

    # Unknown categories fall back to the per-table default passed to fillna.
    data["expected_time_hours"] = (
        data["distance_km"] / 45
        + data["vehicle_type"].map(vehicle_adjustment).fillna(0.5)
        + data["weather_condition"].map(weather_adjustment).fillna(0.3)
        + data["delivery_mode"].map(mode_adjustment).fillna(0.4)
        + data["region"].map(region_adjustment).fillna(0.3)
    ).clip(lower=0.5)

    vehicle_multiplier = {"bike": 1.05, "van": 0.95, "truck": 1.02, "ev van": 0.97}
    weather_multiplier = {
        "clear": 0.95, "cloudy": 1.00, "foggy": 1.05, "rainy": 1.10,
        "stormy": 1.20, "cold": 1.02, "hot": 1.02, "windy": 1.03,
    }
    mode_multiplier = {"same day": 1.05, "express": 1.02, "two day": 0.97, "standard": 1.00}
    region_multiplier = {"central": 1.08, "north": 1.00, "south": 1.01, "east": 1.02, "west": 1.03}

    raw_actual = (
        data["expected_time_hours"]
        * data["vehicle_type"].map(vehicle_multiplier).fillna(1.00)
        * data["weather_condition"].map(weather_multiplier).fillna(1.00)
        * data["delivery_mode"].map(mode_multiplier).fillna(1.00)
        * data["region"].map(region_multiplier).fillna(1.00)
    ).clip(lower=0.5)

    # Controlled delay distribution: snap the actual/expected ratio onto four
    # fixed levels (0.95, 1.00, 1.10, 1.25). The first matching band wins; rows
    # matching none (including NaN ratios) take the 1.25 default.
    ratio = raw_actual / data["expected_time_hours"]
    level = np.select(
        [ratio < 0.98, ratio < 1.05, ratio < 1.15],
        [0.95, 1.00, 1.10],
        default=1.25,
    )
    data["delivery_time_hours"] = data["expected_time_hours"] * level

    data["expected_time_hours"] = data["expected_time_hours"].round(2)
    data["delivery_time_hours"] = data["delivery_time_hours"].round(2)
    data["delay_hours"] = (data["delivery_time_hours"] - data["expected_time_hours"]).round(2)
    data["calculated_delay"] = np.where(data["delay_hours"] > 0, "yes", "no")

    def delay_score(delay):
        """Map delay hours to a 1-5 score, then add -1/0/+1 noise (weights 1:3:1)."""
        if delay <= 0:
            base = 5
        elif delay <= 2:
            base = 4
        elif delay <= 5:
            base = 3
        elif delay <= 8:
            base = 2
        else:
            base = 1
        noise = rng.choices([-1, 0, 1], weights=[1, 3, 1])[0]
        return int(np.clip(base + noise, 1, 5))

    def label(score):
        """Translate a 1-5 score into its performance label."""
        if score >= 5:
            return "Excellent"
        if score == 4:
            return "Good"
        if score == 3:
            return "Average"
        if score == 2:
            return "Poor"
        return "Critical"

    data["delay_score"] = data["delay_hours"].apply(delay_score)
    data["performance_label"] = data["delay_score"].apply(label)
    data["distance_category"] = pd.cut(
        data["distance_km"],
        bins=[0, 50, 150, 300, float("inf")],
        labels=["Short", "Medium", "Long", "Very Long"],
        include_lowest=True,  # a distance of exactly 0 km belongs to "Short"
    )
    return data
160
-
161
-
162
def kpi_cards(data):
    """Render the headline KPIs as a Markdown table.

    Args:
        data: DataFrame with ``calculated_delay``, ``delay_hours``,
            ``delay_score`` and ``delivery_cost`` columns.

    Returns:
        Markdown string with a heading and a two-column KPI table. For an
        empty frame every KPI is reported as zero rather than "nan".
    """
    total = len(data)
    if total:
        delay_rate = data["calculated_delay"].eq("yes").mean() * 100
        avg_delay = data["delay_hours"].mean()
        avg_score = data["delay_score"].mean()
        avg_cost = data["delivery_cost"].mean()
    else:
        # The mean of an empty column is NaN; keep the table readable instead.
        delay_rate = avg_delay = avg_score = avg_cost = 0.0

    return (
        f"### KPI Summary\n"
        f"| KPI | Value |\n|---|---:|\n"
        f"| Total deliveries analyzed | {total:,.0f} |\n"
        f"| Delay rate | {delay_rate:.1f}% |\n"
        f"| Average delay hours | {avg_delay:.2f} |\n"
        f"| Average delay score | {avg_score:.2f} / 5 |\n"
        f"| Average delivery cost | {avg_cost:.2f} |"
    )
177
-
178
-
179
def group_summary(data, group_col):
    """Aggregate delivery metrics per value of ``group_col``.

    Returns one row per group with the delivery count, mean delay hours,
    share of delayed deliveries in percent, mean delay score, mean cost and
    mean rating. Values are rounded to two decimals and rows are ordered by
    mean delay hours, largest first.
    """
    def _pct_delayed(flags):
        return flags.eq("yes").mean() * 100

    aggregations = {
        "deliveries": ("delivery_id", "count"),
        "avg_delay_hours": ("delay_hours", "mean"),
        "delay_rate_pct": ("calculated_delay", _pct_delayed),
        "avg_delay_score": ("delay_score", "mean"),
        "avg_cost": ("delivery_cost", "mean"),
        "avg_rating": ("delivery_rating", "mean"),
    }

    per_group = data.groupby(group_col, observed=False).agg(**aggregations)
    rounded = per_group.round(2)
    ranked = rounded.sort_values("avg_delay_hours", ascending=False)
    return ranked.reset_index()
194
-
195
-
196
def make_figures(data):
    """Build the six dashboard charts from the enriched delivery data.

    Returns a tuple in this order: average delay by vehicle, delay rate by
    weather, average delay by region, delay rate by delivery mode, a
    distance-vs-delay scatter, and a performance-label pie chart.
    """
    def _bar(group_col, metric, title, axis_labels):
        # One bar per group, with the metric also printed on each bar.
        summary = group_summary(data, group_col)
        return px.bar(
            summary, x=group_col, y=metric, text=metric,
            title=title, labels=axis_labels,
        )

    vehicle_chart = _bar(
        "vehicle_type", "avg_delay_hours",
        "Average Delay by Vehicle Type",
        {"vehicle_type": "Vehicle type", "avg_delay_hours": "Average delay hours"},
    )
    weather_chart = _bar(
        "weather_condition", "delay_rate_pct",
        "Delay Rate by Weather Condition",
        {"weather_condition": "Weather", "delay_rate_pct": "Delay rate (%)"},
    )
    region_chart = _bar(
        "region", "avg_delay_hours",
        "Average Delay by Region",
        {"region": "Region", "avg_delay_hours": "Average delay hours"},
    )
    mode_chart = _bar(
        "delivery_mode", "delay_rate_pct",
        "Delay Rate by Delivery Mode",
        {"delivery_mode": "Delivery mode", "delay_rate_pct": "Delay rate (%)"},
    )

    # Plot at most 3000 rows, drawn with a fixed random_state.
    sample_size = min(len(data), 3000)
    scatter_chart = px.scatter(
        data.sample(sample_size, random_state=2025),
        x="distance_km", y="delay_hours", color="vehicle_type",
        hover_data=["weather_condition", "region", "delivery_mode"],
        title="Distance vs Delay Hours",
    )
    pie_chart = px.pie(
        data, names="performance_label", title="Performance Label Distribution"
    )
    return (
        vehicle_chart, weather_chart, region_chart,
        mode_chart, scatter_chart, pie_chart,
    )
232
-
233
-
234
def ai_business_recommendations(data):
    """Write the Markdown management summary of the main delay-risk factors.

    For each dimension the group with the highest value of the metric quoted
    in the text is selected: average delay hours for vehicle, region and
    distance category; delay rate for weather and delivery mode. (Previously
    every dimension used the first row of a table sorted by average delay
    hours, so the "highest delay rate" lines could name the wrong group.)

    Args:
        data: Enriched DataFrame as accepted by ``group_summary``, including
            a ``distance_category`` column.

    Returns:
        Markdown string. When a dimension has no rankable group (for example
        an empty frame) a short notice is returned instead of raising.
    """
    def _worst(group_col, metric):
        # Groups without data carry NaN for the metric and cannot be ranked.
        summary = group_summary(data, group_col).dropna(subset=[metric])
        if summary.empty:
            return None
        return summary.loc[summary[metric].idxmax()]

    worst_vehicle = _worst("vehicle_type", "avg_delay_hours")
    worst_weather = _worst("weather_condition", "delay_rate_pct")
    worst_region = _worst("region", "avg_delay_hours")
    worst_mode = _worst("delivery_mode", "delay_rate_pct")
    worst_distance = _worst("distance_category", "avg_delay_hours")

    picks = (worst_vehicle, worst_weather, worst_region, worst_mode, worst_distance)
    if any(pick is None for pick in picks):
        return (
            "\n## AI-enhanced Management Interpretation\n\n"
            "Not enough data to rank delay-risk factors.\n"
        )

    return f"""
## AI-enhanced Management Interpretation

### Main delay-risk factors
1. **Vehicle risk:** `{worst_vehicle['vehicle_type']}` has the highest average delay at **{worst_vehicle['avg_delay_hours']:.2f} hours**.
2. **Weather risk:** `{worst_weather['weather_condition']}` has the highest delay rate at **{worst_weather['delay_rate_pct']:.1f}%**.
3. **Regional risk:** `{worst_region['region']}` has the highest average delay at **{worst_region['avg_delay_hours']:.2f} hours**.
4. **Delivery mode risk:** `{worst_mode['delivery_mode']}` has the highest delay rate at **{worst_mode['delay_rate_pct']:.1f}%**.
5. **Distance risk:** `{worst_distance['distance_category']}` deliveries show the highest average delay at **{worst_distance['avg_delay_hours']:.2f} hours**.

### Recommended management actions
- **Prioritize capacity planning** for the worst-performing vehicle and region combination.
- **Add weather-based buffer rules** for high-risk conditions before accepting customer delivery promises.
- **Use dynamic routing** for long-distance and central-region deliveries because these create operational pressure.
- **Monitor same-day/express promises carefully** because fast delivery modes are more sensitive to small disruptions.
- **Create an exception dashboard** that flags deliveries where expected time is unrealistic compared with distance, vehicle, weather, and region.

### Business value of this automation
This app turns raw delivery data into cleaned data, synthetic scenario data, KPI dashboards, risk rankings, and management recommendations automatically. Instead of manually checking Excel tables, managers can upload a CSV and immediately see where delay risk is highest.
"""
267
-
268
-
269
def qualitative_analysis():
    """Return the fixed Markdown text shown in the qualitative-analysis tab."""
    narrative = """
## Qualitative Analysis Layer

The business challenge is not only numerical. Delivery delays also affect customer trust, operational workload, and brand perception.

### Operational interpretation
- Bad weather increases uncertainty and makes delivery planning less reliable.
- Certain vehicle types are better suited to specific delivery contexts.
- Central regions may create congestion risk and therefore need additional time buffers.
- Long-distance deliveries require more careful promise management.

### Customer impact
- Delays reduce satisfaction even when the package eventually arrives.
- Customers are especially sensitive to delays in express or same-day delivery.
- Better delivery estimates can improve trust because customers prefer realistic promises over optimistic but unreliable promises.

### Strategic interpretation
The company should not only ask, “Which deliveries are late?” It should ask, “Which operational conditions make lateness predictable before the delivery happens?”
"""
    return narrative
289
-
290
-
291
def run_dashboard(file):
    """Run the full pipeline for an uploaded CSV and return every UI output.

    Args:
        file: The uploaded file as passed on to ``clean_data``.

    Returns:
        An 11-tuple: cleaning report, preview frame (first 100 rows), KPI
        Markdown, six figures, recommendations Markdown and the qualitative
        analysis Markdown. When cleaning fails, the report carries the reason
        and the data-dependent outputs are empty placeholders.
    """
    cleaned, report = clean_data(file)
    if cleaned is None:
        empty = pd.DataFrame()
        blank_fig = px.scatter(title="Upload a valid CSV to generate the dashboard")
        return (
            report, empty, "",
            blank_fig, blank_fig, blank_fig, blank_fig, blank_fig, blank_fig,
            "",
            # The qualitative text does not depend on the data, so a failed
            # upload must not blank the tab (it used to be reset to "").
            qualitative_analysis(),
        )

    data = generate_synthetic_analysis(cleaned)
    figs = make_figures(data)
    return (
        report,
        data.head(100),
        kpi_cards(data),
        *figs,
        ai_business_recommendations(data),
        qualitative_analysis(),
    )
308
-
309
-
310
def download_processed_file(file):
    """Clean and enrich the uploaded CSV and write the result to a CSV file.

    Args:
        file: The uploaded file as passed on to ``clean_data``.

    Returns:
        Path of the written CSV, or None when the upload could not be cleaned.
    """
    import os
    import tempfile

    cleaned, _report = clean_data(file)
    if cleaned is None:
        return None

    data = generate_synthetic_analysis(cleaned)
    # Write into a fresh temporary directory per call: a fixed file name in
    # the working directory lets concurrent sessions overwrite each other's
    # export and fails when that directory is not writable.
    output_dir = tempfile.mkdtemp(prefix="delivery_dashboard_")
    output_path = os.path.join(output_dir, "processed_delivery_dashboard_data.csv")
    data.to_csv(output_path, index=False)
    return output_path
318
-
319
-
320
# UI definition. Components are created in display order inside the Blocks
# context; the event wiring at the bottom connects them to the pipeline.
# NOTE(review): nesting below is reconstructed from statement order -- confirm
# against the original layout.
with gr.Blocks(theme=gr.themes.Soft(), title="AI Delivery Performance Dashboard") as demo:
    gr.Markdown(
        """
        # 🚚 AI Delivery Performance Dashboard
        Upload delivery logistics data and automatically generate a cleaned dataset, synthetic delay logic, KPI dashboard, quantitative charts, and AI-enhanced management recommendations.

        **Business challenge:** Which operational factors create the highest delivery delay risk, and what should management do?
        """
    )

    # Upload control and the button that triggers the whole pipeline.
    with gr.Row():
        file_input = gr.File(label="Upload Delivery_Logistics.csv", file_types=[".csv"])
        run_button = gr.Button("Generate Dashboard", variant="primary")

    # Receives the status message returned by the cleaning step.
    cleaning_report = gr.Markdown()

    with gr.Tab("1. KPI Overview"):
        kpi_output = gr.Markdown()
        preview_table = gr.Dataframe(label="Preview of Processed Data", interactive=False)
        download_button = gr.Button("Download Processed CSV")
        download_file = gr.File(label="Processed CSV")

    # Six plots, two per row, filled in the order make_figures returns them.
    with gr.Tab("2. Quantitative Analysis"):
        with gr.Row():
            fig_vehicle = gr.Plot()
            fig_weather = gr.Plot()
        with gr.Row():
            fig_region = gr.Plot()
            fig_mode = gr.Plot()
        with gr.Row():
            fig_scatter = gr.Plot()
            fig_pie = gr.Plot()

    with gr.Tab("3. AI Management Recommendations"):
        recommendations_output = gr.Markdown()

    # Pre-populated at build time; the text does not depend on uploaded data.
    with gr.Tab("4. Qualitative Analysis"):
        qualitative_output = gr.Markdown(value=qualitative_analysis())

    with gr.Tab("5. How the Automation Works"):
        gr.Markdown(
            """
            ## Automation logic
            1. **Data extraction:** The user uploads a CSV file.
            2. **Data cleaning:** The app standardizes column names, removes duplicates, converts time columns, and fills missing values.
            3. **Synthetic data generation:** The app creates realistic expected and actual delivery times using distance, vehicle type, weather, delivery mode, and region.
            4. **Automated analysis:** The app calculates delay hours, delay score, performance labels, risk rankings, and KPIs.
            5. **AI-enhanced interpretation:** The app converts the numerical findings into business recommendations for managers.

            ## Why this fulfills the project instructions
            - Uses real-world/found delivery logistics data.
            - Adds synthetic data logic to create realistic delay scenarios.
            - Includes quantitative analysis through KPIs, rankings, and charts.
            - Includes qualitative analysis through operational and customer interpretation.
            - Automates data cleaning, generation, analysis, and recommendation writing.
            """
        )

    # The outputs list must stay in the same order as run_dashboard's return tuple.
    run_button.click(
        fn=run_dashboard,
        inputs=file_input,
        outputs=[
            cleaning_report, preview_table, kpi_output,
            fig_vehicle, fig_weather, fig_region, fig_mode, fig_scatter, fig_pie,
            recommendations_output, qualitative_output
        ]
    )
    # Re-runs cleaning and enrichment on the same upload and offers the CSV.
    download_button.click(fn=download_processed_file, inputs=file_input, outputs=download_file)

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()