charlottegers committed on
Commit
4b7047d
·
verified ·
1 Parent(s): 6461b68

Upload 4 files

Browse files
Delivery_Logistics.csv ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,390 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import warnings
3
+ from io import StringIO
4
+
5
+ import gradio as gr
6
+ import numpy as np
7
+ import pandas as pd
8
+ import plotly.express as px
9
+
10
# Suppress every warning category for the whole process.
warnings.filterwarnings("ignore")

# Seed both the stdlib and the NumPy global generators once, at import time.
random.seed(2025)
np.random.seed(2025)

# Columns coerced to numbers during cleaning; gaps are filled with the median.
NUMERIC_COLS = [
    "distance_km",
    "package_weight_kg",
    "delivery_time_hours",
    "expected_time_hours",
    "delivery_rating",
    "delivery_cost",
]

# Columns normalised to lower-case text; gaps are filled with the mode.
CATEGORICAL_COLS = [
    "delivery_partner",
    "package_type",
    "vehicle_type",
    "delivery_mode",
    "region",
    "weather_condition",
    "delayed",
    "delivery_status",
]

# Columns an uploaded CSV must contain (compared after lower-casing headers).
REQUIRED_COLS = [
    "delivery_id",
    "delivery_partner",
    "package_type",
    "vehicle_type",
    "delivery_mode",
    "region",
    "weather_condition",
    "distance_km",
    "package_weight_kg",
    "delivery_time_hours",
    "expected_time_hours",
    "delayed",
    "delivery_status",
    "delivery_rating",
    "delivery_cost",
]
28
+
29
+
30
def _convert_time_column(series):
    """Convert a column of hour values to numbers.

    Numeric columns are passed through ``pd.to_numeric`` unchanged. For
    text columns each value is first parsed as a plain number; only values
    that fail that parse fall back to the text after the last ``"."``
    (timestamp-like strings such as ``"1970-01-01 00:00:00.000000008"``
    yield ``8``). Values that cannot be parsed either way become NaN.

    Args:
        series: pandas Series holding the raw column.

    Returns:
        A numeric pandas Series with the same index as ``series``.
    """
    if pd.api.types.is_numeric_dtype(series):
        return pd.to_numeric(series, errors="coerce")

    text = series.astype(str).str.strip()
    # Parse directly first so a decimal string like "3.5" stays 3.5 instead
    # of being split on "." and read as 5.
    direct = pd.to_numeric(text, errors="coerce")
    trailing = pd.to_numeric(text.str.split(".").str[-1], errors="coerce")
    return direct.fillna(trailing)
35
+
36
+
37
def clean_data(file):
    """Load an uploaded CSV and return a cleaned DataFrame plus a status message.

    Steps: read the CSV, lower-case and strip the column names, check that
    every column in ``REQUIRED_COLS`` is present, drop duplicate rows,
    convert the two time columns, fill numeric gaps with the column median
    and categorical gaps with the column mode.

    Args:
        file: ``None``, a path string, or an object whose ``name`` attribute
            is the path of the uploaded file.

    Returns:
        ``(DataFrame, report)`` on success, or ``(None, message)`` when no
        file was given, the file could not be read, or columns are missing.
    """
    if file is None:
        return None, "Please upload a CSV file first."

    # Accept both file-like uploads (with ``.name``) and plain path strings.
    path = getattr(file, "name", file)
    try:
        df = pd.read_csv(path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError,
            UnicodeDecodeError, OSError) as exc:
        return None, f"Could not read the CSV file: {exc}"

    original_rows = len(df)
    df.columns = df.columns.str.strip().str.lower()

    missing_cols = [c for c in REQUIRED_COLS if c not in df.columns]
    if missing_cols:
        return None, f"Missing columns: {missing_cols}"

    df = df.drop_duplicates().copy()

    df["delivery_time_hours"] = _convert_time_column(df["delivery_time_hours"])
    df["expected_time_hours"] = _convert_time_column(df["expected_time_hours"])

    for col in NUMERIC_COLS:
        df[col] = pd.to_numeric(df[col], errors="coerce")
        df[col] = df[col].fillna(df[col].median())

    for col in CATEGORICAL_COLS:
        # Turn the "nan" text produced by astype(str) back into real missing
        # values BEFORE taking the mode; otherwise "nan" can win the mode and
        # be written back as the fill value.
        values = df[col].astype(str).str.strip().str.lower().replace("nan", np.nan)
        modes = values.mode()
        mode_value = modes.iloc[0] if not modes.empty else "unknown"
        df[col] = values.fillna(mode_value)

    report = (
        f"Data cleaned successfully. Original rows: {original_rows:,}. "
        f"Rows after duplicate removal: {len(df):,}. Missing values handled."
    )
    return df, report
68
+
69
+
70
def generate_synthetic_analysis(df, seed=2025):
    """Derive synthetic expected/actual delivery times and delay metrics.

    The input is copied, never modified. ``expected_time_hours`` and
    ``delivery_time_hours`` are overwritten with values computed from
    distance, vehicle type, weather, delivery mode and region, and these
    columns are added: ``delay_hours``, ``calculated_delay``,
    ``delay_score``, ``performance_label`` and ``distance_category``.

    Args:
        df: DataFrame with ``distance_km``, ``vehicle_type``,
            ``weather_condition``, ``delivery_mode`` and ``region`` columns,
            or ``None``.
        seed: Seed for the score noise. A private generator is created on
            every call, so the same input always produces the same scores.

    Returns:
        The enriched DataFrame, or ``None`` when ``df`` is ``None``.
    """
    if df is None:
        return None

    data = df.copy()
    # Private generator: results do not depend on how many times the global
    # ``random`` state has already been consumed by earlier calls.
    rng = random.Random(seed)

    # Make text consistent
    for col in ["vehicle_type", "weather_condition", "delivery_mode", "region"]:
        data[col] = data[col].astype(str).str.strip().str.lower()

    # Expected time logic: distance plus operational difficulty
    vehicle_adjustment = {"bike": 1.2, "van": 0.5, "truck": 0.8, "ev van": 0.4}
    weather_adjustment = {
        "clear": 0.0, "cloudy": 0.2, "foggy": 0.6, "rainy": 0.8,
        "stormy": 1.2, "cold": 0.2, "hot": 0.2, "windy": 0.3
    }
    mode_adjustment = {"same day": 0.3, "express": 0.2, "two day": 0.7, "standard": 0.5}
    region_adjustment = {"central": 0.6, "north": 0.3, "south": 0.3, "east": 0.4, "west": 0.4}

    # Categories missing from a table fall back to the fillna() default.
    data["expected_time_hours"] = (
        data["distance_km"] / 45
        + data["vehicle_type"].map(vehicle_adjustment).fillna(0.5)
        + data["weather_condition"].map(weather_adjustment).fillna(0.3)
        + data["delivery_mode"].map(mode_adjustment).fillna(0.4)
        + data["region"].map(region_adjustment).fillna(0.3)
    ).clip(lower=0.5)

    vehicle_multiplier = {"bike": 1.05, "van": 0.95, "truck": 1.02, "ev van": 0.97}
    weather_multiplier = {
        "clear": 0.95, "cloudy": 1.00, "foggy": 1.05, "rainy": 1.10,
        "stormy": 1.20, "cold": 1.02, "hot": 1.02, "windy": 1.03
    }
    mode_multiplier = {"same day": 1.05, "express": 1.02, "two day": 0.97, "standard": 1.00}
    region_multiplier = {"central": 1.08, "north": 1.00, "south": 1.01, "east": 1.02, "west": 1.03}

    data["delivery_time_hours"] = (
        data["expected_time_hours"]
        * data["vehicle_type"].map(vehicle_multiplier).fillna(1.00)
        * data["weather_condition"].map(weather_multiplier).fillna(1.00)
        * data["delivery_mode"].map(mode_multiplier).fillna(1.00)
        * data["region"].map(region_multiplier).fillna(1.00)
    ).clip(lower=0.5)

    # Controlled delay distribution: snap the combined multiplier onto four
    # fixed levels (0.95x, 1.00x, 1.10x, 1.25x of the expected time).
    ratio = data["delivery_time_hours"] / data["expected_time_hours"]
    data["delivery_time_hours"] = np.where(
        ratio < 0.98, data["expected_time_hours"] * 0.95,
        np.where(ratio < 1.05, data["expected_time_hours"] * 1.00,
                 np.where(ratio < 1.15, data["expected_time_hours"] * 1.10,
                          data["expected_time_hours"] * 1.25))
    )

    data["expected_time_hours"] = data["expected_time_hours"].round(2)
    data["delivery_time_hours"] = data["delivery_time_hours"].round(2)
    data["delay_hours"] = (data["delivery_time_hours"] - data["expected_time_hours"]).round(2)
    data["calculated_delay"] = np.where(data["delay_hours"] > 0, "yes", "no")

    def delay_score(delay):
        """Map delay hours to a 1-5 score, then add -1/0/+1 weighted noise."""
        if delay <= 0:
            base = 5
        elif delay <= 2:
            base = 4
        elif delay <= 5:
            base = 3
        elif delay <= 8:
            base = 2
        else:
            base = 1
        noise = rng.choices([-1, 0, 1], weights=[1, 3, 1])[0]
        return int(np.clip(base + noise, 1, 5))

    def label(score):
        """Translate a 1-5 score into its performance label."""
        if score >= 5:
            return "Excellent"
        if score == 4:
            return "Good"
        if score == 3:
            return "Average"
        if score == 2:
            return "Poor"
        return "Critical"

    data["delay_score"] = data["delay_hours"].apply(delay_score)
    data["performance_label"] = data["delay_score"].apply(label)
    data["distance_category"] = pd.cut(
        data["distance_km"],
        bins=[0, 50, 150, 300, float("inf")],
        labels=["Short", "Medium", "Long", "Very Long"],
        # Without this a distance of exactly 0 km falls outside every bin.
        include_lowest=True,
    )
    return data
160
+
161
+
162
def kpi_cards(data):
    """Render the headline KPIs of the processed data as a Markdown table.

    Args:
        data: DataFrame with ``calculated_delay``, ``delay_hours``,
            ``delay_score`` and ``delivery_cost`` columns.

    Returns:
        A Markdown string: a heading followed by a two-column table.
    """
    n_rows = len(data)
    # Guard the percentage so an empty frame reports 0 instead of NaN.
    if n_rows:
        late_share = data["calculated_delay"].eq("yes").mean() * 100
    else:
        late_share = 0

    rows = [
        ("Total deliveries analyzed", f"{n_rows:,.0f}"),
        ("Delay rate", f"{late_share:.1f}%"),
        ("Average delay hours", f"{data['delay_hours'].mean():.2f}"),
        ("Average delay score", f"{data['delay_score'].mean():.2f} / 5"),
        ("Average delivery cost", f"{data['delivery_cost'].mean():.2f}"),
    ]

    lines = ["### KPI Summary", "| KPI | Value |", "|---|---:|"]
    lines.extend(f"| {name} | {value} |" for name, value in rows)
    return "\n".join(lines)
177
+
178
+
179
def group_summary(data, group_col):
    """Aggregate delay, cost and rating metrics per value of ``group_col``.

    Args:
        data: Processed deliveries DataFrame.
        group_col: Name of the column to group by.

    Returns:
        One row per group with ``deliveries``, ``avg_delay_hours``,
        ``delay_rate_pct``, ``avg_delay_score``, ``avg_cost`` and
        ``avg_rating``, rounded to two decimals and ordered by
        ``avg_delay_hours`` from highest to lowest.
    """
    def _pct_delayed(flags):
        # Share of rows flagged "yes", expressed as a percentage.
        return flags.eq("yes").mean() * 100

    metrics = {
        "deliveries": ("delivery_id", "count"),
        "avg_delay_hours": ("delay_hours", "mean"),
        "delay_rate_pct": ("calculated_delay", _pct_delayed),
        "avg_delay_score": ("delay_score", "mean"),
        "avg_cost": ("delivery_cost", "mean"),
        "avg_rating": ("delivery_rating", "mean"),
    }

    # observed=False keeps unused categories when grouping on a categorical.
    grouped = data.groupby(group_col, observed=False)
    summary = grouped.agg(**metrics).round(2)
    ranked = summary.sort_values("avg_delay_hours", ascending=False)
    return ranked.reset_index()
194
+
195
+
196
def make_figures(data):
    """Build the six dashboard charts from the processed data.

    Args:
        data: Processed deliveries DataFrame.

    Returns:
        A tuple of Plotly figures in this order: vehicle bar, weather bar,
        region bar, delivery-mode bar, distance/delay scatter, label pie.
    """
    def _bar(group_col, metric, title, axis_labels):
        # One bar per group, with the metric value printed on the bar.
        summary = group_summary(data, group_col)
        return px.bar(
            summary, x=group_col, y=metric, text=metric,
            title=title, labels=axis_labels,
        )

    by_vehicle = _bar(
        "vehicle_type", "avg_delay_hours", "Average Delay by Vehicle Type",
        {"vehicle_type": "Vehicle type", "avg_delay_hours": "Average delay hours"},
    )
    by_weather = _bar(
        "weather_condition", "delay_rate_pct", "Delay Rate by Weather Condition",
        {"weather_condition": "Weather", "delay_rate_pct": "Delay rate (%)"},
    )
    by_region = _bar(
        "region", "avg_delay_hours", "Average Delay by Region",
        {"region": "Region", "avg_delay_hours": "Average delay hours"},
    )
    by_mode = _bar(
        "delivery_mode", "delay_rate_pct", "Delay Rate by Delivery Mode",
        {"delivery_mode": "Delivery mode", "delay_rate_pct": "Delay rate (%)"},
    )

    # Plot at most 3000 rows; the fixed random_state keeps the sample stable.
    sample_size = min(len(data), 3000)
    scatter = px.scatter(
        data.sample(sample_size, random_state=2025),
        x="distance_km", y="delay_hours", color="vehicle_type",
        hover_data=["weather_condition", "region", "delivery_mode"],
        title="Distance vs Delay Hours",
    )
    pie = px.pie(data, names="performance_label", title="Performance Label Distribution")

    return by_vehicle, by_weather, by_region, by_mode, scatter, pie
232
+
233
+
234
def ai_business_recommendations(data):
    """Write the Markdown management interpretation for the processed data.

    For each dimension the worst group is chosen by the metric the text
    quotes: average delay hours for vehicle, region and distance; delay
    rate for weather and delivery mode.

    Args:
        data: Processed deliveries DataFrame (including ``distance_category``).

    Returns:
        A Markdown string. When ``data`` is ``None`` or has no rows, a short
        notice is returned instead of the full interpretation.
    """
    if data is None or len(data) == 0:
        return (
            "## AI-enhanced Management Interpretation\n\n"
            "No deliveries are available to analyze."
        )

    def worst_by(group_col, metric):
        # Rank on the quoted metric explicitly instead of relying on the
        # summary's default ordering, which is by average delay hours.
        summary = group_summary(data, group_col)
        return summary.sort_values(metric, ascending=False).iloc[0]

    worst_vehicle = worst_by("vehicle_type", "avg_delay_hours")
    worst_weather = worst_by("weather_condition", "delay_rate_pct")
    worst_region = worst_by("region", "avg_delay_hours")
    worst_mode = worst_by("delivery_mode", "delay_rate_pct")
    worst_distance = worst_by("distance_category", "avg_delay_hours")

    return f"""
## AI-enhanced Management Interpretation

### Main delay-risk factors
1. **Vehicle risk:** `{worst_vehicle['vehicle_type']}` has the highest average delay at **{worst_vehicle['avg_delay_hours']:.2f} hours**.
2. **Weather risk:** `{worst_weather['weather_condition']}` has the highest delay rate at **{worst_weather['delay_rate_pct']:.1f}%**.
3. **Regional risk:** `{worst_region['region']}` has the highest average delay at **{worst_region['avg_delay_hours']:.2f} hours**.
4. **Delivery mode risk:** `{worst_mode['delivery_mode']}` has the highest delay rate at **{worst_mode['delay_rate_pct']:.1f}%**.
5. **Distance risk:** `{worst_distance['distance_category']}` deliveries show the highest average delay at **{worst_distance['avg_delay_hours']:.2f} hours**.

### Recommended management actions
- **Prioritize capacity planning** for the worst-performing vehicle and region combination.
- **Add weather-based buffer rules** for high-risk conditions before accepting customer delivery promises.
- **Use dynamic routing** for long-distance and central-region deliveries because these create operational pressure.
- **Monitor same-day/express promises carefully** because fast delivery modes are more sensitive to small disruptions.
- **Create an exception dashboard** that flags deliveries where expected time is unrealistic compared with distance, vehicle, weather, and region.

### Business value of this automation
This app turns raw delivery data into cleaned data, synthetic scenario data, KPI dashboards, risk rankings, and management recommendations automatically. Instead of manually checking Excel tables, managers can upload a CSV and immediately see where delay risk is highest.
"""
267
+
268
+
269
def qualitative_analysis():
    """Return the fixed Markdown text of the qualitative analysis tab.

    The text does not depend on any data; it starts and ends with a newline.
    """
    lines = [
        "",
        "## Qualitative Analysis Layer",
        "",
        "The business challenge is not only numerical. Delivery delays also affect customer trust, operational workload, and brand perception.",
        "",
        "### Operational interpretation",
        "- Bad weather increases uncertainty and makes delivery planning less reliable.",
        "- Certain vehicle types are better suited to specific delivery contexts.",
        "- Central regions may create congestion risk and therefore need additional time buffers.",
        "- Long-distance deliveries require more careful promise management.",
        "",
        "### Customer impact",
        "- Delays reduce satisfaction even when the package eventually arrives.",
        "- Customers are especially sensitive to delays in express or same-day delivery.",
        "- Better delivery estimates can improve trust because customers prefer realistic promises over optimistic but unreliable promises.",
        "",
        "### Strategic interpretation",
        "The company should not only ask, “Which deliveries are late?” It should ask, “Which operational conditions make lateness predictable before the delivery happens?”",
        "",
    ]
    return "\n".join(lines)
289
+
290
+
291
def run_dashboard(file):
    """Run the full pipeline for an upload and return all eleven UI outputs.

    Args:
        file: The uploaded file as passed to ``clean_data``.

    Returns:
        A tuple of: cleaning report, preview table (first 100 rows), KPI
        Markdown, six figures, recommendations Markdown, and the
        qualitative-analysis Markdown. When cleaning fails the report holds
        the error message, the table is empty, the figures are placeholders
        and the qualitative text is still returned.
    """
    cleaned, report = clean_data(file)
    if cleaned is None:
        placeholder = px.scatter(title="Upload a valid CSV to generate the dashboard")
        return (
            report,
            pd.DataFrame(),
            "",
            *([placeholder] * 6),
            "",
            # The qualitative tab holds static text; returning "" here would
            # wipe it after a failed upload.
            qualitative_analysis(),
        )

    data = generate_synthetic_analysis(cleaned)
    figs = make_figures(data)
    return (
        report,
        data.head(100),
        kpi_cards(data),
        *figs,
        ai_business_recommendations(data),
        qualitative_analysis()
    )
308
+
309
+
310
def download_processed_file(file):
    """Write the processed data for an upload to a CSV and return its path.

    Args:
        file: The uploaded file as passed to ``clean_data``.

    Returns:
        Path of the written CSV, or ``None`` when cleaning fails.
    """
    import os
    import tempfile

    cleaned, _ = clean_data(file)
    if cleaned is None:
        return None
    data = generate_synthetic_analysis(cleaned)
    # Use a fresh directory per request: a fixed path in the working
    # directory would let concurrent requests overwrite each other's export.
    output_dir = tempfile.mkdtemp(prefix="delivery_dashboard_")
    output_path = os.path.join(output_dir, "processed_delivery_dashboard_data.csv")
    data.to_csv(output_path, index=False)
    return output_path
318
+
319
+
320
# Gradio UI: layout, event wiring and entry point.
with gr.Blocks(theme=gr.themes.Soft(), title="AI Delivery Performance Dashboard") as demo:
    # Page header and problem statement.
    gr.Markdown(
        """
# 🚚 AI Delivery Performance Dashboard
Upload delivery logistics data and automatically generate a cleaned dataset, synthetic delay logic, KPI dashboard, quantitative charts, and AI-enhanced management recommendations.

**Business challenge:** Which operational factors create the highest delivery delay risk, and what should management do?
"""
    )

    # Upload control and the button that triggers the pipeline.
    with gr.Row():
        file_input = gr.File(label="Upload Delivery_Logistics.csv", file_types=[".csv"])
        run_button = gr.Button("Generate Dashboard", variant="primary")

    # Receives the cleaning status / error message from run_dashboard.
    cleaning_report = gr.Markdown()

    with gr.Tab("1. KPI Overview"):
        kpi_output = gr.Markdown()
        preview_table = gr.Dataframe(label="Preview of Processed Data", interactive=False)
        download_button = gr.Button("Download Processed CSV")
        download_file = gr.File(label="Processed CSV")

    # Six charts, two per row, in the order returned by make_figures.
    with gr.Tab("2. Quantitative Analysis"):
        with gr.Row():
            fig_vehicle = gr.Plot()
            fig_weather = gr.Plot()
        with gr.Row():
            fig_region = gr.Plot()
            fig_mode = gr.Plot()
        with gr.Row():
            fig_scatter = gr.Plot()
            fig_pie = gr.Plot()

    with gr.Tab("3. AI Management Recommendations"):
        recommendations_output = gr.Markdown()

    # Pre-filled with the static text so the tab has content before any upload.
    with gr.Tab("4. Qualitative Analysis"):
        qualitative_output = gr.Markdown(value=qualitative_analysis())

    with gr.Tab("5. How the Automation Works"):
        gr.Markdown(
            """
## Automation logic
1. **Data extraction:** The user uploads a CSV file.
2. **Data cleaning:** The app standardizes column names, removes duplicates, converts time columns, and fills missing values.
3. **Synthetic data generation:** The app creates realistic expected and actual delivery times using distance, vehicle type, weather, delivery mode, and region.
4. **Automated analysis:** The app calculates delay hours, delay score, performance labels, risk rankings, and KPIs.
5. **AI-enhanced interpretation:** The app converts the numerical findings into business recommendations for managers.

## Why this fulfills the project instructions
- Uses real-world/found delivery logistics data.
- Adds synthetic data logic to create realistic delay scenarios.
- Includes quantitative analysis through KPIs, rankings, and charts.
- Includes qualitative analysis through operational and customer interpretation.
- Automates data cleaning, generation, analysis, and recommendation writing.
"""
        )

    # The outputs list must stay in the same order as run_dashboard's
    # return tuple (eleven values).
    run_button.click(
        fn=run_dashboard,
        inputs=file_input,
        outputs=[
            cleaning_report, preview_table, kpi_output,
            fig_vehicle, fig_weather, fig_region, fig_mode, fig_scatter, fig_pie,
            recommendations_output, qualitative_output
        ]
    )
    # Re-runs the pipeline on the uploaded file and exposes the CSV path.
    download_button.click(fn=download_processed_file, inputs=file_input, outputs=download_file)

# Start the app only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ pandas
3
+ numpy
4
+ plotly
synthetic_delivery_data.csv ADDED
The diff for this file is too large to render. See raw diff