import gradio as gr import pandas as pd import numpy as np import plotly.express as px import plotly.graph_objects as go import random import warnings from datetime import datetime warnings.filterwarnings("ignore") random.seed(2025) np.random.seed(2025) APP_TITLE = "AI Delivery Performance Intelligence Dashboard" REQUIRED_COLUMNS = [ "delivery_id", "delivery_partner", "package_type", "vehicle_type", "delivery_mode", "region", "weather_condition", "distance_km", "package_weight_kg", "delivery_time_hours", "expected_time_hours", "delayed", "delivery_status", "delivery_rating", "delivery_cost" ] NUMERIC_COLS = [ "distance_km", "package_weight_kg", "delivery_time_hours", "expected_time_hours", "delivery_rating", "delivery_cost" ] CATEGORICAL_COLS = [ "delivery_partner", "package_type", "vehicle_type", "delivery_mode", "region", "weather_condition", "delayed", "delivery_status" ] CUSTOM_CSS = """ .gradio-container { max-width: 1500px !important; margin: auto !important; background: linear-gradient(135deg, #f8fafc 0%, #eef2ff 45%, #ffffff 100%); } #hero { padding: 34px 38px; border-radius: 28px; background: linear-gradient(135deg, #111827 0%, #1e293b 48%, #4f46e5 100%); color: white; box-shadow: 0 22px 55px rgba(15, 23, 42, 0.22); margin-bottom: 18px; } #hero h1 { font-size: 38px; line-height: 1.05; margin-bottom: 8px; color: white; } #hero p { font-size: 16px; opacity: 0.92; color: white; } .metric-card { padding: 24px; border-radius: 24px; background: rgba(255,255,255,0.90); border: 1px solid rgba(226,232,240,0.9); box-shadow: 0 16px 40px rgba(15, 23, 42, 0.08); min-height: 150px; } .metric-label { font-size: 13px; color: #64748b; text-transform: uppercase; letter-spacing: 0.08em; font-weight: 700; } .metric-value { font-size: 34px; color: #111827; font-weight: 850; margin-top: 8px; } .metric-note { font-size: 13px; color: #64748b; margin-top: 8px; } .insight-box { padding: 22px 24px; border-radius: 24px; background: white; border: 1px solid #e5e7eb; box-shadow: 0 12px 32px rgba(15, 23, 42, 0.08); } .warning-box { padding: 18px 22px; border-radius: 20px; background: #fff7ed; border: 1px solid #fed7aa; } .success-box { padding: 18px 22px; border-radius: 20px; background: #ecfdf5; border: 1px solid #bbf7d0; } .small-muted { color: #64748b; font-size: 13px; } """ def _safe_lower_text(df): for col in df.select_dtypes(include=["object"]).columns: df[col] = df[col].astype(str).str.strip() return df def _extract_time_number(series): s = series.astype(str).str.strip() # Handles strange strings like 1970-01-01 00:00:00.000000008 by extracting the final number. extracted = s.str.extract(r"(\d+\.?\d*)$")[0] numeric = pd.to_numeric(extracted, errors="coerce") # If a normal numeric string was provided, use it. fallback = pd.to_numeric(s, errors="coerce") return numeric.fillna(fallback) def validate_and_clean(file): if file is None: raise gr.Error("Please upload a CSV file first.") df = pd.read_csv(file.name) original_rows = len(df) df.columns = df.columns.str.strip().str.lower() missing_cols = [c for c in REQUIRED_COLUMNS if c not in df.columns] if missing_cols: raise gr.Error( "Your file is missing required columns: " + ", ".join(missing_cols) + ". Please upload Delivery_Logistics.csv or rename your columns." ) df = df.drop_duplicates() duplicate_rows = original_rows - len(df) df = _safe_lower_text(df) for col in ["delivery_time_hours", "expected_time_hours"]: df[col] = _extract_time_number(df[col]) for col in NUMERIC_COLS: df[col] = pd.to_numeric(df[col], errors="coerce") median_value = df[col].median() if pd.isna(median_value): median_value = 0 df[col] = df[col].fillna(median_value) for col in CATEGORICAL_COLS: df[col] = df[col].replace(["nan", "None", ""], np.nan) mode_value = df[col].mode(dropna=True) fill_value = mode_value.iloc[0] if len(mode_value) else "unknown" df[col] = df[col].fillna(fill_value).astype(str).str.strip().str.lower() cleaning_report = { "original_rows": original_rows, "final_rows": len(df), "duplicates_removed": duplicate_rows, "columns": len(df.columns), } return df, cleaning_report def enrich_delivery_logic(df, weather_sensitivity=1.0, traffic_pressure=1.0, capacity_pressure=1.0): out = df.copy() text_cols = ["vehicle_type", "weather_condition", "delivery_mode", "region", "package_type", "delivery_partner"] for col in text_cols: out[col] = out[col].astype(str).str.strip().str.lower() # Expected time model out["expected_time_hours"] = out["distance_km"] / 45 vehicle_adjustment = {"bike": 1.20, "van": 0.50, "truck": 0.80, "ev van": 0.40} weather_adjustment = { "clear": 0.00, "cloudy": 0.20, "foggy": 0.60, "rainy": 0.80, "stormy": 1.20, "cold": 0.20, "hot": 0.20, "windy": 0.30 } mode_adjustment = {"same day": 0.30, "express": 0.20, "two day": 0.70, "standard": 0.50} region_adjustment = {"central": 0.60, "north": 0.30, "south": 0.30, "east": 0.40, "west": 0.40} out["expected_time_hours"] = ( out["expected_time_hours"] + out["vehicle_type"].map(vehicle_adjustment).fillna(0.50) + out["weather_condition"].map(weather_adjustment).fillna(0.30) * weather_sensitivity + out["delivery_mode"].map(mode_adjustment).fillna(0.40) + out["region"].map(region_adjustment).fillna(0.30) * traffic_pressure ) # Actual time multipliers vehicle_actual_multiplier = {"bike": 1.05, "van": 0.95, "truck": 1.02, "ev van": 0.97} weather_actual_multiplier = { "clear": 0.95, "cloudy": 1.00, "foggy": 1.05, "rainy": 1.10, "stormy": 1.20, "cold": 1.02, "hot": 1.02, "windy": 1.03 } mode_actual_multiplier = {"same day": 1.05, "express": 1.02, "two day": 0.97, "standard": 1.00} region_actual_multiplier = {"central": 1.08, "north": 1.00, "south": 1.01, "east": 1.02, "west": 1.03} out["delivery_time_hours"] = ( out["expected_time_hours"] * out["vehicle_type"].map(vehicle_actual_multiplier).fillna(1.00) * (out["weather_condition"].map(weather_actual_multiplier).fillna(1.00) ** weather_sensitivity) * out["delivery_mode"].map(mode_actual_multiplier).fillna(1.00) * (out["region"].map(region_actual_multiplier).fillna(1.00) ** traffic_pressure) * capacity_pressure ) # Controlled variation to keep realistic early/on-time/late spread out["delay_ratio"] = out["delivery_time_hours"] / out["expected_time_hours"] out["delivery_time_hours"] = np.where( out["delay_ratio"] < 0.98, out["expected_time_hours"] * 0.95, np.where( out["delay_ratio"] < 1.05, out["expected_time_hours"] * 1.00, np.where( out["delay_ratio"] < 1.15, out["expected_time_hours"] * 1.10, out["expected_time_hours"] * 1.25, ), ), ) # Scenario pressure adds extra stress after balancing scenario_extra = (weather_sensitivity - 1.0) * 0.10 + (traffic_pressure - 1.0) * 0.08 + (capacity_pressure - 1.0) out["delivery_time_hours"] = out["delivery_time_hours"] * (1 + max(scenario_extra, -0.20)) out["expected_time_hours"] = out["expected_time_hours"].clip(lower=0.5).round(2) out["delivery_time_hours"] = out["delivery_time_hours"].clip(lower=0.5).round(2) out["delay_hours"] = (out["delivery_time_hours"] - out["expected_time_hours"]).round(2) out["calculated_delay"] = np.where(out["delay_hours"] > 0, "yes", "no") def generate_delay_score(delay): if delay <= 0: base = 5 elif delay <= 2: base = 4 elif delay <= 5: base = 3 elif delay <= 8: base = 2 else: base = 1 noise = random.choices([-1, 0, 1], weights=[1, 3, 1])[0] return int(np.clip(base + noise, 1, 5)) out["delay_score"] = out["delay_hours"].apply(generate_delay_score) out["performance_label"] = out["delay_score"].map({ 5: "Excellent", 4: "Good", 3: "Average", 2: "Poor", 1: "Critical" }) out["distance_category"] = pd.cut( out["distance_km"], bins=[0, 50, 150, 300, float("inf")], labels=["Short", "Medium", "Long", "Very Long"], include_lowest=True ) out["risk_level"] = pd.cut( out["delay_hours"], bins=[-float("inf"), 0, 2, 5, float("inf")], labels=["Low", "Moderate", "High", "Critical"] ) return out def apply_filters(df, vehicles, weather, regions, modes, max_distance): filtered = df.copy() if vehicles: filtered = filtered[filtered["vehicle_type"].isin(vehicles)] if weather: filtered = filtered[filtered["weather_condition"].isin(weather)] if regions: filtered = filtered[filtered["region"].isin(regions)] if modes: filtered = filtered[filtered["delivery_mode"].isin(modes)] filtered = filtered[filtered["distance_km"] <= max_distance] if filtered.empty: return df return filtered def metric_html(label, value, note): return f"""
{label}
{value}
{note}
""" def generate_kpi_html(df, cleaning_report): avg_delay = df["delay_hours"].mean() delay_rate = (df["delay_hours"] > 0).mean() * 100 avg_score = df["delay_score"].mean() critical_rate = (df["risk_level"].astype(str) == "Critical").mean() * 100 total_cost = df["delivery_cost"].sum() avg_rating = df["delivery_rating"].mean() html = f"""
{metric_html("Average delay", f"{avg_delay:.2f} h", "Lower is better. Negative/zero means early or on time.")} {metric_html("Delay rate", f"{delay_rate:.1f}%", "Share of deliveries where actual time exceeds expected time.")} {metric_html("Performance score", f"{avg_score:.2f}/5", "Higher score means stronger operational performance.")} {metric_html("Critical risk share", f"{critical_rate:.1f}%", "Deliveries with severe delay exposure.")} {metric_html("Total delivery cost", f"€{total_cost:,.0f}", "Total operational cost in the selected dataset.")} {metric_html("Average rating", f"{avg_rating:.2f}/5", "Customer-facing quality indicator.")}

Dataset status

{cleaning_report["final_rows"]:,} rows analyzed, {cleaning_report["duplicates_removed"]:,} duplicates removed, {cleaning_report["columns"]} columns processed.

""" return html def summary_tables(df): vehicle_perf = df.groupby("vehicle_type").agg( avg_delay=("delay_hours", "mean"), avg_score=("delay_score", "mean"), deliveries=("delivery_id", "count") ).reset_index().sort_values("avg_delay", ascending=False) weather_perf = df.groupby("weather_condition").agg( avg_delay=("delay_hours", "mean"), avg_score=("delay_score", "mean"), deliveries=("delivery_id", "count") ).reset_index().sort_values("avg_delay", ascending=False) region_perf = df.groupby("region").agg( avg_delay=("delay_hours", "mean"), avg_score=("delay_score", "mean"), deliveries=("delivery_id", "count") ).reset_index().sort_values("avg_delay", ascending=False) mode_perf = df.groupby("delivery_mode").agg( avg_delay=("delay_hours", "mean"), avg_score=("delay_score", "mean"), deliveries=("delivery_id", "count") ).reset_index().sort_values("avg_delay", ascending=False) return vehicle_perf, weather_perf, region_perf, mode_perf def make_figures(df): vehicle_perf, weather_perf, region_perf, mode_perf = summary_tables(df) fig_vehicle = px.bar( vehicle_perf, x="vehicle_type", y="avg_delay", text="avg_delay", title="Average Delay by Vehicle Type", hover_data=["avg_score", "deliveries"] ) fig_vehicle.update_traces(texttemplate="%{text:.2f}h", textposition="outside") fig_vehicle.update_layout(height=430, margin=dict(l=30, r=30, t=70, b=40)) fig_weather = px.bar( weather_perf, x="weather_condition", y="avg_delay", text="avg_delay", title="Average Delay by Weather Condition", hover_data=["avg_score", "deliveries"] ) fig_weather.update_traces(texttemplate="%{text:.2f}h", textposition="outside") fig_weather.update_layout(height=430, margin=dict(l=30, r=30, t=70, b=40)) fig_region = px.bar( region_perf, x="region", y="avg_delay", text="avg_delay", title="Average Delay by Region", hover_data=["avg_score", "deliveries"] ) fig_region.update_traces(texttemplate="%{text:.2f}h", textposition="outside") fig_region.update_layout(height=430, margin=dict(l=30, r=30, t=70, b=40)) fig_mode = px.bar( mode_perf, x="delivery_mode", y="avg_delay", text="avg_delay", title="Average Delay by Delivery Mode", hover_data=["avg_score", "deliveries"] ) fig_mode.update_traces(texttemplate="%{text:.2f}h", textposition="outside") fig_mode.update_layout(height=430, margin=dict(l=30, r=30, t=70, b=40)) fig_scatter = px.scatter( df, x="distance_km", y="delay_hours", color="risk_level", size="package_weight_kg", hover_data=["vehicle_type", "weather_condition", "region", "delivery_mode"], title="Distance, Package Weight and Delay Risk" ) fig_scatter.update_layout(height=500, margin=dict(l=30, r=30, t=70, b=40)) label_order = ["Excellent", "Good", "Average", "Poor", "Critical"] dist = df["performance_label"].value_counts().reindex(label_order).fillna(0).reset_index() dist.columns = ["performance_label", "count"] fig_perf = px.pie( dist, names="performance_label", values="count", hole=0.55, title="Performance Distribution" ) fig_perf.update_layout(height=450, margin=dict(l=30, r=30, t=70, b=40)) heat = df.pivot_table( index="weather_condition", columns="vehicle_type", values="delay_hours", aggfunc="mean" ).round(2) fig_heatmap = px.imshow( heat, text_auto=True, aspect="auto", title="Delay Risk Heatmap: Weather × Vehicle" ) fig_heatmap.update_layout(height=470, margin=dict(l=30, r=30, t=70, b=40)) cost_df = df.groupby("delivery_mode").agg( avg_cost=("delivery_cost", "mean"), avg_rating=("delivery_rating", "mean"), avg_delay=("delay_hours", "mean"), deliveries=("delivery_id", "count") ).reset_index() fig_cost = px.scatter( cost_df, x="avg_cost", y="avg_rating", size="deliveries", color="avg_delay", hover_name="delivery_mode", title="Cost vs Customer Rating by Delivery Mode" ) fig_cost.update_layout(height=470, margin=dict(l=30, r=30, t=70, b=40)) return fig_vehicle, fig_weather, fig_region, fig_mode, fig_scatter, fig_perf, fig_heatmap, fig_cost def generate_qualitative(df): vehicle_perf, weather_perf, region_perf, mode_perf = summary_tables(df) worst_vehicle = vehicle_perf.iloc[0] best_vehicle = vehicle_perf.iloc[-1] worst_weather = weather_perf.iloc[0] worst_region = region_perf.iloc[0] worst_mode = mode_perf.iloc[0] delay_rate = (df["delay_hours"] > 0).mean() * 100 avg_delay = df["delay_hours"].mean() critical_share = (df["risk_level"].astype(str) == "Critical").mean() * 100 # Detect likely main driver by comparing max-min spread spreads = { "vehicle type": vehicle_perf["avg_delay"].max() - vehicle_perf["avg_delay"].min(), "weather condition": weather_perf["avg_delay"].max() - weather_perf["avg_delay"].min(), "region": region_perf["avg_delay"].max() - region_perf["avg_delay"].min(), "delivery mode": mode_perf["avg_delay"].max() - mode_perf["avg_delay"].min(), } main_driver = max(spreads, key=spreads.get) if delay_rate < 35: overall = "The operation is relatively stable, but some segments still create avoidable delay risk." elif delay_rate < 65: overall = "The operation shows a mixed performance pattern: many deliveries are controlled, but delay risk is clearly present." else: overall = "The operation is exposed to significant delay pressure and requires active management intervention." qualitative = f"""

Dataset-generated qualitative analysis

Overall interpretation: {overall}

The selected dataset has an average delay of {avg_delay:.2f} hours and a delay rate of {delay_rate:.1f}%. The critical-risk share is {critical_share:.1f}%, which indicates how much of the operation is exposed to severe service-level pressure.

Key operational story

The strongest differentiating driver in this dataset appears to be {main_driver}. This means management should not only look at overall delay averages, but identify which specific operational condition creates the largest performance gap.

Operational bottlenecks detected

Business meaning

The dataset suggests that delivery performance is not random. Delays are connected to operational choices such as vehicle allocation, delivery mode, and route/region conditions. This is important because it means management can improve performance through targeted actions instead of treating all deliveries the same.

""" return qualitative def generate_recommendations(df): vehicle_perf, weather_perf, region_perf, mode_perf = summary_tables(df) worst_vehicle = vehicle_perf.iloc[0]["vehicle_type"] best_vehicle = vehicle_perf.iloc[-1]["vehicle_type"] worst_weather = weather_perf.iloc[0]["weather_condition"] worst_region = region_perf.iloc[0]["region"] worst_mode = mode_perf.iloc[0]["delivery_mode"] delay_rate = (df["delay_hours"] > 0).mean() * 100 urgency = "high" if delay_rate >= 65 else "medium" if delay_rate >= 35 else "controlled" return f"""

AI Management Recommendations

Priority level: {urgency.upper()}

  1. Reallocate vehicle capacity: Increase use of {best_vehicle} where possible and review why {worst_vehicle} creates higher delay exposure.
  2. Create weather-specific routing rules: Under {worst_weather} conditions, add buffer time, adjust promises, or prioritize safer routes.
  3. Focus regional improvement: Investigate the {worst_region} region for congestion, route complexity, staffing gaps, or infrastructure issues.
  4. Review service promise logic: {worst_mode} has the weakest delay performance. Management should check whether promised delivery windows are realistic.
  5. Use risk-based planning: Classify deliveries before dispatch into low, moderate, high, and critical risk to allocate resources more intelligently.
Management conclusion: The company should move from reactive delay management to predictive risk management. The dashboard helps managers identify where delays are likely to happen before they become customer-facing service failures.
""" def create_downloads(df): timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") processed_path = f"/tmp/processed_delivery_data_{timestamp}.csv" summary_path = f"/tmp/management_summary_{timestamp}.csv" df.to_csv(processed_path, index=False) summary = [] for dimension in ["vehicle_type", "weather_condition", "region", "delivery_mode", "distance_category"]: temp = df.groupby(dimension).agg( avg_delay=("delay_hours", "mean"), avg_score=("delay_score", "mean"), deliveries=("delivery_id", "count") ).reset_index() temp.insert(0, "dimension", dimension) temp = temp.rename(columns={dimension: "category"}) summary.append(temp) pd.concat(summary, ignore_index=True).to_csv(summary_path, index=False) return processed_path, summary_path def load_options(file, weather_sensitivity, traffic_pressure, capacity_pressure): df_raw, _ = validate_and_clean(file) df = enrich_delivery_logic(df_raw, weather_sensitivity, traffic_pressure, capacity_pressure) vehicles = sorted(df["vehicle_type"].dropna().unique().tolist()) weather = sorted(df["weather_condition"].dropna().unique().tolist()) regions = sorted(df["region"].dropna().unique().tolist()) modes = sorted(df["delivery_mode"].dropna().unique().tolist()) max_distance = float(df["distance_km"].max()) return ( gr.update(choices=vehicles, value=[]), gr.update(choices=weather, value=[]), gr.update(choices=regions, value=[]), gr.update(choices=modes, value=[]), gr.update(maximum=max_distance, value=max_distance), f"✅ Dataset loaded. {len(df):,} deliveries detected. Now choose filters or click Generate Dashboard." ) def run_dashboard(file, vehicles, weather, regions, modes, max_distance, weather_sensitivity, traffic_pressure, capacity_pressure): df_raw, cleaning_report = validate_and_clean(file) df = enrich_delivery_logic(df_raw, weather_sensitivity, traffic_pressure, capacity_pressure) filtered = apply_filters(df, vehicles, weather, regions, modes, max_distance) kpi_html = generate_kpi_html(filtered, cleaning_report) figures = make_figures(filtered) qualitative = generate_qualitative(filtered) recommendations = generate_recommendations(filtered) processed_path, summary_path = create_downloads(filtered) preview_cols = [ "delivery_id", "vehicle_type", "weather_condition", "delivery_mode", "region", "distance_km", "expected_time_hours", "delivery_time_hours", "delay_hours", "delay_score", "performance_label", "risk_level" ] preview = filtered[preview_cols].head(20) return ( kpi_html, *figures, qualitative, recommendations, preview, processed_path, summary_path ) with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Soft(primary_hue="indigo", neutral_hue="slate")) as demo: gr.HTML( """

AI Delivery Performance Intelligence Dashboard

Upload logistics data, generate realistic delay intelligence, explore performance drivers, simulate operational pressure, and receive dataset-based management recommendations.

""" ) with gr.Row(): with gr.Column(scale=1): file_input = gr.File(label="Upload Delivery CSV", file_types=[".csv"]) load_btn = gr.Button("Load Dataset & Activate Filters", variant="secondary") status = gr.Markdown("Upload your `Delivery_Logistics.csv` file to begin.") with gr.Column(scale=2): gr.Markdown( """ ### What this app does - Cleans and standardizes raw delivery data - Generates synthetic delivery delay intelligence - Shows KPI, quantitative, and qualitative analysis - Lets users filter by vehicle, weather, region, mode, and distance - Simulates changing weather, traffic, and capacity pressure - Exports processed data and management summaries """ ) with gr.Accordion("Interactive controls", open=True): with gr.Row(): vehicle_filter = gr.Dropdown(label="Filter by vehicle type", choices=[], multiselect=True) weather_filter = gr.Dropdown(label="Filter by weather condition", choices=[], multiselect=True) region_filter = gr.Dropdown(label="Filter by region", choices=[], multiselect=True) mode_filter = gr.Dropdown(label="Filter by delivery mode", choices=[], multiselect=True) with gr.Row(): distance_filter = gr.Slider(label="Maximum distance in km", minimum=0, maximum=500, value=500, step=1) weather_sensitivity = gr.Slider(label="Weather sensitivity scenario", minimum=0.5, maximum=2.0, value=1.0, step=0.1) traffic_pressure = gr.Slider(label="Traffic / region pressure scenario", minimum=0.5, maximum=2.0, value=1.0, step=0.1) capacity_pressure = gr.Slider(label="Capacity pressure scenario", minimum=0.8, maximum=1.4, value=1.0, step=0.05) generate_btn = gr.Button("Generate Dashboard", variant="primary", size="lg") with gr.Tab("1. KPI Overview"): kpi_output = gr.HTML() preview_table = gr.Dataframe(label="Preview of processed delivery intelligence", interactive=False, wrap=True) with gr.Tab("2. Quantitative Analysis"): with gr.Row(): fig_vehicle = gr.Plot() fig_weather = gr.Plot() with gr.Row(): fig_region = gr.Plot() fig_mode = gr.Plot() with gr.Row(): fig_scatter = gr.Plot() fig_perf = gr.Plot() with gr.Row(): fig_heatmap = gr.Plot() fig_cost = gr.Plot() with gr.Tab("3. Qualitative Analysis"): qualitative_output = gr.HTML() with gr.Tab("4. AI Management Recommendations"): recommendations_output = gr.HTML() with gr.Row(): processed_download = gr.File(label="Download processed dataset") summary_download = gr.File(label="Download management summary") load_btn.click( load_options, inputs=[file_input, weather_sensitivity, traffic_pressure, capacity_pressure], outputs=[vehicle_filter, weather_filter, region_filter, mode_filter, distance_filter, status] ) generate_btn.click( run_dashboard, inputs=[ file_input, vehicle_filter, weather_filter, region_filter, mode_filter, distance_filter, weather_sensitivity, traffic_pressure, capacity_pressure ], outputs=[ kpi_output, fig_vehicle, fig_weather, fig_region, fig_mode, fig_scatter, fig_perf, fig_heatmap, fig_cost, qualitative_output, recommendations_output, preview_table, processed_download, summary_download ] ) demo.launch()