Spaces:

ESCP
/

delivery.app.final.project

Runtime error

App Files Files Community

charlottegers committed on Apr 30

Commit

4b7047d

verified ·

1 Parent(s): 6461b68

Upload 4 files

Browse files

Files changed (4) hide show

Delivery_Logistics.csv +0 -0
app.py +390 -0
requirements.txt +4 -0
synthetic_delivery_data.csv +0 -0

Delivery_Logistics.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

app.py ADDED Viewed

	@@ -0,0 +1,390 @@

+import random
+import warnings
+from io import StringIO
+import gradio as gr
+import numpy as np
+import pandas as pd
+import plotly.express as px
+warnings.filterwarnings("ignore")
+random.seed(2025)
+np.random.seed(2025)
+NUMERIC_COLS = [
+    "distance_km", "package_weight_kg", "delivery_time_hours",
+    "expected_time_hours", "delivery_rating", "delivery_cost"
+]
+CATEGORICAL_COLS = [
+    "delivery_partner", "package_type", "vehicle_type", "delivery_mode",
+    "region", "weather_condition", "delayed", "delivery_status"
+]
+REQUIRED_COLS = [
+    "delivery_id", "delivery_partner", "package_type", "vehicle_type",
+    "delivery_mode", "region", "weather_condition", "distance_km",
+    "package_weight_kg", "delivery_time_hours", "expected_time_hours",
+    "delayed", "delivery_status", "delivery_rating", "delivery_cost"
+]
+def _convert_time_column(series):
+    """Converts normal numeric values or timestamp-like time values into numeric hours."""
+    if pd.api.types.is_numeric_dtype(series):
+        return pd.to_numeric(series, errors="coerce")
+    return pd.to_numeric(series.astype(str).str.split(".").str[-1], errors="coerce")
+def clean_data(file):
+    if file is None:
+        return None, "Please upload a CSV file first."
+    df = pd.read_csv(file.name)
+    original_rows = len(df)
+    df.columns = df.columns.str.strip().str.lower()
+    missing_cols = [c for c in REQUIRED_COLS if c not in df.columns]
+    if missing_cols:
+        return None, f"Missing columns: {missing_cols}"
+    df = df.drop_duplicates().copy()
+    df["delivery_time_hours"] = _convert_time_column(df["delivery_time_hours"])
+    df["expected_time_hours"] = _convert_time_column(df["expected_time_hours"])
+    for col in NUMERIC_COLS:
+        df[col] = pd.to_numeric(df[col], errors="coerce")
+        df[col] = df[col].fillna(df[col].median())
+    for col in CATEGORICAL_COLS:
+        df[col] = df[col].astype(str).str.strip().str.lower()
+        mode_value = df[col].mode()[0] if not df[col].mode().empty else "unknown"
+        df[col] = df[col].replace("nan", np.nan).fillna(mode_value)
+    report = (
+        f"Data cleaned successfully. Original rows: {original_rows:,}. "
+        f"Rows after duplicate removal: {len(df):,}. Missing values handled."
+    )
+    return df, report
+def generate_synthetic_analysis(df):
+    if df is None:
+        return None
+    data = df.copy()
+    # Make text consistent
+    for col in ["vehicle_type", "weather_condition", "delivery_mode", "region"]:
+        data[col] = data[col].astype(str).str.strip().str.lower()
+    # Expected time logic: distance plus operational difficulty
+    vehicle_adjustment = {"bike": 1.2, "van": 0.5, "truck": 0.8, "ev van": 0.4}
+    weather_adjustment = {
+        "clear": 0.0, "cloudy": 0.2, "foggy": 0.6, "rainy": 0.8,
+        "stormy": 1.2, "cold": 0.2, "hot": 0.2, "windy": 0.3
+    }
+    mode_adjustment = {"same day": 0.3, "express": 0.2, "two day": 0.7, "standard": 0.5}
+    region_adjustment = {"central": 0.6, "north": 0.3, "south": 0.3, "east": 0.4, "west": 0.4}
+    data["expected_time_hours"] = (
+        data["distance_km"] / 45
+        + data["vehicle_type"].map(vehicle_adjustment).fillna(0.5)
+        + data["weather_condition"].map(weather_adjustment).fillna(0.3)
+        + data["delivery_mode"].map(mode_adjustment).fillna(0.4)
+        + data["region"].map(region_adjustment).fillna(0.3)
+    ).clip(lower=0.5)
+    vehicle_multiplier = {"bike": 1.05, "van": 0.95, "truck": 1.02, "ev van": 0.97}
+    weather_multiplier = {
+        "clear": 0.95, "cloudy": 1.00, "foggy": 1.05, "rainy": 1.10,
+        "stormy": 1.20, "cold": 1.02, "hot": 1.02, "windy": 1.03
+    }
+    mode_multiplier = {"same day": 1.05, "express": 1.02, "two day": 0.97, "standard": 1.00}
+    region_multiplier = {"central": 1.08, "north": 1.00, "south": 1.01, "east": 1.02, "west": 1.03}
+    data["delivery_time_hours"] = (
+        data["expected_time_hours"]
+        * data["vehicle_type"].map(vehicle_multiplier).fillna(1.00)
+        * data["weather_condition"].map(weather_multiplier).fillna(1.00)
+        * data["delivery_mode"].map(mode_multiplier).fillna(1.00)
+        * data["region"].map(region_multiplier).fillna(1.00)
+    ).clip(lower=0.5)
+    # Controlled delay distribution
+    ratio = data["delivery_time_hours"] / data["expected_time_hours"]
+    data["delivery_time_hours"] = np.where(
+        ratio < 0.98, data["expected_time_hours"] * 0.95,
+        np.where(ratio < 1.05, data["expected_time_hours"] * 1.00,
+                 np.where(ratio < 1.15, data["expected_time_hours"] * 1.10,
+                          data["expected_time_hours"] * 1.25))
+    )
+    data["expected_time_hours"] = data["expected_time_hours"].round(2)
+    data["delivery_time_hours"] = data["delivery_time_hours"].round(2)
+    data["delay_hours"] = (data["delivery_time_hours"] - data["expected_time_hours"]).round(2)
+    data["calculated_delay"] = np.where(data["delay_hours"] > 0, "yes", "no")
+    def delay_score(delay):
+        if delay <= 0:
+            base = 5
+        elif delay <= 2:
+            base = 4
+        elif delay <= 5:
+            base = 3
+        elif delay <= 8:
+            base = 2
+        else:
+            base = 1
+        noise = random.choices([-1, 0, 1], weights=[1, 3, 1])[0]
+        return int(np.clip(base + noise, 1, 5))
+    def label(score):
+        if score >= 5:
+            return "Excellent"
+        if score == 4:
+            return "Good"
+        if score == 3:
+            return "Average"
+        if score == 2:
+            return "Poor"
+        return "Critical"
+    data["delay_score"] = data["delay_hours"].apply(delay_score)
+    data["performance_label"] = data["delay_score"].apply(label)
+    data["distance_category"] = pd.cut(
+        data["distance_km"],
+        bins=[0, 50, 150, 300, float("inf")],
+        labels=["Short", "Medium", "Long", "Very Long"]
+    )
+    return data
+def kpi_cards(data):
+    total = len(data)
+    delay_rate = (data["calculated_delay"].eq("yes").mean() * 100) if total else 0
+    avg_delay = data["delay_hours"].mean()
+    avg_score = data["delay_score"].mean()
+    avg_cost = data["delivery_cost"].mean()
+    return (
+        f"### KPI Summary\n"
+        f"| KPI | Value |\n|---|---:|\n"
+        f"| Total deliveries analyzed | {total:,.0f} |\n"
+        f"| Delay rate | {delay_rate:.1f}% |\n"
+        f"| Average delay hours | {avg_delay:.2f} |\n"
+        f"| Average delay score | {avg_score:.2f} / 5 |\n"
+        f"| Average delivery cost | {avg_cost:.2f} |"
+    )
+def group_summary(data, group_col):
+    return (
+        data.groupby(group_col, observed=False)
+        .agg(
+            deliveries=("delivery_id", "count"),
+            avg_delay_hours=("delay_hours", "mean"),
+            delay_rate_pct=("calculated_delay", lambda x: (x.eq("yes").mean() * 100)),
+            avg_delay_score=("delay_score", "mean"),
+            avg_cost=("delivery_cost", "mean"),
+            avg_rating=("delivery_rating", "mean")
+        )
+        .round(2)
+        .sort_values("avg_delay_hours", ascending=False)
+        .reset_index()
+    )
+def make_figures(data):
+    vehicle = group_summary(data, "vehicle_type")
+    weather = group_summary(data, "weather_condition")
+    region = group_summary(data, "region")
+    mode = group_summary(data, "delivery_mode")
+    fig_vehicle = px.bar(
+        vehicle, x="vehicle_type", y="avg_delay_hours", text="avg_delay_hours",
+        title="Average Delay by Vehicle Type",
+        labels={"vehicle_type": "Vehicle type", "avg_delay_hours": "Average delay hours"}
+    )
+    fig_weather = px.bar(
+        weather, x="weather_condition", y="delay_rate_pct", text="delay_rate_pct",
+        title="Delay Rate by Weather Condition",
+        labels={"weather_condition": "Weather", "delay_rate_pct": "Delay rate (%)"}
+    )
+    fig_region = px.bar(
+        region, x="region", y="avg_delay_hours", text="avg_delay_hours",
+        title="Average Delay by Region",
+        labels={"region": "Region", "avg_delay_hours": "Average delay hours"}
+    )
+    fig_mode = px.bar(
+        mode, x="delivery_mode", y="delay_rate_pct", text="delay_rate_pct",
+        title="Delay Rate by Delivery Mode",
+        labels={"delivery_mode": "Delivery mode", "delay_rate_pct": "Delay rate (%)"}
+    )
+    fig_scatter = px.scatter(
+        data.sample(min(len(data), 3000), random_state=2025),
+        x="distance_km", y="delay_hours", color="vehicle_type",
+        hover_data=["weather_condition", "region", "delivery_mode"],
+        title="Distance vs Delay Hours"
+    )
+    fig_pie = px.pie(
+        data, names="performance_label", title="Performance Label Distribution"
+    )
+    return fig_vehicle, fig_weather, fig_region, fig_mode, fig_scatter, fig_pie
+def ai_business_recommendations(data):
+    vehicle = group_summary(data, "vehicle_type")
+    weather = group_summary(data, "weather_condition")
+    region = group_summary(data, "region")
+    mode = group_summary(data, "delivery_mode")
+    distance = group_summary(data, "distance_category")
+    worst_vehicle = vehicle.iloc[0]
+    worst_weather = weather.iloc[0]
+    worst_region = region.iloc[0]
+    worst_mode = mode.iloc[0]
+    worst_distance = distance.iloc[0]
+    return f"""
+## AI-enhanced Management Interpretation
+### Main delay-risk factors
+1. **Vehicle risk:** `{worst_vehicle['vehicle_type']}` has the highest average delay at **{worst_vehicle['avg_delay_hours']:.2f} hours**.
+2. **Weather risk:** `{worst_weather['weather_condition']}` has the highest delay rate at **{worst_weather['delay_rate_pct']:.1f}%**.
+3. **Regional risk:** `{worst_region['region']}` has the highest average delay at **{worst_region['avg_delay_hours']:.2f} hours**.
+4. **Delivery mode risk:** `{worst_mode['delivery_mode']}` has the highest delay rate at **{worst_mode['delay_rate_pct']:.1f}%**.
+5. **Distance risk:** `{worst_distance['distance_category']}` deliveries show the highest average delay at **{worst_distance['avg_delay_hours']:.2f} hours**.
+### Recommended management actions
+- **Prioritize capacity planning** for the worst-performing vehicle and region combination.
+- **Add weather-based buffer rules** for high-risk conditions before accepting customer delivery promises.
+- **Use dynamic routing** for long-distance and central-region deliveries because these create operational pressure.
+- **Monitor same-day/express promises carefully** because fast delivery modes are more sensitive to small disruptions.
+- **Create an exception dashboard** that flags deliveries where expected time is unrealistic compared with distance, vehicle, weather, and region.
+### Business value of this automation
+This app turns raw delivery data into cleaned data, synthetic scenario data, KPI dashboards, risk rankings, and management recommendations automatically. Instead of manually checking Excel tables, managers can upload a CSV and immediately see where delay risk is highest.
+"""
+def qualitative_analysis():
+    return """
+## Qualitative Analysis Layer
+The business challenge is not only numerical. Delivery delays also affect customer trust, operational workload, and brand perception.
+### Operational interpretation
+- Bad weather increases uncertainty and makes delivery planning less reliable.
+- Certain vehicle types are better suited to specific delivery contexts.
+- Central regions may create congestion risk and therefore need additional time buffers.
+- Long-distance deliveries require more careful promise management.
+### Customer impact
+- Delays reduce satisfaction even when the package eventually arrives.
+- Customers are especially sensitive to delays in express or same-day delivery.
+- Better delivery estimates can improve trust because customers prefer realistic promises over optimistic but unreliable promises.
+### Strategic interpretation
+The company should not only ask, “Which deliveries are late?” It should ask, “Which operational conditions make lateness predictable before the delivery happens?”
+"""
+def run_dashboard(file):
+    cleaned, report = clean_data(file)
+    if cleaned is None:
+        empty = pd.DataFrame()
+        blank_fig = px.scatter(title="Upload a valid CSV to generate the dashboard")
+        return report, empty, "", blank_fig, blank_fig, blank_fig, blank_fig, blank_fig, blank_fig, "", ""
+    data = generate_synthetic_analysis(cleaned)
+    figs = make_figures(data)
+    return (
+        report,
+        data.head(100),
+        kpi_cards(data),
+        *figs,
+        ai_business_recommendations(data),
+        qualitative_analysis()
+    )
+def download_processed_file(file):
+    cleaned, report = clean_data(file)
+    if cleaned is None:
+        return None
+    data = generate_synthetic_analysis(cleaned)
+    output_path = "processed_delivery_dashboard_data.csv"
+    data.to_csv(output_path, index=False)
+    return output_path
+# Build the UI: header text, upload row, five tabs, and the two button callbacks.
+with gr.Blocks(theme=gr.themes.Soft(), title="AI Delivery Performance Dashboard") as demo:
+    gr.Markdown(
+        """
+# 🚚 AI Delivery Performance Dashboard
+Upload delivery logistics data and automatically generate a cleaned dataset, synthetic delay logic, KPI dashboard, quantitative charts, and AI-enhanced management recommendations.
+**Business challenge:** Which operational factors create the highest delivery delay risk, and what should management do?
+"""
+    )
+    # Input row: CSV upload plus the button that triggers run_dashboard.
+    with gr.Row():
+        file_input = gr.File(label="Upload Delivery_Logistics.csv", file_types=[".csv"])
+        run_button = gr.Button("Generate Dashboard", variant="primary")
+    # Receives the status text that run_dashboard returns first.
+    cleaning_report = gr.Markdown()
+    with gr.Tab("1. KPI Overview"):
+        kpi_output = gr.Markdown()
+        preview_table = gr.Dataframe(label="Preview of Processed Data", interactive=False)
+        download_button = gr.Button("Download Processed CSV")
+        download_file = gr.File(label="Processed CSV")
+    # Six plot slots, filled in the order make_figures returns its figures.
+    with gr.Tab("2. Quantitative Analysis"):
+        with gr.Row():
+            fig_vehicle = gr.Plot()
+            fig_weather = gr.Plot()
+        with gr.Row():
+            fig_region = gr.Plot()
+            fig_mode = gr.Plot()
+        with gr.Row():
+            fig_scatter = gr.Plot()
+            fig_pie = gr.Plot()
+    with gr.Tab("3. AI Management Recommendations"):
+        recommendations_output = gr.Markdown()
+    with gr.Tab("4. Qualitative Analysis"):
+        # Pre-filled at build time; run_dashboard later writes the same text again.
+        qualitative_output = gr.Markdown(value=qualitative_analysis())
+    with gr.Tab("5. How the Automation Works"):
+        gr.Markdown(
+            """
+## Automation logic
+1. **Data extraction:** The user uploads a CSV file.
+2. **Data cleaning:** The app standardizes column names, removes duplicates, converts time columns, and fills missing values.
+3. **Synthetic data generation:** The app creates realistic expected and actual delivery times using distance, vehicle type, weather, delivery mode, and region.
+4. **Automated analysis:** The app calculates delay hours, delay score, performance labels, risk rankings, and KPIs.
+5. **AI-enhanced interpretation:** The app converts the numerical findings into business recommendations for managers.
+## Why this fulfills the project instructions
+- Uses real-world/found delivery logistics data.
+- Adds synthetic data logic to create realistic delay scenarios.
+- Includes quantitative analysis through KPIs, rankings, and charts.
+- Includes qualitative analysis through operational and customer interpretation.
+- Automates data cleaning, generation, analysis, and recommendation writing.
+"""
+        )
+    # The outputs list must stay in the same order as run_dashboard's 11-item tuple.
+    run_button.click(
+        fn=run_dashboard,
+        inputs=file_input,
+        outputs=[
+            cleaning_report, preview_table, kpi_output,
+            fig_vehicle, fig_weather, fig_region, fig_mode, fig_scatter, fig_pie,
+            recommendations_output, qualitative_output
+        ]
+    )
+    # Re-runs cleaning and generation on the upload and returns the CSV path.
+    download_button.click(fn=download_processed_file, inputs=file_input, outputs=download_file)
+# Start the web server only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gradio
+pandas
+numpy
+plotly

synthetic_delivery_data.csv ADDED Viewed

The diff for this file is too large to render. See raw diff