Spaces:

ESCP
/

delivery.app.final.project

Runtime error

App Files Community

charlottegers committed on Apr 30

Commit

85ea865

verified ·

1 Parent(s): b0b63ce

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -390

app.py DELETED Viewed

@@ -1,390 +0,0 @@
-import random
-import warnings
-from io import StringIO
-import gradio as gr
-import numpy as np
-import pandas as pd
-import plotly.express as px
-warnings.filterwarnings("ignore")
-random.seed(2025)
-np.random.seed(2025)
-NUMERIC_COLS = [
-    "distance_km", "package_weight_kg", "delivery_time_hours",
-    "expected_time_hours", "delivery_rating", "delivery_cost"
-]
-CATEGORICAL_COLS = [
-    "delivery_partner", "package_type", "vehicle_type", "delivery_mode",
-    "region", "weather_condition", "delayed", "delivery_status"
-]
-REQUIRED_COLS = [
-    "delivery_id", "delivery_partner", "package_type", "vehicle_type",
-    "delivery_mode", "region", "weather_condition", "distance_km",
-    "package_weight_kg", "delivery_time_hours", "expected_time_hours",
-    "delayed", "delivery_status", "delivery_rating", "delivery_cost"
-]
-def _convert_time_column(series):
-    """Converts normal numeric values or timestamp-like time values into numeric hours."""
-    if pd.api.types.is_numeric_dtype(series):
-        return pd.to_numeric(series, errors="coerce")
-    return pd.to_numeric(series.astype(str).str.split(".").str[-1], errors="coerce")
-def clean_data(file):
-    if file is None:
-        return None, "Please upload a CSV file first."
-    df = pd.read_csv(file.name)
-    original_rows = len(df)
-    df.columns = df.columns.str.strip().str.lower()
-    missing_cols = [c for c in REQUIRED_COLS if c not in df.columns]
-    if missing_cols:
-        return None, f"Missing columns: {missing_cols}"
-    df = df.drop_duplicates().copy()
-    df["delivery_time_hours"] = _convert_time_column(df["delivery_time_hours"])
-    df["expected_time_hours"] = _convert_time_column(df["expected_time_hours"])
-    for col in NUMERIC_COLS:
-        df[col] = pd.to_numeric(df[col], errors="coerce")
-        df[col] = df[col].fillna(df[col].median())
-    for col in CATEGORICAL_COLS:
-        df[col] = df[col].astype(str).str.strip().str.lower()
-        mode_value = df[col].mode()[0] if not df[col].mode().empty else "unknown"
-        df[col] = df[col].replace("nan", np.nan).fillna(mode_value)
-    report = (
-        f"Data cleaned successfully. Original rows: {original_rows:,}. "
-        f"Rows after duplicate removal: {len(df):,}. Missing values handled."
-    )
-    return df, report
-def generate_synthetic_analysis(df):
-    if df is None:
-        return None
-    data = df.copy()
-    # Make text consistent
-    for col in ["vehicle_type", "weather_condition", "delivery_mode", "region"]:
-        data[col] = data[col].astype(str).str.strip().str.lower()
-    # Expected time logic: distance plus operational difficulty
-    vehicle_adjustment = {"bike": 1.2, "van": 0.5, "truck": 0.8, "ev van": 0.4}
-    weather_adjustment = {
-        "clear": 0.0, "cloudy": 0.2, "foggy": 0.6, "rainy": 0.8,
-        "stormy": 1.2, "cold": 0.2, "hot": 0.2, "windy": 0.3
-    }
-    mode_adjustment = {"same day": 0.3, "express": 0.2, "two day": 0.7, "standard": 0.5}
-    region_adjustment = {"central": 0.6, "north": 0.3, "south": 0.3, "east": 0.4, "west": 0.4}
-    data["expected_time_hours"] = (
-        data["distance_km"] / 45
-        + data["vehicle_type"].map(vehicle_adjustment).fillna(0.5)
-        + data["weather_condition"].map(weather_adjustment).fillna(0.3)
-        + data["delivery_mode"].map(mode_adjustment).fillna(0.4)
-        + data["region"].map(region_adjustment).fillna(0.3)
-    ).clip(lower=0.5)
-    vehicle_multiplier = {"bike": 1.05, "van": 0.95, "truck": 1.02, "ev van": 0.97}
-    weather_multiplier = {
-        "clear": 0.95, "cloudy": 1.00, "foggy": 1.05, "rainy": 1.10,
-        "stormy": 1.20, "cold": 1.02, "hot": 1.02, "windy": 1.03
-    }
-    mode_multiplier = {"same day": 1.05, "express": 1.02, "two day": 0.97, "standard": 1.00}
-    region_multiplier = {"central": 1.08, "north": 1.00, "south": 1.01, "east": 1.02, "west": 1.03}
-    data["delivery_time_hours"] = (
-        data["expected_time_hours"]
-        * data["vehicle_type"].map(vehicle_multiplier).fillna(1.00)
-        * data["weather_condition"].map(weather_multiplier).fillna(1.00)
-        * data["delivery_mode"].map(mode_multiplier).fillna(1.00)
-        * data["region"].map(region_multiplier).fillna(1.00)
-    ).clip(lower=0.5)
-    # Controlled delay distribution
-    ratio = data["delivery_time_hours"] / data["expected_time_hours"]
-    data["delivery_time_hours"] = np.where(
-        ratio < 0.98, data["expected_time_hours"] * 0.95,
-        np.where(ratio < 1.05, data["expected_time_hours"] * 1.00,
-                 np.where(ratio < 1.15, data["expected_time_hours"] * 1.10,
-                          data["expected_time_hours"] * 1.25))
-    )
-    data["expected_time_hours"] = data["expected_time_hours"].round(2)
-    data["delivery_time_hours"] = data["delivery_time_hours"].round(2)
-    data["delay_hours"] = (data["delivery_time_hours"] - data["expected_time_hours"]).round(2)
-    data["calculated_delay"] = np.where(data["delay_hours"] > 0, "yes", "no")
-    def delay_score(delay):
-        if delay <= 0:
-            base = 5
-        elif delay <= 2:
-            base = 4
-        elif delay <= 5:
-            base = 3
-        elif delay <= 8:
-            base = 2
-        else:
-            base = 1
-        noise = random.choices([-1, 0, 1], weights=[1, 3, 1])[0]
-        return int(np.clip(base + noise, 1, 5))
-    def label(score):
-        if score >= 5:
-            return "Excellent"
-        if score == 4:
-            return "Good"
-        if score == 3:
-            return "Average"
-        if score == 2:
-            return "Poor"
-        return "Critical"
-    data["delay_score"] = data["delay_hours"].apply(delay_score)
-    data["performance_label"] = data["delay_score"].apply(label)
-    data["distance_category"] = pd.cut(
-        data["distance_km"],
-        bins=[0, 50, 150, 300, float("inf")],
-        labels=["Short", "Medium", "Long", "Very Long"]
-    )
-    return data
-def kpi_cards(data):
-    total = len(data)
-    delay_rate = (data["calculated_delay"].eq("yes").mean() * 100) if total else 0
-    avg_delay = data["delay_hours"].mean()
-    avg_score = data["delay_score"].mean()
-    avg_cost = data["delivery_cost"].mean()
-    return (
-        f"### KPI Summary\n"
-        f"| KPI | Value |\n|---|---:|\n"
-        f"| Total deliveries analyzed | {total:,.0f} |\n"
-        f"| Delay rate | {delay_rate:.1f}% |\n"
-        f"| Average delay hours | {avg_delay:.2f} |\n"
-        f"| Average delay score | {avg_score:.2f} / 5 |\n"
-        f"| Average delivery cost | {avg_cost:.2f} |"
-    )
-def group_summary(data, group_col):
-    return (
-        data.groupby(group_col, observed=False)
-        .agg(
-            deliveries=("delivery_id", "count"),
-            avg_delay_hours=("delay_hours", "mean"),
-            delay_rate_pct=("calculated_delay", lambda x: (x.eq("yes").mean() * 100)),
-            avg_delay_score=("delay_score", "mean"),
-            avg_cost=("delivery_cost", "mean"),
-            avg_rating=("delivery_rating", "mean")
-        )
-        .round(2)
-        .sort_values("avg_delay_hours", ascending=False)
-        .reset_index()
-    )
-def make_figures(data):
-    vehicle = group_summary(data, "vehicle_type")
-    weather = group_summary(data, "weather_condition")
-    region = group_summary(data, "region")
-    mode = group_summary(data, "delivery_mode")
-    fig_vehicle = px.bar(
-        vehicle, x="vehicle_type", y="avg_delay_hours", text="avg_delay_hours",
-        title="Average Delay by Vehicle Type",
-        labels={"vehicle_type": "Vehicle type", "avg_delay_hours": "Average delay hours"}
-    )
-    fig_weather = px.bar(
-        weather, x="weather_condition", y="delay_rate_pct", text="delay_rate_pct",
-        title="Delay Rate by Weather Condition",
-        labels={"weather_condition": "Weather", "delay_rate_pct": "Delay rate (%)"}
-    )
-    fig_region = px.bar(
-        region, x="region", y="avg_delay_hours", text="avg_delay_hours",
-        title="Average Delay by Region",
-        labels={"region": "Region", "avg_delay_hours": "Average delay hours"}
-    )
-    fig_mode = px.bar(
-        mode, x="delivery_mode", y="delay_rate_pct", text="delay_rate_pct",
-        title="Delay Rate by Delivery Mode",
-        labels={"delivery_mode": "Delivery mode", "delay_rate_pct": "Delay rate (%)"}
-    )
-    fig_scatter = px.scatter(
-        data.sample(min(len(data), 3000), random_state=2025),
-        x="distance_km", y="delay_hours", color="vehicle_type",
-        hover_data=["weather_condition", "region", "delivery_mode"],
-        title="Distance vs Delay Hours"
-    )
-    fig_pie = px.pie(
-        data, names="performance_label", title="Performance Label Distribution"
-    )
-    return fig_vehicle, fig_weather, fig_region, fig_mode, fig_scatter, fig_pie
-def ai_business_recommendations(data):
-    vehicle = group_summary(data, "vehicle_type")
-    weather = group_summary(data, "weather_condition")
-    region = group_summary(data, "region")
-    mode = group_summary(data, "delivery_mode")
-    distance = group_summary(data, "distance_category")
-    worst_vehicle = vehicle.iloc[0]
-    worst_weather = weather.iloc[0]
-    worst_region = region.iloc[0]
-    worst_mode = mode.iloc[0]
-    worst_distance = distance.iloc[0]
-    return f"""
-## AI-enhanced Management Interpretation
-### Main delay-risk factors
-1. **Vehicle risk:** `{worst_vehicle['vehicle_type']}` has the highest average delay at **{worst_vehicle['avg_delay_hours']:.2f} hours**.
-2. **Weather risk:** `{worst_weather['weather_condition']}` has the highest delay rate at **{worst_weather['delay_rate_pct']:.1f}%**.
-3. **Regional risk:** `{worst_region['region']}` has the highest average delay at **{worst_region['avg_delay_hours']:.2f} hours**.
-4. **Delivery mode risk:** `{worst_mode['delivery_mode']}` has the highest delay rate at **{worst_mode['delay_rate_pct']:.1f}%**.
-5. **Distance risk:** `{worst_distance['distance_category']}` deliveries show the highest average delay at **{worst_distance['avg_delay_hours']:.2f} hours**.
-### Recommended management actions
-- **Prioritize capacity planning** for the worst-performing vehicle and region combination.
-- **Add weather-based buffer rules** for high-risk conditions before accepting customer delivery promises.
-- **Use dynamic routing** for long-distance and central-region deliveries because these create operational pressure.
-- **Monitor same-day/express promises carefully** because fast delivery modes are more sensitive to small disruptions.
-- **Create an exception dashboard** that flags deliveries where expected time is unrealistic compared with distance, vehicle, weather, and region.
-### Business value of this automation
-This app turns raw delivery data into cleaned data, synthetic scenario data, KPI dashboards, risk rankings, and management recommendations automatically. Instead of manually checking Excel tables, managers can upload a CSV and immediately see where delay risk is highest.
-"""
-def qualitative_analysis():
-    return """
-## Qualitative Analysis Layer
-The business challenge is not only numerical. Delivery delays also affect customer trust, operational workload, and brand perception.
-### Operational interpretation
-- Bad weather increases uncertainty and makes delivery planning less reliable.
-- Certain vehicle types are better suited to specific delivery contexts.
-- Central regions may create congestion risk and therefore need additional time buffers.
-- Long-distance deliveries require more careful promise management.
-### Customer impact
-- Delays reduce satisfaction even when the package eventually arrives.
-- Customers are especially sensitive to delays in express or same-day delivery.
-- Better delivery estimates can improve trust because customers prefer realistic promises over optimistic but unreliable promises.
-### Strategic interpretation
-The company should not only ask, “Which deliveries are late?” It should ask, “Which operational conditions make lateness predictable before the delivery happens?”
-"""
-def run_dashboard(file):
-    cleaned, report = clean_data(file)
-    if cleaned is None:
-        empty = pd.DataFrame()
-        blank_fig = px.scatter(title="Upload a valid CSV to generate the dashboard")
-        return report, empty, "", blank_fig, blank_fig, blank_fig, blank_fig, blank_fig, blank_fig, "", ""
-    data = generate_synthetic_analysis(cleaned)
-    figs = make_figures(data)
-    return (
-        report,
-        data.head(100),
-        kpi_cards(data),
-        *figs,
-        ai_business_recommendations(data),
-        qualitative_analysis()
-    )
-def download_processed_file(file):
-    cleaned, report = clean_data(file)
-    if cleaned is None:
-        return None
-    data = generate_synthetic_analysis(cleaned)
-    output_path = "processed_delivery_dashboard_data.csv"
-    data.to_csv(output_path, index=False)
-    return output_path
-with gr.Blocks(theme=gr.themes.Soft(), title="AI Delivery Performance Dashboard") as demo:
-    gr.Markdown(
-        """
-# 🚚 AI Delivery Performance Dashboard
-Upload delivery logistics data and automatically generate a cleaned dataset, synthetic delay logic, KPI dashboard, quantitative charts, and AI-enhanced management recommendations.
-**Business challenge:** Which operational factors create the highest delivery delay risk, and what should management do?
-"""
-    )
-    with gr.Row():
-        file_input = gr.File(label="Upload Delivery_Logistics.csv", file_types=[".csv"])
-        run_button = gr.Button("Generate Dashboard", variant="primary")
-    cleaning_report = gr.Markdown()
-    with gr.Tab("1. KPI Overview"):
-        kpi_output = gr.Markdown()
-        preview_table = gr.Dataframe(label="Preview of Processed Data", interactive=False)
-        download_button = gr.Button("Download Processed CSV")
-        download_file = gr.File(label="Processed CSV")
-    with gr.Tab("2. Quantitative Analysis"):
-        with gr.Row():
-            fig_vehicle = gr.Plot()
-            fig_weather = gr.Plot()
-        with gr.Row():
-            fig_region = gr.Plot()
-            fig_mode = gr.Plot()
-        with gr.Row():
-            fig_scatter = gr.Plot()
-            fig_pie = gr.Plot()
-    with gr.Tab("3. AI Management Recommendations"):
-        recommendations_output = gr.Markdown()
-    with gr.Tab("4. Qualitative Analysis"):
-        qualitative_output = gr.Markdown(value=qualitative_analysis())
-    with gr.Tab("5. How the Automation Works"):
-        gr.Markdown(
-            """
-## Automation logic
-1. **Data extraction:** The user uploads a CSV file.
-2. **Data cleaning:** The app standardizes column names, removes duplicates, converts time columns, and fills missing values.
-3. **Synthetic data generation:** The app creates realistic expected and actual delivery times using distance, vehicle type, weather, delivery mode, and region.
-4. **Automated analysis:** The app calculates delay hours, delay score, performance labels, risk rankings, and KPIs.
-5. **AI-enhanced interpretation:** The app converts the numerical findings into business recommendations for managers.
-## Why this fulfills the project instructions
-- Uses real-world/found delivery logistics data.
-- Adds synthetic data logic to create realistic delay scenarios.
-- Includes quantitative analysis through KPIs, rankings, and charts.
-- Includes qualitative analysis through operational and customer interpretation.
-- Automates data cleaning, generation, analysis, and recommendation writing.
-"""
-        )
-    run_button.click(
-        fn=run_dashboard,
-        inputs=file_input,
-        outputs=[
-            cleaning_report, preview_table, kpi_output,
-            fig_vehicle, fig_weather, fig_region, fig_mode, fig_scatter, fig_pie,
-            recommendations_output, qualitative_output
-        ]
-    )
-    download_button.click(fn=download_processed_file, inputs=file_input, outputs=download_file)
-if __name__ == "__main__":
-    demo.launch()