Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import plotly.express as px | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.preprocessing import OneHotEncoder | |
| from sklearn.compose import ColumnTransformer | |
| from sklearn.pipeline import Pipeline | |
| from pathlib import Path | |
| import tempfile | |
# Default dataset read by load_and_prepare when no file is supplied.
DATA_PATH = Path("synthetic_delivery_data.csv")

# Columns that load_and_prepare coerces to numeric and median-fills when present.
NUMERIC_COLS = [
    "distance_km", "package_weight_kg", "delivery_time_hours",
    "expected_time_hours", "delivery_rating", "delivery_cost"
]

# Columns that load_and_prepare normalizes to stripped, lower-case text when present.
CAT_COLS = [
    "delivery_partner", "package_type", "vehicle_type", "delivery_mode",
    "region", "weather_condition", "delayed", "delivery_status"
]

# Stylesheet handed to gr.Blocks; the metric-card / metric-label / metric-value /
# insight-box / small-muted classes are the ones used by the HTML that
# kpi_html and auto_insights generate.
CUSTOM_CSS = """
.gradio-container {max-width: 1280px !important; margin: auto;}
.metric-card {background: linear-gradient(135deg, #ffffff, #f7f8fb); border: 1px solid #e8e8ef; border-radius: 18px; padding: 18px; box-shadow: 0 8px 24px rgba(0,0,0,.05);}
.metric-label {font-size: 13px; color: #5f6470; margin-bottom: 6px;}
.metric-value {font-size: 30px; font-weight: 800; color: #111827;}
.insight-box {background: #111827; color: white; border-radius: 18px; padding: 20px; line-height: 1.55;}
.small-muted {color: #6b7280; font-size: 13px;}
"""
def _clean_time_column(series):
    """Convert a duration column to numeric values.

    Values that already parse as numbers (including numeric strings such as
    ``"2.5"``) are converted directly. Only values that fail that parse are
    treated as timestamp-looking strings such as
    ``1970-01-01 00:00:00.000000008``, where the text after the last ``"."``
    is taken as the number.

    Args:
        series: pandas Series holding numbers and/or strings.

    Returns:
        A numeric Series with the same index; unparseable values become NaN.
    """
    if pd.api.types.is_numeric_dtype(series):
        return pd.to_numeric(series, errors="coerce")
    text = series.astype(str).str.strip()
    direct = pd.to_numeric(text, errors="coerce")
    # Use the fractional tail only where the direct parse failed; otherwise a
    # plain decimal string like "2.5" would be truncated to 5.
    tail = pd.to_numeric(text.str.rsplit(".", n=1).str[-1], errors="coerce")
    return direct.fillna(tail)
def load_and_prepare(file_obj=None):
    """Load a delivery CSV, clean it, and add the derived delay columns.

    Args:
        file_obj: ``None`` to read ``DATA_PATH``; otherwise the uploaded file,
            given either as a path or as an object whose ``.name`` attribute
            is the path.

    Returns:
        A DataFrame with lower-cased column names, a fresh ``0..n-1`` index,
        and the added columns ``delay_hours``, ``calculated_delay``,
        ``delay_score``, ``performance_label`` and ``distance_category``.

    Raises:
        gr.Error: If any of the minimum required columns is missing.
    """
    if file_obj is None:
        source = DATA_PATH
    else:
        # Depending on the Gradio version/config the upload may arrive as a
        # plain path string or as a file wrapper exposing the path via .name.
        source = getattr(file_obj, "name", file_obj)
    df = pd.read_csv(source)
    df.columns = df.columns.str.strip().str.lower()
    # Reset the index so dropped duplicates do not leave gaps that would
    # misalign any later positional assignment.
    df = df.drop_duplicates().reset_index(drop=True)

    required_minimum = ["distance_km", "vehicle_type", "weather_condition", "delivery_mode", "region"]
    missing_required = [c for c in required_minimum if c not in df.columns]
    if missing_required:
        raise gr.Error(f"Your file is missing these required columns: {missing_required}")

    for col in ["delivery_time_hours", "expected_time_hours"]:
        if col in df.columns:
            df[col] = _clean_time_column(df[col])

    for col in NUMERIC_COLS:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")
            df[col] = df[col].fillna(df[col].median())

    for col in CAT_COLS:
        if col not in df.columns:
            continue
        # Record missing cells BEFORE casting: astype(str) would turn NaN into
        # the literal text "nan" and hide it from isna().
        missing = df[col].isna()
        normalized = df[col].astype(str).str.strip().str.lower()
        if missing.any():
            modes = normalized[~missing].mode()
            fill_value = modes.iloc[0] if not modes.empty else "unknown"
            normalized = normalized.mask(missing, fill_value)
        df[col] = normalized

    # create_synthetic_time_logic recomputes expected/delivery time for every
    # row from distance, vehicle, weather, mode and region.
    df = create_synthetic_time_logic(df)
    df["delay_hours"] = (df["delivery_time_hours"] - df["expected_time_hours"]).round(2)
    df["calculated_delay"] = np.where(df["delay_hours"] > 0, "yes", "no")
    df["delay_score"] = df["delay_hours"].apply(delay_score)
    df["performance_label"] = df["delay_score"].apply(performance_label)
    df["distance_category"] = pd.cut(
        df["distance_km"],
        bins=[0, 50, 150, 300, float("inf")],
        labels=["short", "medium", "long", "very long"],
        include_lowest=True,
    ).astype(str)
    return df
def create_synthetic_time_logic(df):
    """Compute rule-based expected and actual delivery times for every row.

    The expected time is ``distance_km / 45`` plus additive adjustments for
    vehicle, weather, delivery mode and region (floored at 0.5). A raw actual
    time applies multiplicative factors for the same four attributes; the
    actual/expected ratio is then bucketed so the final actual time is the
    expected time scaled by 0.95, 1.00, 1.10 or 1.25.

    Args:
        df: DataFrame with ``distance_km`` (numeric), ``vehicle_type``,
            ``weather_condition``, ``delivery_mode`` and ``region``. Any index
            is accepted.

    Returns:
        A copy of ``df`` (same index) with the four text columns stripped and
        lower-cased, and ``expected_time_hours`` / ``delivery_time_hours``
        set, both rounded to two decimals. The input frame is not modified.
    """
    df = df.copy()
    for col in ["vehicle_type", "weather_condition", "delivery_mode", "region"]:
        df[col] = df[col].astype(str).str.strip().str.lower()

    vehicle_adjustment = {"bike": 1.2, "van": 0.5, "truck": 0.8, "ev van": 0.4}
    weather_adjustment = {"clear": 0.0, "cloudy": 0.2, "foggy": 0.6, "rainy": 0.8, "stormy": 1.2, "cold": 0.2, "hot": 0.2, "windy": 0.3}
    mode_adjustment = {"same day": 0.3, "express": 0.2, "two day": 0.7, "standard": 0.5}
    region_adjustment = {"central": 0.6, "north": 0.3, "south": 0.3, "east": 0.4, "west": 0.4}
    # Categories missing from a table fall back to the fillna default.
    expected = (
        df["distance_km"] / 45
        + df["vehicle_type"].map(vehicle_adjustment).fillna(0.5)
        + df["weather_condition"].map(weather_adjustment).fillna(0.3)
        + df["delivery_mode"].map(mode_adjustment).fillna(0.4)
        + df["region"].map(region_adjustment).fillna(0.3)
    ).clip(lower=0.5)

    vehicle_mult = {"bike": 1.05, "van": 0.95, "truck": 1.02, "ev van": 0.97}
    weather_mult = {"clear": 0.95, "cloudy": 1.00, "foggy": 1.05, "rainy": 1.10, "stormy": 1.20, "cold": 1.02, "hot": 1.02, "windy": 1.03}
    mode_mult = {"same day": 1.05, "express": 1.02, "two day": 0.97, "standard": 1.00}
    region_mult = {"central": 1.08, "north": 1.00, "south": 1.01, "east": 1.02, "west": 1.03}
    actual = (
        expected
        * df["vehicle_type"].map(vehicle_mult).fillna(1)
        * df["weather_condition"].map(weather_mult).fillna(1)
        * df["delivery_mode"].map(mode_mult).fillna(1)
        * df["region"].map(region_mult).fillna(1)
    ).clip(lower=0.5)

    ratio = actual / expected
    # First matching condition wins, mirroring an if/elif ladder.
    scale = np.select(
        [ratio < 0.98, ratio < 1.05, ratio < 1.15],
        [0.95, 1.00, 1.10],
        default=1.25,
    )
    # Multiplying the Series by the array keeps df's own index, so the
    # assignment below cannot misalign when the index is not 0..n-1.
    balanced_actual = expected * scale

    df["expected_time_hours"] = expected.round(2)
    df["delivery_time_hours"] = balanced_actual.round(2)
    return df
def delay_score(delay):
    """Map a delay in hours to a 1-5 score (5 = on time or early).

    Upper bounds are inclusive: ``<= 0`` -> 5, ``<= 2`` -> 4, ``<= 5`` -> 3,
    ``<= 8`` -> 2, anything else -> 1.
    """
    score_by_limit = ((0, 5), (2, 4), (5, 3), (8, 2))
    for limit, score in score_by_limit:
        if delay <= limit:
            return score
    return 1
def performance_label(score):
    """Return the text label for a delay score; scores outside 1-5 give "unknown"."""
    labels = {5: "excellent", 4: "good", 3: "average", 2: "poor", 1: "critical"}
    key = int(score)
    if key in labels:
        return labels[key]
    return "unknown"
def filter_df(df, vehicle, weather, mode, region):
    """Return a copy of ``df`` restricted to the selected category values.

    Each selection is a collection of allowed values for its column. A
    selection that is empty/None, or that contains ``"all"``, leaves that
    column unfiltered.
    """
    selections = (
        ("vehicle_type", vehicle),
        ("weather_condition", weather),
        ("delivery_mode", mode),
        ("region", region),
    )
    result = df.copy()
    for column, wanted in selections:
        if not wanted or "all" in wanted:
            continue
        result = result[result[column].isin(wanted)]
    return result
def kpi_html(df):
    """Render the headline KPI cards for ``df`` as an HTML fragment.

    Reads ``calculated_delay``, ``delay_hours``, ``delay_score`` and, when
    present, ``delivery_cost``. Every metric is reported as 0 for an empty
    frame; the cost is also 0 when the cost column is absent.
    """
    total = len(df)
    delay_rate = avg_delay = avg_score = cost = 0
    if total:
        delay_rate = df["calculated_delay"].eq("yes").mean() * 100
        avg_delay = df["delay_hours"].mean()
        avg_score = df["delay_score"].mean()
        if "delivery_cost" in df.columns:
            cost = df["delivery_cost"].mean()
    return f"""
<div style='display:grid;grid-template-columns:repeat(4,minmax(0,1fr));gap:14px;'>
<div class='metric-card'><div class='metric-label'>Deliveries analyzed</div><div class='metric-value'>{total:,.0f}</div></div>
<div class='metric-card'><div class='metric-label'>Delay rate</div><div class='metric-value'>{delay_rate:.1f}%</div></div>
<div class='metric-card'><div class='metric-label'>Average delay hours</div><div class='metric-value'>{avg_delay:.2f}</div></div>
<div class='metric-card'><div class='metric-label'>Avg. delay score</div><div class='metric-value'>{avg_score:.2f}/5</div></div>
</div>
<p class='small-muted'>Average delivery cost in filtered data: {cost:,.2f}</p>
"""
def group_summary(df, col):
    """Aggregate delay metrics per value of ``col``, worst group first.

    Returns one row per group with ``deliveries`` (row count), ``delay_rate``
    (percent of rows whose ``calculated_delay`` is "yes"), and the means of
    ``delay_hours``, ``delay_score`` and ``distance_km``. Values are rounded
    to two decimals and rows are ordered by ``delay_rate`` then
    ``avg_delay_hours``, both descending.
    """
    def _delay_rate_pct(flags):
        return round((flags.eq("yes").mean() * 100), 2)

    grouped = df.groupby(col, observed=False)
    stats = grouped.agg(
        deliveries=(col, "size"),
        delay_rate=("calculated_delay", _delay_rate_pct),
        avg_delay_hours=("delay_hours", "mean"),
        avg_delay_score=("delay_score", "mean"),
        avg_distance_km=("distance_km", "mean"),
    )
    rounded = stats.round(2)
    ranked = rounded.sort_values(["delay_rate", "avg_delay_hours"], ascending=False)
    return ranked.reset_index()
def make_charts(df):
    """Build the five dashboard figures for ``df``.

    Returns a 5-tuple in this order: vehicle delay-rate bar, weather
    average-delay bar, region delay-rate bar, delivery-mode score bar, and a
    distance-vs-delay scatter drawn from at most 2000 sampled rows (fixed
    seed).
    """
    # (grouping column, metric shown as both bar height and label, title)
    bar_specs = (
        ("vehicle_type", "delay_rate", "Delay Risk by Vehicle Type"),
        ("weather_condition", "avg_delay_hours", "Average Delay Hours by Weather"),
        ("region", "delay_rate", "Delay Rate by Region"),
        ("delivery_mode", "avg_delay_score", "Performance Score by Delivery Mode"),
    )
    charts = [
        px.bar(group_summary(df, dimension), x=dimension, y=metric, text=metric, title=heading)
        for dimension, metric, heading in bar_specs
    ]

    points = df.sample(min(len(df), 2000), random_state=42)
    charts.append(
        px.scatter(
            points,
            x="distance_km",
            y="delay_hours",
            color="weather_condition",
            hover_data=["vehicle_type", "delivery_mode", "region"],
            title="Distance vs Delay Hours",
        )
    )

    for chart in charts:
        chart.update_layout(template="plotly_white", height=430, margin=dict(l=40, r=20, t=60, b=40))
    return tuple(charts)
def train_feature_importance(df):
    """Fit a random forest on the delay flag and chart its feature importances.

    Args:
        df: Prepared DataFrame containing ``calculated_delay`` and any subset
            of the candidate feature columns listed below.

    Returns:
        ``(fig, imp)`` where ``imp`` is a DataFrame of the 12 most important
        encoded features (columns ``factor`` and ``importance``) and ``fig``
        is a horizontal bar chart of them.
    """
    candidate_cols = ["vehicle_type", "weather_condition", "delivery_mode", "region", "distance_category", "distance_km", "package_weight_kg"]
    model_cols = [c for c in candidate_cols if c in df.columns]
    X = df[model_cols]
    y = df["calculated_delay"].eq("yes").astype(int)

    # Split on "is it numeric?" rather than on the dtype's name, so text
    # columns stored under any non-numeric dtype (object, category, pandas
    # string dtypes) are one-hot encoded instead of reaching the forest raw.
    num = [c for c in model_cols if pd.api.types.is_numeric_dtype(X[c])]
    cat = [c for c in model_cols if c not in num]

    pre = ColumnTransformer([("cat", OneHotEncoder(handle_unknown="ignore"), cat), ("num", "passthrough", num)])
    clf = RandomForestClassifier(n_estimators=80, random_state=42, max_depth=7)
    pipe = Pipeline([("pre", pre), ("clf", clf)])
    pipe.fit(X, y)

    names = list(pipe.named_steps["pre"].get_feature_names_out())
    importances = pipe.named_steps["clf"].feature_importances_
    imp = pd.DataFrame({"factor": names, "importance": importances}).sort_values("importance", ascending=False).head(12)
    # Drop the transformer-name prefixes added by get_feature_names_out.
    imp["factor"] = imp["factor"].str.replace("cat__", "", regex=False).str.replace("num__", "", regex=False)

    # Re-sort ascending so the largest bar is drawn at the top of the chart.
    fig = px.bar(imp.sort_values("importance"), x="importance", y="factor", orientation="h", title="AI Model: Most Important Delay-Risk Drivers")
    fig.update_layout(template="plotly_white", height=470, margin=dict(l=120, r=20, t=60, b=40))
    return fig, imp
def auto_insights(df):
    """Build the executive-interpretation HTML box for the filtered data.

    For each available dimension the highest-risk group (first row of
    ``group_summary``) and the best-performing group (lowest delay rate) are
    listed, followed by a recommendation whose wording depends on whether the
    overall delay rate exceeds 35%.

    Args:
        df: Prepared (and possibly filtered) DataFrame.

    Returns:
        An HTML string; a short "no data" box when ``df`` has no rows.
    """
    import html  # function-scope import: only this block needs it

    if len(df) == 0:
        return "<div class='insight-box'>No data available for the selected filters.</div>"

    dimensions = ["vehicle_type", "weather_condition", "delivery_mode", "region", "distance_category"]
    summaries = {c: group_summary(df, c) for c in dimensions if c in df.columns}
    worst = {k: v.iloc[0] for k, v in summaries.items() if len(v) > 0}
    best = {k: v.sort_values(["delay_rate", "avg_delay_hours"], ascending=True).iloc[0] for k, v in summaries.items() if len(v) > 0}

    def _group_name(row, key):
        # Group names come from the loaded CSV, so escape them before they
        # are embedded in markup.
        return html.escape(str(row[key]))

    top_risk_text = "<br>".join([f"• <b>{k.replace('_',' ').title()}</b>: highest risk = <b>{_group_name(row, k)}</b> ({row['delay_rate']:.1f}% delay rate, {row['avg_delay_hours']:.2f} avg delay hours)" for k, row in worst.items()])
    best_text = "<br>".join([f"• <b>{k.replace('_',' ').title()}</b>: best performer = <b>{_group_name(row, k)}</b> ({row['delay_rate']:.1f}% delay rate)" for k, row in best.items()])

    delay_rate = df["calculated_delay"].eq("yes").mean() * 100
    recommendation = "Prioritize operational buffers for the highest-risk combinations, especially where bad weather, central routes, same-day delivery, or slower vehicle types overlap."
    if delay_rate > 35:
        recommendation += " The current filtered scenario has a high delay rate, so management should add contingency capacity and proactively communicate expected delays to customers."
    else:
        recommendation += " The current filtered scenario is relatively manageable, so management can focus on monitoring and selective process improvements."
    return f"""
<div class='insight-box'>
<h3>AI-enhanced executive interpretation</h3>
<p><b>Business challenge:</b> Which operational factors create the highest delivery-delay risk, and what should management do?</p>
<p><b>Highest-risk factors found in the filtered data:</b><br>{top_risk_text}</p>
<p><b>Best-performing conditions:</b><br>{best_text}</p>
<p><b>Management action:</b> {recommendation}</p>
<p><b>Qualitative interpretation:</b> Delay risk is not only a numeric issue. It affects customer trust, service reliability, driver planning, and cost control. The dashboard therefore combines quantitative KPIs with qualitative business recommendations.</p>
</div>
"""
def update_dashboard(file_obj, vehicle, weather, mode, region):
    """Recompute every dashboard output for the current file and filters.

    Returns a flat tuple: KPI HTML, insights HTML, the five charts from
    ``make_charts``, the model-importance figure, five summary tables
    (vehicle, weather, region, delivery mode, distance category) and the
    first 15 filtered rows.

    Raises:
        gr.Error: If the filters leave no rows.
    """
    prepared = load_and_prepare(file_obj)
    selection = filter_df(prepared, vehicle, weather, mode, region)
    if len(selection) == 0:
        raise gr.Error("Your filters produced no rows. Select fewer filters.")

    charts = make_charts(selection)
    # Only the figure is displayed; the importance table is not used here.
    importance_fig, _ = train_feature_importance(selection)
    preview = selection.head(15)
    breakdown_columns = ("vehicle_type", "weather_condition", "region", "delivery_mode", "distance_category")
    breakdowns = [group_summary(selection, column) for column in breakdown_columns]
    return (
        kpi_html(selection),
        auto_insights(selection),
        *charts,
        importance_fig,
        *breakdowns,
        preview,
    )
def choices_from_data(file_obj=None):
    """Return dropdown updates listing the distinct values found in the data.

    One ``gr.update`` per filter, in the order vehicle type, weather
    condition, delivery mode, region; each sets the sorted unique values as
    choices and clears the current selection.
    """
    data = load_and_prepare(file_obj)
    filter_columns = ("vehicle_type", "weather_condition", "delivery_mode", "region")
    updates = []
    for column in filter_columns:
        options = sorted(data[column].dropna().unique().tolist())
        updates.append(gr.update(choices=options, value=[]))
    return updates
def simulate_delivery(distance, weight, vehicle, weather, mode, region):
    """Run the time logic on one hypothetical delivery and report it as Markdown.

    The single-row scenario goes through ``create_synthetic_time_logic``; the
    resulting delay is scored and labelled, and classified "HIGH RISK" when
    the delay is positive, otherwise "LOW RISK".
    """
    scenario = pd.DataFrame({
        "distance_km": [distance], "package_weight_kg": [weight], "vehicle_type": [vehicle],
        "weather_condition": [weather], "delivery_mode": [mode], "region": [region]
    })
    scenario = create_synthetic_time_logic(scenario)
    scenario["delay_hours"] = (scenario["delivery_time_hours"] - scenario["expected_time_hours"]).round(2)
    scenario["delay_score"] = scenario["delay_hours"].apply(delay_score)
    scenario["performance_label"] = scenario["delay_score"].apply(performance_label)

    expected_hours = scenario.loc[0, "expected_time_hours"]
    actual_hours = scenario.loc[0, "delivery_time_hours"]
    delay = scenario.loc[0, "delay_hours"]
    score = scenario.loc[0, "delay_score"]
    label = scenario.loc[0, "performance_label"]

    if delay > 0:
        risk = "HIGH RISK"
    else:
        risk = "LOW RISK"
    return f"""
### Simulation Result
- Expected delivery time: **{expected_hours:.2f} hours**
- Predicted actual delivery time: **{actual_hours:.2f} hours**
- Predicted delay: **{delay:.2f} hours**
- Delay score: **{score}/5**
- Performance label: **{label.title()}**
- Risk classification: **{risk}**
"""
def download_summary(file_obj, vehicle, weather, mode, region):
    """Write a plain-text executive summary and return the file's path.

    Args:
        file_obj: Uploaded file (or ``None`` for the default dataset).
        vehicle, weather, mode, region: Filter selections, as for ``filter_df``.

    Returns:
        Path of a UTF-8 ``.txt`` temp file holding the KPIs, the highest-risk
        group per dimension, and the recommended actions. The file is left on
        disk (``delete=False``) so it can be served as a download.

    Raises:
        gr.Error: If the filters leave no rows.
    """
    df = load_and_prepare(file_obj)
    fdf = filter_df(df, vehicle, weather, mode, region)
    # Without rows there is no "highest-risk group" to report and the KPI
    # means would be NaN; fail with the same message update_dashboard uses.
    if fdf.empty:
        raise gr.Error("Your filters produced no rows. Select fewer filters.")

    summary = {
        "rows_analyzed": len(fdf),
        "delay_rate_percent": round(fdf["calculated_delay"].eq("yes").mean() * 100, 2),
        "average_delay_hours": round(fdf["delay_hours"].mean(), 2),
        "average_delay_score": round(fdf["delay_score"].mean(), 2),
    }
    lines = ["Delivery Delay Risk Executive Summary", "", "KPIs:"]
    lines.extend(f"- {k.replace('_', ' ').title()}: {v}" for k, v in summary.items())

    lines += ["", "Highest-risk groups:"]
    for c in ["vehicle_type", "weather_condition", "delivery_mode", "region", "distance_category"]:
        # group_summary sorts worst-first, so row 0 is the highest-risk group.
        row = group_summary(fdf, c).iloc[0]
        lines.append(f"- {c}: {row[c]} | delay rate {row['delay_rate']}% | avg delay {row['avg_delay_hours']}h")

    lines += ["", "Recommended actions:", "- Add operational buffers for high-risk weather and region combinations.", "- Match faster vehicle types to same-day and express deliveries.", "- Use the simulator before accepting risky delivery promises.", "- Monitor delay score weekly as an operational KPI."]

    # The context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding="utf-8") as tmp:
        tmp.write("\n".join(lines))
    return tmp.name
# Load the default dataset once at import time and derive the initial option
# lists (sorted distinct values) for the filter and simulator dropdowns.
base_df = load_and_prepare(None)
vehicle_choices, weather_choices, mode_choices, region_choices = (
    sorted(base_df[column].dropna().unique().tolist())
    for column in ("vehicle_type", "weather_condition", "delivery_mode", "region")
)
# Build the Gradio UI.
# NOTE(review): the source had lost its indentation; the nesting below is
# reconstructed from statement order — confirm the layout renders as intended.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", neutral_hue="slate"), css=CUSTOM_CSS, title="Delivery Delay Risk Dashboard") as demo:
    gr.Markdown("""
# 🚚 Delivery Delay Risk Intelligence Dashboard
**AI-enhanced operations dashboard for identifying delivery delay risk factors and management actions.**
Upload your CSV or use the included dataset. The app cleans the data, generates realistic delivery-time logic, calculates delay risk, visualizes operational drivers, simulates new deliveries, and creates an executive summary.
""")
    # Data source: optional CSV upload plus a manual refresh button.
    with gr.Row():
        file_input = gr.File(label="Optional: upload your real-world/found delivery CSV", file_types=[".csv"])
        refresh_btn = gr.Button("Load / refresh data", variant="primary")
    # Multi-select filters, initially populated from the default dataset.
    with gr.Accordion("Filters", open=True):
        with gr.Row():
            vehicle_filter = gr.Dropdown(vehicle_choices, label="Vehicle type", multiselect=True)
            weather_filter = gr.Dropdown(weather_choices, label="Weather condition", multiselect=True)
            mode_filter = gr.Dropdown(mode_choices, label="Delivery mode", multiselect=True)
            region_filter = gr.Dropdown(region_choices, label="Region", multiselect=True)
    # Targets for the HTML produced by kpi_html and auto_insights.
    kpis = gr.HTML()
    insights = gr.HTML()
    with gr.Tab("Interactive dashboard"):
        with gr.Row():
            fig_vehicle = gr.Plot()
            fig_weather = gr.Plot()
        with gr.Row():
            fig_region = gr.Plot()
            fig_mode = gr.Plot()
        fig_scatter = gr.Plot()
    with gr.Tab("AI risk-driver model"):
        model_fig = gr.Plot()
        gr.Markdown("This section trains a simple Random Forest model inside the app to estimate which factors are most important for predicting delays.")
    with gr.Tab("Summary tables"):
        with gr.Row():
            vehicle_table = gr.Dataframe(label="Vehicle performance")
            weather_table = gr.Dataframe(label="Weather performance")
        with gr.Row():
            region_table = gr.Dataframe(label="Region performance")
            mode_table = gr.Dataframe(label="Delivery mode performance")
        distance_table = gr.Dataframe(label="Distance category performance")
        sample_table = gr.Dataframe(label="Cleaned sample data")
    with gr.Tab("Delivery risk simulator"):
        with gr.Row():
            sim_distance = gr.Slider(1, 500, value=120, label="Distance km")
            sim_weight = gr.Slider(0.1, 60, value=10, label="Package weight kg")
        with gr.Row():
            # Defaults index element 0, so each choice list must be non-empty.
            sim_vehicle = gr.Dropdown(vehicle_choices, value=vehicle_choices[0], label="Vehicle")
            sim_weather = gr.Dropdown(weather_choices, value=weather_choices[0], label="Weather")
            sim_mode = gr.Dropdown(mode_choices, value=mode_choices[0], label="Mode")
            sim_region = gr.Dropdown(region_choices, value=region_choices[0], label="Region")
        sim_btn = gr.Button("Simulate delivery risk", variant="primary")
        sim_output = gr.Markdown()
    with gr.Tab("Download executive summary"):
        gr.Markdown("Generate a short text summary for your presentation/report.")
        download_btn = gr.Button("Create executive summary file")
        download_file = gr.File(label="Download summary")
    # Order must match the tuple returned by update_dashboard: KPI HTML,
    # insights HTML, five charts, model figure, five tables, sample rows.
    outputs = [kpis, insights, fig_vehicle, fig_weather, fig_region, fig_mode, fig_scatter, model_fig, vehicle_table, weather_table, region_table, mode_table, distance_table, sample_table]
    refresh_btn.click(update_dashboard, inputs=[file_input, vehicle_filter, weather_filter, mode_filter, region_filter], outputs=outputs)
    # A new upload only refreshes the filter choices; the dashboard itself is
    # recomputed by the refresh button.
    file_input.change(choices_from_data, inputs=[file_input], outputs=[vehicle_filter, weather_filter, mode_filter, region_filter])
    sim_btn.click(simulate_delivery, inputs=[sim_distance, sim_weight, sim_vehicle, sim_weather, sim_mode, sim_region], outputs=sim_output)
    download_btn.click(download_summary, inputs=[file_input, vehicle_filter, weather_filter, mode_filter, region_filter], outputs=download_file)
    # Also run update_dashboard from the Blocks load event so the dashboard is
    # filled in without pressing the refresh button.
    demo.load(update_dashboard, inputs=[file_input, vehicle_filter, weather_filter, mode_filter, region_filter], outputs=outputs)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()