File size: 4,105 Bytes
63043ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import gradio as gr
import pandas as pd
import shap
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt

# CSV file that the "Run Analysis" button feeds into the analysis.
STATIC_FILE_PATH = "Fuel_ADT_2025_Cleaned.csv"

# Module-level cache of the most recent anomaly summary table. It starts out
# empty, is overwritten by analyze_high_fuel_anomalies() and is read back by
# filter_by_explanation().
summary_df_global = pd.DataFrame()

def plot_explanation_counts(counts_df):
    """Draw a horizontal bar chart of anomaly counts per explanation.

    Args:
        counts_df: Table with an 'explanation' column (bar labels) and a
            'count' column (bar lengths).

    Returns:
        The ``matplotlib.pyplot`` module; its current figure is the chart
        that was just drawn.
    """
    figure = plt.figure(figsize=(10, 5))
    axes = figure.gca()

    rects = axes.barh(counts_df['explanation'], counts_df['count'], color='skyblue')
    axes.set_xlabel("Count")
    axes.set_ylabel("Explanation")
    axes.set_title("Anomaly Count by Explanation")
    # Flip the y axis so the first row of the table is drawn at the top.
    axes.invert_yaxis()

    # Print each bar's count just past its right-hand end.
    for rect in rects:
        bar_length = rect.get_width()
        label_y = rect.get_y() + rect.get_height() / 2
        axes.text(bar_length + 0.3, label_y, str(int(bar_length)), va='center', fontsize=8)

    figure.tight_layout()
    return plt

def analyze_high_fuel_anomalies(file_path):
    """Flag and explain anomalous rows among the highest fuel-rate rows of a CSV.

    Rows missing any of the three model features are dropped, rows above the
    90th percentile of ``fuel_rate_cycle`` are kept, an IsolationForest labels
    outliers among them, and the SHAP values of up to the first 100 outliers
    are turned into a short text explanation.

    Args:
        file_path: Path of a CSV file containing the columns
            ``loaded_drv_time_percycle``, ``empty_drv_time_percycle``,
            ``fuel_rate_cycle``, ``serge_x`` and ``after_calendar_x_parsed``.

    Returns:
        A 3-tuple ``(chart, dropdown_update, summary_df)``:
        the value returned by ``plot_explanation_counts`` for the
        per-explanation counts, a ``gr.update`` carrying the dropdown choices
        ("All" plus the sorted distinct explanations) with "All" selected, and
        the summary table with one row per explained anomaly.

    Side effects:
        Replaces the module-level ``summary_df_global`` with a copy of the
        summary table.
    """
    global summary_df_global

    feature_cols = ['loaded_drv_time_percycle', 'empty_drv_time_percycle', 'fuel_rate_cycle']
    summary_cols = ['serge_x', 'date', 'loaded_drv_time', 'empty_drv_time', 'fuel_rate', 'explanation']
    # (position in feature_cols, label for SHAP > 0.3, label for SHAP < -0.3),
    # listed in the order the phrases appear in the explanation text.
    # NOTE(review): the labels assume the sign of a SHAP value mirrors the
    # direction of the feature value itself -- confirm against the explainer.
    explanation_rules = (
        (2, "slightly high fuel rate", "slightly low fuel rate"),
        (0, "slightly long loaded drive", "slightly short loaded drive"),
        (1, "slightly long empty drive", "slightly short empty drive"),
    )

    df = pd.read_csv(file_path)
    df = df.dropna(subset=feature_cols)

    fuel_threshold = df['fuel_rate_cycle'].quantile(0.90)
    high_fuel_df = df[df['fuel_rate_cycle'] > fuel_threshold].reset_index(drop=True)
    features = high_fuel_df[feature_cols]

    summary_rows = []
    # Skip the model entirely when nothing exceeds the threshold; the forest
    # cannot be fitted on zero rows.
    if not features.empty:
        model = IsolationForest(contamination=0.1, random_state=42)
        labels = model.fit_predict(features)

        explainer = shap.Explainer(model, features)
        shap_values = explainer(features)

        # Remember where each anomaly sits in `features`: the SHAP values are
        # computed for every high-fuel row, so they must be looked up by that
        # position and not by the anomaly's rank among the anomalies.
        anomaly_positions = [pos for pos, label in enumerate(labels) if label == -1]

        for pos in anomaly_positions[:100]:
            row = high_fuel_df.iloc[pos]
            shap_vals = shap_values[pos].values

            explanation_parts = []
            for col_idx, positive_label, negative_label in explanation_rules:
                if shap_vals[col_idx] > 0.3:
                    explanation_parts.append(positive_label)
                elif shap_vals[col_idx] < -0.3:
                    explanation_parts.append(negative_label)

            explanation = ", ".join(explanation_parts) if explanation_parts else "minor deviation"
            summary_rows.append({
                'serge_x': row['serge_x'],
                'date': row['after_calendar_x_parsed'],
                'loaded_drv_time': round(row['loaded_drv_time_percycle'], 2),
                'empty_drv_time': round(row['empty_drv_time_percycle'], 2),
                'fuel_rate': round(row['fuel_rate_cycle'], 2),
                'explanation': explanation
            })

    # Explicit columns keep the 'explanation' column present even when no
    # anomaly was found, so the lookups below do not raise KeyError.
    summary_df = pd.DataFrame(summary_rows, columns=summary_cols)
    summary_df_global = summary_df.copy()

    explanation_counts = summary_df['explanation'].value_counts().reset_index()
    explanation_counts.columns = ['explanation', 'count']
    fig = plot_explanation_counts(explanation_counts)

    dropdown_choices = ["All"] + sorted(summary_df['explanation'].unique().tolist())
    return fig, gr.update(choices=dropdown_choices, value="All"), summary_df

def filter_by_explanation(selected):
    """Return the cached anomaly summary, optionally narrowed to one explanation.

    Args:
        selected: The dropdown value. "All" returns the cached table itself;
            any other value keeps only rows whose 'explanation' equals it.

    Returns:
        The module-level ``summary_df_global`` table or a filtered subset of it.
    """
    result = summary_df_global
    if selected != "All":
        matches = result["explanation"] == selected
        result = result[matches]
    return result

# Gradio interface: one button runs the analysis on the static CSV and fills
# the chart, the explanation dropdown and the table; changing the dropdown
# re-filters the cached table.
# The emoji in the labels below were mojibake (UTF-8 bytes decoded with a
# single-byte codepage) and are restored to the intended characters.
with gr.Blocks() as demo:
    gr.Markdown("## 🚛 High Fuel Anomaly Detector with SHAP")
    gr.Markdown("Detect and explain high fuel usage anomalies from ADT fleet data, grouped by behavior.")

    run_button = gr.Button("🚀 Run Analysis on Static File")
    chart_output = gr.Plot(label="📊 Anomaly Count by Explanation")
    # Choices start empty; they are filled in by the first analysis run.
    dropdown = gr.Dropdown(label="🔍 Filter by Explanation", choices=[], interactive=True)
    table = gr.Dataframe(label="📋 Filtered Anomalies")

    # The button has no inputs, so the file path is bound through a lambda.
    run_button.click(fn=lambda: analyze_high_fuel_anomalies(STATIC_FILE_PATH),
                     inputs=[],
                     outputs=[chart_output, dropdown, table])

    dropdown.change(fn=filter_by_explanation,
                    inputs=dropdown,
                    outputs=table)

demo.launch()