# fanomaly / app.py
# rickyt
# initialize
# 63043ac
import gradio as gr
import pandas as pd
import shap
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt
# Module-level cache of the most recent anomaly summary table.
# analyze_high_fuel_anomalies() overwrites it and filter_by_explanation()
# reads it; being a module global, it is shared by everything in this process.
summary_df_global = pd.DataFrame()
# CSV file analyzed when the "Run Analysis" button is clicked.
STATIC_FILE_PATH = "Fuel_ADT_2025_Cleaned.csv"
def plot_explanation_counts(counts_df):
    """Draw a horizontal bar chart of anomaly counts per explanation.

    Args:
        counts_df: DataFrame with an ``explanation`` column (bar labels) and
            a ``count`` column (bar lengths).

    Returns:
        A ``matplotlib.figure.Figure`` holding the chart. It exposes the same
        ``savefig`` method as the ``pyplot`` module, so it can be handed to
        ``gr.Plot`` directly.
    """
    # Build the figure without pyplot: pyplot keeps every figure it creates
    # in a global registry until it is explicitly closed, which leaks one
    # figure per button click in a long-running server, and "the current
    # figure" is shared state between concurrent requests.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(10, 5))
    ax = fig.subplots()

    bars = ax.barh(counts_df['explanation'], counts_df['count'], color='skyblue')
    ax.set_xlabel("Count")
    ax.set_ylabel("Explanation")
    ax.set_title("Anomaly Count by Explanation")
    # First row of counts_df is drawn at the top of the chart.
    ax.invert_yaxis()

    # Print the numeric count just to the right of each bar.
    for bar in bars:
        width = bar.get_width()
        ax.text(
            width + 0.3,
            bar.get_y() + bar.get_height() / 2,
            str(int(width)),
            va='center',
            fontsize=8,
        )

    fig.tight_layout()
    return fig
def _describe_shap_contributions(shap_vals, threshold=0.3):
    """Turn one row of SHAP values into a short, comma-separated explanation.

    ``shap_vals`` is indexed by feature position: 0 = loaded drive time,
    1 = empty drive time, 2 = fuel rate. A phrase is added for each feature
    whose SHAP value lies beyond ``+/- threshold``; if none does, the result
    is ``"minor deviation"``.
    """
    # NOTE(review): the sign of a SHAP value describes the feature's push on
    # the model output, not necessarily whether the raw value is high or low.
    # The wording below is kept as-is; confirm it matches the intent.
    rules = (
        (2, "slightly high fuel rate", "slightly low fuel rate"),
        (0, "slightly long loaded drive", "slightly short loaded drive"),
        (1, "slightly long empty drive", "slightly short empty drive"),
    )
    parts = []
    for position, positive_label, negative_label in rules:
        value = shap_vals[position]
        if value > threshold:
            parts.append(positive_label)
        elif value < -threshold:
            parts.append(negative_label)
    return ", ".join(parts) if parts else "minor deviation"


def analyze_high_fuel_anomalies(file_path):
    """Detect and explain anomalies among the highest-fuel-rate cycles.

    Steps:
      1. Read the CSV and drop rows missing any of the three model features.
      2. Keep rows whose ``fuel_rate_cycle`` is above the 90th percentile.
      3. Fit an ``IsolationForest`` on those rows and take the ones it labels
         ``-1`` as anomalies.
      4. Compute SHAP values for (at most) the first 100 anomalies and turn
         them into a textual explanation per row.

    The resulting summary table is also stored in ``summary_df_global`` so
    that ``filter_by_explanation`` can filter it later.

    Args:
        file_path: Path of the CSV file to analyze.

    Returns:
        A 3-tuple ``(figure, dropdown_update, summary_df)``: the chart from
        ``plot_explanation_counts``, a ``gr.update`` setting the dropdown
        choices (``"All"`` plus each distinct explanation), and the summary
        DataFrame.
    """
    global summary_df_global

    feature_cols = ['loaded_drv_time_percycle', 'empty_drv_time_percycle', 'fuel_rate_cycle']
    summary_cols = ['serge_x', 'date', 'loaded_drv_time', 'empty_drv_time', 'fuel_rate', 'explanation']

    df = pd.read_csv(file_path).dropna(subset=feature_cols)
    fuel_threshold = df['fuel_rate_cycle'].quantile(0.90)
    # Reset the index so labels equal row positions in the feature matrix.
    high_fuel_df = df[df['fuel_rate_cycle'] > fuel_threshold].reset_index(drop=True)

    summary_rows = []
    # IsolationForest cannot be fitted on zero rows; fall through to an
    # empty summary instead of raising.
    if not high_fuel_df.empty:
        features = high_fuel_df[feature_cols]
        model = IsolationForest(contamination=0.1, random_state=42)
        labels = model.fit_predict(features)

        # Only the first 100 anomalies are reported.
        anomaly_features = features[labels == -1].head(100)
        if not anomaly_features.empty:
            explainer = shap.Explainer(model, features)
            # Explain exactly the anomalous rows so that the k-th SHAP row
            # belongs to the k-th anomaly. (Explaining every high-fuel row
            # and indexing by the anomaly's position in the filtered frame
            # pairs anomalies with the SHAP values of unrelated rows.)
            shap_values = explainer(anomaly_features)
            anomalies = high_fuel_df.loc[anomaly_features.index]

            for (_, row), shap_vals in zip(anomalies.iterrows(), shap_values.values):
                summary_rows.append({
                    'serge_x': row['serge_x'],
                    'date': row['after_calendar_x_parsed'],
                    'loaded_drv_time': round(row['loaded_drv_time_percycle'], 2),
                    'empty_drv_time': round(row['empty_drv_time_percycle'], 2),
                    'fuel_rate': round(row['fuel_rate_cycle'], 2),
                    'explanation': _describe_shap_contributions(shap_vals),
                })

    # Passing the columns explicitly keeps the 'explanation' column present
    # even when no anomaly was found.
    summary_df = pd.DataFrame(summary_rows, columns=summary_cols)
    summary_df_global = summary_df.copy()

    explanation_counts = summary_df['explanation'].value_counts().reset_index()
    explanation_counts.columns = ['explanation', 'count']
    fig = plot_explanation_counts(explanation_counts)

    dropdown_choices = ["All"] + sorted(summary_df['explanation'].unique().tolist())
    return fig, gr.update(choices=dropdown_choices, value="All"), summary_df
def filter_by_explanation(selected):
    """Return the cached summary rows matching the chosen explanation.

    Args:
        selected: An explanation string, or ``"All"`` for no filtering.

    Returns:
        ``summary_df_global`` itself when ``selected`` is ``"All"``;
        otherwise the rows whose ``explanation`` column equals ``selected``.
    """
    cached = summary_df_global
    if selected != "All":
        matches = cached["explanation"] == selected
        return cached[matches]
    return cached
# Gradio interface: one button runs the analysis on the static CSV and fills
# the chart, the explanation dropdown and the table; changing the dropdown
# re-filters the table from the cached summary.
# The emoji in the labels were mis-encoded (UTF-8 bytes rendered through a
# single-byte code page) and are restored here.
with gr.Blocks() as demo:
    gr.Markdown("## 🚛 High Fuel Anomaly Detector with SHAP")
    gr.Markdown("Detect and explain high fuel usage anomalies from ADT fleet data, grouped by behavior.")

    run_button = gr.Button("🚀 Run Analysis on Static File")
    chart_output = gr.Plot(label="📊 Anomaly Count by Explanation")
    # Choices start empty; they are populated by the analysis callback.
    dropdown = gr.Dropdown(label="🔍 Filter by Explanation", choices=[], interactive=True)
    table = gr.Dataframe(label="📋 Filtered Anomalies")

    # The three return values of analyze_high_fuel_anomalies map, in order,
    # to the three output components.
    run_button.click(
        fn=lambda: analyze_high_fuel_anomalies(STATIC_FILE_PATH),
        inputs=[],
        outputs=[chart_output, dropdown, table],
    )
    dropdown.change(
        fn=filter_by_explanation,
        inputs=dropdown,
        outputs=table,
    )

demo.launch()