"""Rank the features that most influence the failure model for a log file."""

import os
import tempfile

import joblib
import numpy as np
import pandas as pd
import shap

from features.log_feature_extraction import run_pipeline

MODEL_PATH = "models/failure_model.pkl"
FEATURE_PATH = "models/feature_columns.pkl"


def _mean_abs_shap(shap_values):
    """Collapse raw SHAP output to one mean-|SHAP| score per feature.

    Accepts the three layouts ``TreeExplainer.shap_values`` can produce:

    * a 2-D array ``(n_samples, n_features)`` for single-output models,
    * a list of such arrays, one per output (older shap releases),
    * a 3-D array ``(n_samples, n_features, n_outputs)`` (newer releases).

    Raises:
        ValueError: if the values are neither 2-D nor 3-D after normalisation.
    """
    if isinstance(shap_values, list):
        # Stack per-output arrays on a trailing axis so the list layout and
        # the 3-D layout go through the same reduction below.
        values = np.stack([np.asarray(v) for v in shap_values], axis=-1)
    else:
        values = np.asarray(shap_values)

    if values.ndim == 2:
        return np.abs(values).mean(axis=0)
    if values.ndim == 3:
        # Average over samples and outputs, leaving one score per feature.
        return np.abs(values).mean(axis=(0, 2))
    raise ValueError(
        f"Unsupported SHAP values shape {values.shape}; expected 2 or 3 dimensions"
    )


def explain_logs(log_file, top_n=10):
    """Return the most influential model features for ``log_file``.

    Runs ``run_pipeline`` on the log file to produce a feature CSV, loads the
    persisted model and feature-column list, computes SHAP values with a
    ``shap.TreeExplainer`` and ranks features by mean absolute SHAP value.

    Args:
        log_file: Path of the log file, passed through to ``run_pipeline``.
        top_n: Maximum number of features to return (default 10, matching the
            previous hard-coded limit). ``None`` returns every feature.

    Returns:
        ``{"top_features": [{"feature": <column name>, "impact": <float>}, ...]}``
        ordered from highest to lowest impact.

    Raises:
        KeyError: if the feature CSV lacks any of the persisted feature columns.
        ValueError: if the SHAP output has an unsupported number of dimensions.
    """
    # Use a private scratch directory instead of a fixed file in the working
    # directory: overlapping calls no longer overwrite each other's features
    # and nothing is left behind afterwards.
    with tempfile.TemporaryDirectory() as scratch_dir:
        features_csv = os.path.join(scratch_dir, "features.csv")
        run_pipeline(log_file, features_csv)
        df = pd.read_csv(features_csv)

    model = joblib.load(MODEL_PATH)
    feature_cols = joblib.load(FEATURE_PATH)

    X = df[feature_cols]

    # NOTE(review): assumes the persisted model is a wrapper exposing the
    # fitted tree model as ``.estimator`` -- confirm against the training code.
    explainer = shap.TreeExplainer(model.estimator)
    shap_values = explainer.shap_values(X)

    importance = _mean_abs_shap(shap_values)

    # sorted() is stable, so features with equal impact keep column order.
    feature_importance = sorted(
        zip(feature_cols, importance),
        key=lambda pair: pair[1],
        reverse=True,
    )[:top_n]

    return {
        "top_features": [
            {"feature": name, "impact": float(score)}
            for name, score in feature_importance
        ]
    }