File size: 843 Bytes
db65b8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import pandas as pd
import joblib
import shap

from features.log_feature_extraction import run_pipeline


# Locations of the serialized artifacts that explain_logs() loads with joblib.
# Both are relative paths, so they resolve against the process's current
# working directory at call time.
# NOTE(review): presumably produced by the training step — confirm.
MODEL_PATH = "models/failure_model.pkl"  # model object; explain_logs() reads its `.estimator`
FEATURE_PATH = "models/feature_columns.pkl"  # column names used to select the model inputs


def explain_logs(log_file) -> dict:
    """Rank the features that contribute most to the model's output for a log file.

    The log file is passed through ``run_pipeline`` to obtain a feature table,
    SHAP values are computed for every row with a tree explainer, and the
    features are ranked by their mean absolute SHAP value.

    Args:
        log_file: Log file to analyse; forwarded unchanged to ``run_pipeline``.

    Returns:
        ``{"top_features": [{"feature": <name>, "impact": <float>}, ...]}``
        holding at most the ten highest-impact features in descending order.
        The list is empty when the feature table has no rows.

    Raises:
        KeyError: If the feature table lacks a column listed in the stored
            feature-column artifact.
    """
    import os
    import tempfile

    import numpy as np

    # Use a private scratch directory instead of a fixed file in the working
    # directory: concurrent calls can no longer overwrite each other's
    # features, and the intermediate CSV is removed even if a later step fails.
    with tempfile.TemporaryDirectory() as scratch_dir:
        features_csv = os.path.join(scratch_dir, "temp_features.csv")
        # run_pipeline is expected to write the extracted features to this path.
        run_pipeline(log_file, features_csv)
        df = pd.read_csv(features_csv)

    model = joblib.load(MODEL_PATH)
    feature_cols = joblib.load(FEATURE_PATH)

    # Select (and order) exactly the columns recorded alongside the model.
    X = df[feature_cols]

    # Nothing to explain: averaging over zero rows would only yield NaNs.
    if X.empty:
        return {"top_features": []}

    # NOTE(review): assumes the pickled object is a wrapper exposing the fitted
    # tree model as `.estimator` — confirm against the training code.
    explainer = shap.TreeExplainer(model.estimator)
    shap_values = explainer.shap_values(X)

    # Depending on the shap version and the model, shap_values is either
    #   * one (n_samples, n_features) array (single-output models),
    #   * a list of such arrays, one per output (older shap releases), or
    #   * one (n_samples, n_features, n_outputs) array (newer shap releases).
    # Bring the list form into the 3-D layout so every case reduces to one
    # score per feature.
    if isinstance(shap_values, list):
        magnitudes = np.abs(np.stack(shap_values, axis=-1))
    else:
        magnitudes = np.abs(np.asarray(shap_values))

    if magnitudes.ndim == 3:
        # Average over samples and outputs, keeping the feature axis.
        importance = magnitudes.mean(axis=(0, 2))
    else:
        importance = magnitudes.mean(axis=0)

    feature_importance = sorted(
        zip(feature_cols, importance),
        key=lambda pair: pair[1],
        reverse=True,
    )[:10]

    return {
        "top_features": [
            {"feature": name, "impact": float(score)}
            for name, score in feature_importance
        ]
    }