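"""Score a log file against a trained failure model.

Runs the project's feature-extraction pipeline over a log file, scores
each row with the persisted classifier, and reports the mean failure
probability plus a coarse risk tier per module.
"""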
import os
import tempfile

import joblib
import pandas as pd

from features.log_feature_extraction import run_pipeline

MODEL_PATH = "models/failure_model.pkl"
FEATURE_PATH = "models/feature_columns.pkl"

# Load the model and its expected feature columns once at import time
# so repeated predict_logs() calls don't re-read them from disk.
model = joblib.load(MODEL_PATH)
feature_cols = joblib.load(FEATURE_PATH)


def predict_logs(log_file):

    # Extract features into a throwaway CSV; using a tempfile avoids
    # clobbering concurrent runs or leaving stale files behind.
    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp:
        feature_file = tmp.name
    try:
        run_pipeline(log_file, feature_file)
        df = pd.read_csv(feature_file)
    finally:
        os.unlink(feature_file)

    # Ensure every feature the model was trained on exists; the
    # extractor may omit columns for event types absent from this log.
    for col in feature_cols:
        if col not in df.columns:
            df[col] = 0

    # select the model features, in the column order the model expects
    X = df[feature_cols]

    # probability of the positive (failure) class
    probs = model.predict_proba(X)[:, 1]

    # attach per-row predictions ("module" is still present in df)
    df["failure_probability"] = probs

    # aggregate module risk
    module_risk = (
        df.groupby("module")["failure_probability"]
        .mean()
        .sort_values(ascending=False)
    )

    results = []

    # Bucket each module's mean probability into coarse risk tiers;
    # the 0.75 / 0.4 cut-offs are heuristic thresholds.
    for module, prob in module_risk.items():

        if prob > 0.75:
            risk = "HIGH"
        elif prob > 0.4:
            risk = "MEDIUM"
        else:
            risk = "LOW"

        results.append({
            "module": module,
            "failure_probability": float(prob),
            "risk": risk
        })

    return {
        "summary": {
            "total_logs": int(len(df)),
            "modules_analyzed": int(len(results))
        },
        "module_risk": results
    }
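

if __name__ == "__main__":
    # Minimal illustrative entry point: the single-argument CLI shape
    # is an assumption for demonstration, not part of the original module.
    import json
    import sys

    print(json.dumps(predict_logs(sys.argv[1]), indent=2))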