# NOTE(review): the following lines were web-page metadata captured by the
# scrape (Spaces status / file size), not part of the source file:
#   Spaces: Sleeping / Sleeping / File size: 1,518 Bytes
"""Score parsed log features with a trained failure-prediction model.

Loads the persisted classifier and the feature-column list once at import
time, then exposes ``predict_logs()`` for scoring raw log files.
"""
import pandas as pd
import joblib

from features.log_feature_extraction import run_pipeline

# Persisted artifacts produced by the training pipeline.
MODEL_PATH = "models/failure_model.pkl"
FEATURE_PATH = "models/feature_columns.pkl"

# Loaded once at import time so repeated predictions reuse the same model.
model = joblib.load(MODEL_PATH)
feature_cols = joblib.load(FEATURE_PATH)
def predict_logs(log_file):
    """Score a raw log file and aggregate failure risk per module.

    Runs the feature-extraction pipeline on *log_file*, scores every
    extracted row with the module-level ``model``, then averages the
    per-row failure probability by module.

    Parameters
    ----------
    log_file : str or path-like
        Path to the raw log file accepted by ``run_pipeline``.

    Returns
    -------
    dict
        ``{"summary": {"total_logs", "modules_analyzed"},
        "module_risk": [{"module", "failure_probability", "risk"}, ...]}``
        with modules ordered by descending failure probability.
    """
    import os
    import tempfile

    # Extract features into a per-call temporary CSV. The original wrote a
    # fixed "temp_features.csv" into the CWD and never removed it, which
    # leaks files and races under concurrent calls.
    with tempfile.TemporaryDirectory(prefix="logpred_") as tmp_dir:
        tmp_csv = os.path.join(tmp_dir, "features.csv")
        run_pipeline(log_file, tmp_csv)
        df = pd.read_csv(tmp_csv)

    # Guarantee every column the model was trained on exists; features
    # absent from this log default to 0.
    for col in feature_cols:
        if col not in df.columns:
            df[col] = 0

    # Probability of the positive ("failure") class for each log row.
    # NOTE: selecting df[feature_cols] does not drop "module" from df, so
    # no save/restore of that column is needed.
    df["failure_probability"] = model.predict_proba(df[feature_cols])[:, 1]

    # Mean failure probability per module, riskiest first.
    module_risk = (
        df.groupby("module")["failure_probability"]
        .mean()
        .sort_values(ascending=False)
    )

    results = [
        {
            "module": module,
            "failure_probability": float(prob),
            "risk": _risk_label(prob),
        }
        for module, prob in module_risk.items()
    ]

    return {
        "summary": {
            "total_logs": int(len(df)),
            "modules_analyzed": int(len(results)),
        },
        "module_risk": results,
    }


def _risk_label(prob):
    """Map a failure probability to a coarse risk bucket (same thresholds
    as the original inline chain: >0.75 HIGH, >0.4 MEDIUM, else LOW)."""
    if prob > 0.75:
        return "HIGH"
    if prob > 0.4:
        return "MEDIUM"
    return "LOW"