# File: rtl-reliability-engine/api/predictor.py
# Page header residue: "abhinavvvvv's picture"
# Commit message: "fixed predictor"
# Commit: 60310bd
import os
import tempfile

import joblib
import pandas as pd

from features.log_feature_extraction import run_pipeline
# Locations of the serialized artifacts. Both paths are relative, so they are
# resolved against the process's current working directory.
MODEL_PATH = "models/failure_model.pkl"
FEATURE_PATH = "models/feature_columns.pkl"

# Both artifacts are deserialized once, when this module is imported, and are
# then shared by every call to predict_logs(). The model is loaded first, the
# feature-column list second.
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code
# while loading; only point these paths at trusted files.
model, feature_cols = (
    joblib.load(artifact_path) for artifact_path in (MODEL_PATH, FEATURE_PATH)
)
def _risk_level(probability, high_threshold, medium_threshold):
    """Map a mean failure probability to "HIGH", "MEDIUM" or "LOW"."""
    if probability > high_threshold:
        return "HIGH"
    if probability > medium_threshold:
        return "MEDIUM"
    return "LOW"


def predict_logs(log_file, *, high_threshold=0.75, medium_threshold=0.4):
    """Score a log file and report the mean failure probability per module.

    The log file is run through ``run_pipeline`` to produce a feature table,
    each row is scored with the module-level ``model``, and the scores are
    averaged per value of the ``module`` column.

    Args:
        log_file: Passed unchanged to ``run_pipeline`` as its input argument.
        high_threshold: A module whose mean probability is strictly greater
            than this value is labelled ``"HIGH"``.
        medium_threshold: A module whose mean probability is strictly greater
            than this value (and not ``"HIGH"``) is labelled ``"MEDIUM"``;
            everything else is ``"LOW"``.

    Returns:
        A dict with two keys: ``"summary"`` (``total_logs`` = number of
        feature rows, ``modules_analyzed`` = number of reported modules) and
        ``"module_risk"`` (a list of ``{"module", "failure_probability",
        "risk"}`` dicts ordered by descending probability). If the feature
        table has no rows, both counts are 0 and the list is empty.

    Raises:
        KeyError: If the extracted feature table has no ``module`` column.
    """
    # Write the intermediate CSV into a directory private to this call. A
    # fixed file name in the working directory would let concurrent calls
    # overwrite each other's features and would never be cleaned up.
    with tempfile.TemporaryDirectory() as tmp_dir:
        features_path = os.path.join(tmp_dir, "features.csv")
        run_pipeline(log_file, features_path)
        df = pd.read_csv(features_path)

    if "module" not in df.columns:
        raise KeyError("extracted features have no 'module' column")

    # Nothing to score: return an empty report instead of handing the model
    # a table with zero rows.
    if df.empty:
        return {
            "summary": {"total_logs": 0, "modules_analyzed": 0},
            "module_risk": [],
        }

    # Select the model's features in training order; any feature the
    # pipeline did not produce is created and filled with 0. Values that are
    # already present (including NaN) are left untouched.
    X = df.reindex(columns=feature_cols, fill_value=0)

    # Column 1 of predict_proba is used as the failure probability.
    # NOTE(review): assumes a binary classifier whose second class means
    # "failure" - confirm against the training code.
    df["failure_probability"] = model.predict_proba(X)[:, 1]

    # Average the per-row probabilities within each module, riskiest first.
    module_risk = (
        df.groupby("module")["failure_probability"]
        .mean()
        .sort_values(ascending=False)
    )

    results = [
        {
            "module": module,
            "failure_probability": float(prob),
            "risk": _risk_level(prob, high_threshold, medium_threshold),
        }
        for module, prob in module_risk.items()
    ]

    return {
        "summary": {
            "total_logs": int(len(df)),
            "modules_analyzed": int(len(results)),
        },
        "module_risk": results,
    }