Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import joblib | |
| from features.log_feature_extraction import run_pipeline | |
# Locations of the serialized artifacts; relative paths, so they are resolved
# against the process's current working directory.
MODEL_PATH = "models/failure_model.pkl"
FEATURE_PATH = "models/feature_columns.pkl"

# Both artifacts are deserialized once, when this module is imported, and are
# then shared by every predict_logs() call. The classifier is loaded first,
# followed by the list of feature column names it is given at prediction time.
model, feature_cols = (
    joblib.load(artifact_path) for artifact_path in (MODEL_PATH, FEATURE_PATH)
)
def _risk_label(prob):
    """Map a mean failure probability to its "HIGH"/"MEDIUM"/"LOW" tier."""
    if prob > 0.75:
        return "HIGH"
    if prob > 0.4:
        return "MEDIUM"
    return "LOW"


def predict_logs(log_file):
    """Score a log file and summarise the failure risk of each module.

    Features are extracted with ``run_pipeline``, every extracted row is
    scored with the module-level ``model``, and the per-row probabilities are
    averaged per value of the ``module`` column.

    Args:
        log_file: Log input, passed through unchanged to ``run_pipeline``.

    Returns:
        A dict with two keys:

        * ``"summary"``: ``{"total_logs": <rows extracted>,
          "modules_analyzed": <distinct modules reported>}``.
        * ``"module_risk"``: a list of ``{"module", "failure_probability",
          "risk"}`` dicts ordered by descending mean probability, where
          ``risk`` is ``"HIGH"`` (> 0.75), ``"MEDIUM"`` (> 0.4) or ``"LOW"``.

    Raises:
        KeyError: If the extracted feature table has rows but no ``module``
            column.
    """
    import os
    import tempfile

    # Use a private scratch directory per call. A fixed file name in the
    # working directory would be shared by concurrent calls, could be read
    # stale if the pipeline wrote nothing, and would never be cleaned up.
    with tempfile.TemporaryDirectory() as scratch_dir:
        features_path = os.path.join(scratch_dir, "features.csv")
        run_pipeline(log_file, features_path)
        df = pd.read_csv(features_path)

    # Nothing to score: report an empty result instead of handing the model a
    # zero-row matrix.
    if df.empty:
        return {
            "summary": {"total_logs": 0, "modules_analyzed": 0},
            "module_risk": [],
        }

    if "module" not in df.columns:
        raise KeyError(
            "feature extraction output has no 'module' column; "
            "cannot aggregate risk per module"
        )

    # Build the model input in the exact column order stored alongside the
    # model; any feature absent from this extraction is filled with 0.
    X = df.reindex(columns=feature_cols, fill_value=0)

    # Second column of predict_proba is used as the failure probability
    # (presumably the positive/"failure" class -- confirm against training).
    df["failure_probability"] = model.predict_proba(X)[:, 1]

    # Mean probability per module, riskiest first.
    module_risk = (
        df.groupby("module")["failure_probability"]
        .mean()
        .sort_values(ascending=False)
    )

    results = [
        {
            "module": module,
            "failure_probability": float(prob),
            "risk": _risk_label(prob),
        }
        for module, prob in module_risk.items()
    ]

    return {
        "summary": {
            "total_logs": int(len(df)),
            "modules_analyzed": int(len(results)),
        },
        "module_risk": results,
    }