Spaces:
Sleeping
Sleeping
Commit ·
60310bd
1
Parent(s): fb121b9
fixed predictor
Browse files
- api/predictor.py +15 -8
api/predictor.py
CHANGED
|
@@ -3,7 +3,6 @@ import joblib
|
|
| 3 |
|
| 4 |
from features.log_feature_extraction import run_pipeline
|
| 5 |
|
| 6 |
-
|
| 7 |
MODEL_PATH = "models/failure_model.pkl"
|
| 8 |
FEATURE_PATH = "models/feature_columns.pkl"
|
| 9 |
|
|
@@ -13,30 +12,38 @@ feature_cols = joblib.load(FEATURE_PATH)
|
|
| 13 |
|
| 14 |
def predict_logs(log_file):
|
| 15 |
|
|
|
|
| 16 |
run_pipeline(log_file, "temp_features.csv")
|
| 17 |
|
| 18 |
df = pd.read_csv("temp_features.csv")
|
| 19 |
|
| 20 |
-
#
|
|
|
|
|
|
|
|
|
|
| 21 |
for col in feature_cols:
|
| 22 |
if col not in df.columns:
|
| 23 |
df[col] = 0
|
| 24 |
|
| 25 |
-
#
|
| 26 |
-
|
| 27 |
|
| 28 |
-
|
|
|
|
| 29 |
|
|
|
|
|
|
|
| 30 |
df["failure_probability"] = probs
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
| 34 |
module_risk = (
|
| 35 |
df.groupby("module")["failure_probability"]
|
| 36 |
.mean()
|
| 37 |
.sort_values(ascending=False)
|
| 38 |
)
|
| 39 |
|
|
|
|
|
|
|
| 40 |
for module, prob in module_risk.items():
|
| 41 |
|
| 42 |
if prob > 0.75:
|
|
@@ -55,7 +62,7 @@ def predict_logs(log_file):
|
|
| 55 |
return {
|
| 56 |
"summary": {
|
| 57 |
"total_logs": int(len(df)),
|
| 58 |
-
"modules_analyzed": len(results)
|
| 59 |
},
|
| 60 |
"module_risk": results
|
| 61 |
}
|
|
|
|
| 3 |
|
| 4 |
from features.log_feature_extraction import run_pipeline
|
| 5 |
|
|
|
|
| 6 |
MODEL_PATH = "models/failure_model.pkl"
|
| 7 |
FEATURE_PATH = "models/feature_columns.pkl"
|
| 8 |
|
|
|
|
| 12 |
|
| 13 |
def predict_logs(log_file):
|
| 14 |
|
| 15 |
+
# run feature extraction
|
| 16 |
run_pipeline(log_file, "temp_features.csv")
|
| 17 |
|
| 18 |
df = pd.read_csv("temp_features.csv")
|
| 19 |
|
| 20 |
+
# keep module column for aggregation later
|
| 21 |
+
modules = df["module"].copy()
|
| 22 |
+
|
| 23 |
+
# ensure all required features exist
|
| 24 |
for col in feature_cols:
|
| 25 |
if col not in df.columns:
|
| 26 |
df[col] = 0
|
| 27 |
|
| 28 |
+
# select only model features
|
| 29 |
+
X = df[feature_cols]
|
| 30 |
|
| 31 |
+
# predict probabilities
|
| 32 |
+
probs = model.predict_proba(X)[:, 1]
|
| 33 |
|
| 34 |
+
# attach predictions back
|
| 35 |
+
df["module"] = modules
|
| 36 |
df["failure_probability"] = probs
|
| 37 |
|
| 38 |
+
# aggregate module risk
|
|
|
|
| 39 |
module_risk = (
|
| 40 |
df.groupby("module")["failure_probability"]
|
| 41 |
.mean()
|
| 42 |
.sort_values(ascending=False)
|
| 43 |
)
|
| 44 |
|
| 45 |
+
results = []
|
| 46 |
+
|
| 47 |
for module, prob in module_risk.items():
|
| 48 |
|
| 49 |
if prob > 0.75:
|
|
|
|
| 62 |
return {
|
| 63 |
"summary": {
|
| 64 |
"total_logs": int(len(df)),
|
| 65 |
+
"modules_analyzed": int(len(results))
|
| 66 |
},
|
| 67 |
"module_risk": results
|
| 68 |
}
|