abhinavvvvv committed on
Commit
60310bd
·
1 Parent(s): fb121b9

fixed predictor

Browse files
Files changed (1) hide show
  1. api/predictor.py +15 -8
api/predictor.py CHANGED
@@ -3,7 +3,6 @@ import joblib
3
 
4
  from features.log_feature_extraction import run_pipeline
5
 
6
-
7
  MODEL_PATH = "models/failure_model.pkl"
8
  FEATURE_PATH = "models/feature_columns.pkl"
9
 
@@ -13,30 +12,38 @@ feature_cols = joblib.load(FEATURE_PATH)
13
 
14
  def predict_logs(log_file):
15
 
 
16
  run_pipeline(log_file, "temp_features.csv")
17
 
18
  df = pd.read_csv("temp_features.csv")
19
 
20
- # ensure all training columns exist
 
 
 
21
  for col in feature_cols:
22
  if col not in df.columns:
23
  df[col] = 0
24
 
25
- # remove extra columns not used by model
26
- df = df[feature_cols]
27
 
28
- probs = model.predict_proba(df)[:, 1]
 
29
 
 
 
30
  df["failure_probability"] = probs
31
 
32
- results = []
33
-
34
  module_risk = (
35
  df.groupby("module")["failure_probability"]
36
  .mean()
37
  .sort_values(ascending=False)
38
  )
39
 
 
 
40
  for module, prob in module_risk.items():
41
 
42
  if prob > 0.75:
@@ -55,7 +62,7 @@ def predict_logs(log_file):
55
  return {
56
  "summary": {
57
  "total_logs": int(len(df)),
58
- "modules_analyzed": len(results)
59
  },
60
  "module_risk": results
61
  }
 
3
 
4
  from features.log_feature_extraction import run_pipeline
5
 
 
6
  MODEL_PATH = "models/failure_model.pkl"
7
  FEATURE_PATH = "models/feature_columns.pkl"
8
 
 
12
 
13
  def predict_logs(log_file):
14
 
15
+ # run feature extraction
16
  run_pipeline(log_file, "temp_features.csv")
17
 
18
  df = pd.read_csv("temp_features.csv")
19
 
20
+ # keep module column for aggregation later
21
+ modules = df["module"].copy()
22
+
23
+ # ensure all required features exist
24
  for col in feature_cols:
25
  if col not in df.columns:
26
  df[col] = 0
27
 
28
+ # select only model features
29
+ X = df[feature_cols]
30
 
31
+ # predict probabilities
32
+ probs = model.predict_proba(X)[:, 1]
33
 
34
+ # attach predictions back
35
+ df["module"] = modules
36
  df["failure_probability"] = probs
37
 
38
+ # aggregate module risk
 
39
  module_risk = (
40
  df.groupby("module")["failure_probability"]
41
  .mean()
42
  .sort_values(ascending=False)
43
  )
44
 
45
+ results = []
46
+
47
  for module, prob in module_risk.items():
48
 
49
  if prob > 0.75:
 
62
  return {
63
  "summary": {
64
  "total_logs": int(len(df)),
65
+ "modules_analyzed": int(len(results))
66
  },
67
  "module_risk": results
68
  }