File size: 1,672 Bytes
1ce499f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import json
from pathlib import Path
import joblib, numpy as np
from sklearn.ensemble import IsolationForest
from xgboost import XGBClassifier
from app.feature_engineering import FEATURE_COLUMNS, FEATURE_LABELS
# Directory one level above the folder that contains this module.
BASE_DIR = Path(__file__).resolve().parents[1]

# Folder holding the persisted artefacts; created at import time when absent.
MODEL_DIR = BASE_DIR.joinpath("models")
MODEL_DIR.mkdir(exist_ok=True)

# Files written by train_models() and read back by the prediction helpers.
BASELINE_PATH = MODEL_DIR.joinpath("baseline_model.joblib")
ANOMALY_PATH = MODEL_DIR.joinpath("anomaly_model.joblib")
FEATURE_COLUMNS_PATH = MODEL_DIR.joinpath("feature_columns.json")
def train_models(X, y):
    """Fit the classifier and the anomaly detector, then persist both.

    An ``XGBClassifier`` is fitted on ``(X, y)`` and an ``IsolationForest`` on
    ``X`` alone. Both estimators are dumped with joblib to ``BASELINE_PATH``
    and ``ANOMALY_PATH``, and ``FEATURE_COLUMNS`` is written as JSON to
    ``FEATURE_COLUMNS_PATH``.

    Args:
        X: Training features, passed unchanged to both ``fit`` calls.
        y: Training labels, used only by the classifier.

    Returns:
        The path of the saved baseline classifier, as a string.
    """
    classifier = XGBClassifier(
        n_estimators=140,
        max_depth=5,
        learning_rate=0.06,
        subsample=0.9,
        colsample_bytree=0.9,
        eval_metric="logloss",
        random_state=42,
    )
    classifier.fit(X, y)

    detector = IsolationForest(
        n_estimators=180,
        contamination=0.08,
        random_state=42,
    )
    detector.fit(X)

    # Persist the estimators first, then the column list that describes them.
    for estimator, destination in (
        (classifier, BASELINE_PATH),
        (detector, ANOMALY_PATH),
    ):
        joblib.dump(estimator, destination)
    FEATURE_COLUMNS_PATH.write_text(
        json.dumps(FEATURE_COLUMNS), encoding="utf-8"
    )

    return str(BASELINE_PATH)
def predict_local(feature_vector):
    """Score a single feature vector with the models stored on disk.

    Both estimators are re-loaded from ``BASELINE_PATH`` and ``ANOMALY_PATH``
    on every call.

    Args:
        feature_vector: Feature values for one sample; it is wrapped into a
            one-row array before scoring. It must support ``len()`` and
            indexing. NOTE(review): presumably ordered like
            ``FEATURE_COLUMNS`` -- confirm against the caller.

    Returns:
        A ``(risk, anomaly_score, final)`` tuple of floats:
        ``risk`` is column 1 of the classifier's ``predict_proba`` output,
        ``anomaly_score`` is ``1 - (decision_function + 0.5)`` clamped to
        ``[0, 1]``, and ``final`` is the weighted blend
        ``0.72 * risk + 0.18 * anomaly_score + 0.10 * min(1, feature_vector[10])``
        clamped to ``[0, 1]`` (the last term is 0 when the vector has fewer
        than 11 entries).
    """

    def clamp_unit(value):
        # Restrict ``value`` to the closed interval [0, 1].
        return max(0.0, min(1.0, value))

    classifier = joblib.load(BASELINE_PATH)
    detector = joblib.load(ANOMALY_PATH)
    sample = np.array([feature_vector])

    risk = float(classifier.predict_proba(sample)[0][1])

    raw = float(detector.decision_function(sample)[0])
    # Shift the raw decision value by 0.5 and invert it, so lower decision
    # values give a higher score; the division by 1.0 is a no-op kept as-is.
    anomaly_score = float(clamp_unit(1.0 - ((raw + 0.5) / 1.0)))

    # The value at index 10 contributes a small capped bonus when present.
    if len(feature_vector) > 10:
        extra_signal = feature_vector[10]
    else:
        extra_signal = 0.0

    blended = 0.72 * risk + 0.18 * anomaly_score + 0.10 * min(1.0, extra_signal)
    return risk, anomaly_score, float(clamp_unit(blended))
def feature_importance():
    """Return the baseline classifier's feature importances, largest first.

    The classifier is loaded from ``BASELINE_PATH`` and its
    ``feature_importances_`` are paired positionally with ``FEATURE_COLUMNS``.

    Returns:
        A list of dicts with keys ``"feature"`` (column name), ``"label"``
        (looked up in ``FEATURE_LABELS``, falling back to the column name) and
        ``"importance"`` (float), sorted by importance in descending order.
    """
    classifier = joblib.load(BASELINE_PATH)
    scores = classifier.feature_importances_.tolist()

    ranked = sorted(
        zip(FEATURE_COLUMNS, scores),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [
        {
            "feature": column,
            "label": FEATURE_LABELS.get(column, column),
            "importance": float(score),
        }
        for column, score in ranked
    ]