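# NOTE: the three lines below are hosting-page residue captured with this file; kept as comments.
# Adisri99's picture
# Upload 26 files
# 1ce499f verified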
import json
from pathlib import Path
import joblib, numpy as np
from sklearn.ensemble import IsolationForest
from xgboost import XGBClassifier
from app.feature_engineering import FEATURE_COLUMNS, FEATURE_LABELS
# Parent of the directory that contains this module (two levels up from this file).
BASE_DIR = Path(__file__).resolve().parent.parent

# Directory holding the persisted artifacts; created at import time if it does not exist.
MODEL_DIR = BASE_DIR.joinpath("models")
MODEL_DIR.mkdir(exist_ok=True)

# File locations of the individual artifacts written by train_models().
BASELINE_PATH = MODEL_DIR.joinpath("baseline_model.joblib")
ANOMALY_PATH = MODEL_DIR.joinpath("anomaly_model.joblib")
FEATURE_COLUMNS_PATH = MODEL_DIR.joinpath("feature_columns.json")
def train_models(X, y):
    """Fit the classifier and the anomaly detector, then persist both to disk.

    An ``XGBClassifier`` is fitted on ``(X, y)`` and an ``IsolationForest`` is
    fitted on ``X`` alone. Both fitted estimators are dumped with joblib, and
    ``FEATURE_COLUMNS`` is written as JSON next to them.

    Args:
        X: Training features, passed unchanged to both estimators' ``fit``.
            Presumably its columns follow ``FEATURE_COLUMNS`` order; verify
            against the caller.
        y: Training labels for the classifier.

    Returns:
        The classifier artifact path (``BASELINE_PATH``) as a string.
    """
    classifier_params = {
        "n_estimators": 140,
        "max_depth": 5,
        "learning_rate": 0.06,
        "subsample": 0.9,
        "colsample_bytree": 0.9,
        "eval_metric": "logloss",
        "random_state": 42,
    }
    classifier = XGBClassifier(**classifier_params)
    classifier.fit(X, y)

    # The detector is unsupervised: it only sees the features, never the labels.
    detector = IsolationForest(
        n_estimators=180,
        contamination=0.08,
        random_state=42,
    )
    detector.fit(X)

    # Persist the two estimators first, then the column list.
    for estimator, destination in ((classifier, BASELINE_PATH), (detector, ANOMALY_PATH)):
        joblib.dump(estimator, destination)
    columns_json = json.dumps(FEATURE_COLUMNS)
    FEATURE_COLUMNS_PATH.write_text(columns_json, encoding="utf-8")

    return str(BASELINE_PATH)
def predict_local(feature_vector):
    """Score a single feature vector with the persisted models.

    Both models are loaded from disk on every call.

    Args:
        feature_vector: Sequence of feature values for one sample. The
            element at index 10, when present, contributes directly to the
            blended score (which feature that is cannot be told from this
            block; check ``FEATURE_COLUMNS``).

    Returns:
        A tuple ``(risk, anomaly_score, final)`` of floats:
        ``risk`` is the classifier's probability for class index 1;
        ``anomaly_score`` is ``1 - (decision_function + 0.5)`` clamped to
        [0, 1], so lower detector scores give higher values;
        ``final`` is the weighted blend of the two plus the index-10 feature
        (capped at 1.0), clamped to [0, 1].
    """

    def clamp_unit(value):
        # Restrict a value to the closed interval [0.0, 1.0].
        return max(0.0, min(1.0, value))

    classifier = joblib.load(BASELINE_PATH)
    detector = joblib.load(ANOMALY_PATH)

    # The estimators expect a 2-D batch, so wrap the single sample in a list.
    sample = np.array([feature_vector])

    risk = float(classifier.predict_proba(sample)[0][1])

    raw_score = float(detector.decision_function(sample)[0])
    anomaly_score = float(clamp_unit(1.0 - ((raw_score + 0.5) / 1.0)))

    # Vectors shorter than 11 elements contribute nothing for the third term.
    if len(feature_vector) > 10:
        extra_signal = feature_vector[10]
    else:
        extra_signal = 0.0

    blended = 0.72 * risk + 0.18 * anomaly_score + 0.10 * min(1.0, extra_signal)
    final = float(clamp_unit(blended))
    return risk, anomaly_score, final
def feature_importance():
    """Return the persisted classifier's feature importances, highest first.

    The classifier is loaded from ``BASELINE_PATH`` and its
    ``feature_importances_`` are paired positionally with ``FEATURE_COLUMNS``.

    Returns:
        A list of dicts with keys ``"feature"`` (column name), ``"label"``
        (the ``FEATURE_LABELS`` entry, falling back to the column name) and
        ``"importance"`` (float), sorted by importance in descending order.
    """
    classifier = joblib.load(BASELINE_PATH)
    scores = classifier.feature_importances_.tolist()

    # zip() stops at the shorter input, so unmatched columns or scores are dropped.
    ranked = sorted(
        zip(FEATURE_COLUMNS, scores),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [
        {
            "feature": column,
            "label": FEATURE_LABELS.get(column, column),
            "importance": float(score),
        }
        for column, score in ranked
    ]