"""Train, persist and query the local risk models.

Two models are stored under ``models/``: an XGBoost classifier (the
"baseline" risk model) and an IsolationForest (the anomaly model). The
feature column order used at training time is saved next to them as JSON.
"""

import json
from pathlib import Path

import joblib
import numpy as np
from sklearn.ensemble import IsolationForest
from xgboost import XGBClassifier

from app.feature_engineering import FEATURE_COLUMNS, FEATURE_LABELS

BASE_DIR = Path(__file__).resolve().parents[1]
MODEL_DIR = BASE_DIR / "models"
MODEL_DIR.mkdir(exist_ok=True)

BASELINE_PATH = MODEL_DIR / "baseline_model.joblib"
ANOMALY_PATH = MODEL_DIR / "anomaly_model.joblib"
FEATURE_COLUMNS_PATH = MODEL_DIR / "feature_columns.json"

# Weights used by predict_local() to blend the final score.
_RISK_WEIGHT = 0.72
_ANOMALY_WEIGHT = 0.18
_AUX_FEATURE_WEIGHT = 0.10
# Position in the feature vector of the value that is blended in directly.
# NOTE(review): which feature this is depends on the order of
# FEATURE_COLUMNS in app.feature_engineering -- confirm it is still index 10.
_AUX_FEATURE_INDEX = 10


def _clamp01(value):
    """Return *value* as a float limited to the closed interval [0.0, 1.0]."""
    return float(max(0.0, min(1.0, value)))


def train_models(X, y):
    """Fit both models on the training data and persist them to disk.

    Args:
        X: Feature matrix passed unchanged to ``XGBClassifier.fit`` and
            ``IsolationForest.fit``.
        y: Target labels for the classifier (the anomaly model is fitted
            on ``X`` only).

    Returns:
        The path of the saved baseline classifier, as a string.
    """
    clf = XGBClassifier(
        n_estimators=140,
        max_depth=5,
        learning_rate=0.06,
        subsample=0.9,
        colsample_bytree=0.9,
        eval_metric="logloss",
        random_state=42,
    )
    clf.fit(X, y)

    anomaly = IsolationForest(
        n_estimators=180,
        contamination=0.08,
        random_state=42,
    )
    anomaly.fit(X)

    joblib.dump(clf, BASELINE_PATH)
    joblib.dump(anomaly, ANOMALY_PATH)
    # Store the column order alongside the models.
    FEATURE_COLUMNS_PATH.write_text(json.dumps(FEATURE_COLUMNS), encoding="utf-8")
    return str(BASELINE_PATH)


def predict_local(feature_vector):
    """Score a single feature vector with the persisted models.

    Args:
        feature_vector: Indexable sequence of numeric feature values for
            one sample. If it has more than ``_AUX_FEATURE_INDEX`` items,
            the value at that index contributes directly to the final
            score; otherwise that contribution is 0.

    Returns:
        A ``(risk, anomaly_score, final)`` tuple of floats, where ``risk``
        is the classifier's probability for the class at index 1,
        ``anomaly_score`` is the rescaled IsolationForest output clamped
        to [0, 1], and ``final`` is the weighted blend clamped to [0, 1].
    """
    # NOTE(review): both models are re-read from disk on every call; consider
    # caching them if this function sits on a hot path.
    # joblib.load unpickles its input, so these files must be trusted artifacts.
    clf = joblib.load(BASELINE_PATH)
    anomaly = joblib.load(ANOMALY_PATH)

    # The models expect a 2-D array: a batch containing this one sample.
    x = np.array([feature_vector])

    risk = float(clf.predict_proba(x)[0][1])

    # Invert the raw decision_function value so lower raw values give higher
    # scores (raw -0.5 -> 1.0, raw +0.5 -> 0.0), then clamp into [0, 1].
    raw = float(anomaly.decision_function(x)[0])
    anomaly_score = _clamp01(1.0 - (raw + 0.5))

    if len(feature_vector) > _AUX_FEATURE_INDEX:
        aux_value = feature_vector[_AUX_FEATURE_INDEX]
    else:
        aux_value = 0.0

    final = (
        _RISK_WEIGHT * risk
        + _ANOMALY_WEIGHT * anomaly_score
        + _AUX_FEATURE_WEIGHT * min(1.0, aux_value)
    )
    return risk, anomaly_score, _clamp01(final)


def feature_importance():
    """Return the baseline classifier's feature importances, largest first.

    Returns:
        A list of dicts with keys ``"feature"`` (column name), ``"label"``
        (entry from ``FEATURE_LABELS``, falling back to the column name)
        and ``"importance"`` (float), sorted by importance descending.
    """
    clf = joblib.load(BASELINE_PATH)
    importances = clf.feature_importances_.tolist()
    # zip() pairs names with importances positionally and stops at the
    # shorter of the two sequences.
    ranked = sorted(
        zip(FEATURE_COLUMNS, importances),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [
        {
            "feature": name,
            "label": FEATURE_LABELS.get(name, name),
            "importance": float(value),
        }
        for name, value in ranked
    ]