Spaces:
Sleeping
Sleeping
File size: 9,025 Bytes
a21e473 1b347a0 325b47c a21e473 1b347a0 a21e473 1b347a0 fc4ec5a 1b347a0 679faff 1b347a0 5aa701f 1b347a0 5aa701f 1b347a0 5aa701f 1b347a0 1bf4542 fc4ec5a 5aa701f 1bf4542 1b347a0 1bf4542 1b347a0 5a72624 a21e473 325b47c fc4ec5a 1b347a0 325b47c 1b347a0 fc4ec5a 1b347a0 325b47c fc4ec5a 325b47c a21e473 325b47c a21e473 1b347a0 679faff 1b347a0 679faff a21e473 5a72624 fc4ec5a 1b347a0 a21e473 1b347a0 fc4ec5a 1b347a0 a21e473 1b347a0 fc4ec5a 5a72624 1b347a0 a21e473 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 | import os,sys
import certifi
from dotenv import load_dotenv
from src.exception.exception import NetworkSecurityException
from src.logging.logger import logging
from src.pipeline.training_pipeline import Trainingpipeline
from fastapi import FastAPI, File, UploadFile, Request
from fastapi.middleware.cors import CORSMiddleware
from uvicorn import run as app_run
from fastapi.responses import Response
from starlette.responses import RedirectResponse
import pandas as pd
from src.utils.ml_utils.model.estimator import NetworkSecurityModel
from contextlib import asynccontextmanager
import mlflow
ca = certifi.where()
load_dotenv()
mongo_db_uri = os.getenv("MONGO_DB_URI")
from src.constant.training_pipeline import DATA_INGESTION_COLLECTION_NAME
from src.constant.training_pipeline import DATA_INGESTION_DATBASE_NANE
from src.utils.main_utils.utils import load_object, save_object
# import pymongo
# client = pymongo.MongoClient(mongo_db_uri,tlsCAFile=ca)
# database = client[DATA_INGESTION_DATBASE_NANE]
# collection = database[DATA_INGESTION_COLLECTION_NAME]
from fastapi.templating import Jinja2Templates
templates = Jinja2Templates(directory="./templates")
# Persistent storage paths
PERSISTENT_MODEL_DIR = "/data/models"
LOCAL_MODEL_DIR = "final_model"
def restore_models_from_persistent_storage():
    """Restore model artifacts from persistent storage to the local directory.

    Copies model.pkl and preprocessor.pkl from PERSISTENT_MODEL_DIR (the
    HuggingFace Spaces /data volume) into LOCAL_MODEL_DIR so the /predict
    endpoint can load them from disk.

    Returns:
        bool: True if both artifacts were found and copied, False otherwise
              (missing files or any copy error — errors are logged, not raised).
    """
    try:
        persistent_model = f"{PERSISTENT_MODEL_DIR}/model.pkl"
        persistent_preprocessor = f"{PERSISTENT_MODEL_DIR}/preprocessor.pkl"
        local_model = f"{LOCAL_MODEL_DIR}/model.pkl"
        local_preprocessor = f"{LOCAL_MODEL_DIR}/preprocessor.pkl"

        # Both artifacts must exist; a model without its preprocessor is unusable.
        if os.path.exists(persistent_model) and os.path.exists(persistent_preprocessor):
            os.makedirs(LOCAL_MODEL_DIR, exist_ok=True)
            import shutil
            # copy2 preserves file metadata (timestamps) — handy when checking staleness.
            shutil.copy2(persistent_model, local_model)
            shutil.copy2(persistent_preprocessor, local_preprocessor)
            logging.info("✅ Models restored from persistent storage (/data/models)")
            return True
        else:
            logging.warning("⚠️ No models found in persistent storage")
            return False
    except Exception as e:
        # Best-effort: callers treat False as "no model available", so never raise.
        logging.error(f"Error restoring models from persistent storage: {e}")
        return False
# Cache for loaded models
# In-process cache so /predict does not have to deserialize artifacts on every
# request; /train resets both entries to force a reload of fresh models.
MODEL_CACHE = {"model": None, "preprocessor": None}
MLFLOW_AVAILABLE = True  # Assume available, model_trainer.py handles initialization
def load_models_from_mlflow():
    """Load the most recent model and preprocessor from MLflow into MODEL_CACHE.

    Looks up the latest run of the "Default" experiment, loads the sklearn
    artifacts logged under "model" and "preprocessor", caches them in
    MODEL_CACHE, and writes local pickle copies under final_model/ as backup.

    Returns:
        bool: True on success; False when MLflow is unavailable, no
              experiment/run exists yet, or loading fails (errors are logged).
    """
    try:
        if not MLFLOW_AVAILABLE:
            logging.error("MLflow not available")
            return False

        logging.info("Searching for latest MLflow run...")
        client = mlflow.tracking.MlflowClient()

        # The experiment may not exist before the first /train call.
        try:
            experiment = client.get_experiment_by_name("Default")
        except Exception as e:
            logging.warning(f"Could not get experiment: {e}")
            return False
        if experiment is None:
            logging.warning("No MLflow experiment found. Train model first.")
            return False

        # Newest run first; only the single latest run is needed.
        runs = client.search_runs(
            experiment_ids=[experiment.experiment_id],
            order_by=["start_time DESC"],
            max_results=1,
        )
        if not runs:
            logging.warning("No MLflow runs found. Train model first.")
            return False

        run_id = runs[0].info.run_id
        logging.info(f"Loading models from MLflow run: {run_id}")

        # Artifacts are logged by model_trainer.py under these fixed paths.
        MODEL_CACHE["model"] = mlflow.sklearn.load_model(f"runs:/{run_id}/model")
        MODEL_CACHE["preprocessor"] = mlflow.sklearn.load_model(f"runs:/{run_id}/preprocessor")

        # Local pickle backup so /predict still works if MLflow is unreachable later.
        os.makedirs("final_model", exist_ok=True)
        save_object("final_model/model.pkl", MODEL_CACHE["model"])
        save_object("final_model/preprocessor.pkl", MODEL_CACHE["preprocessor"])
        logging.info("✅ Models loaded from MLflow and cached locally")
        return True
    except Exception as e:
        logging.error(f"Error loading models from MLflow: {e}")
        import traceback
        logging.error(traceback.format_exc())
        return False
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan handler: ensure model artifacts are on local disk at startup.

    Order of preference: keep existing local files, otherwise restore from
    persistent storage. Missing models are logged but never fatal — the app
    still starts so that /train can be called.
    """
    logging.info("===== Application Startup =====")

    model_path = f"{LOCAL_MODEL_DIR}/model.pkl"
    preprocessor_path = f"{LOCAL_MODEL_DIR}/preprocessor.pkl"

    if os.path.exists(model_path) and os.path.exists(preprocessor_path):
        logging.info("✅ Models found in local directory")
    else:
        logging.info("Checking persistent storage for models...")
        if restore_models_from_persistent_storage():
            logging.info("✅ Models restored and ready for predictions")
        else:
            logging.warning("⚠️ No models available. Please call /train endpoint first.")

    logging.info("✅ Application ready to serve requests")
    yield  # application serves requests while suspended here
    logging.info("===== Application Shutdown =====")
app = FastAPI(lifespan=lifespan)

# Wide-open CORS: this Space is a public demo, so every origin is allowed.
origins = ["*"]  # renamed from misspelled "orgin"
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# @app.get("/", tags = ["authentication"])
# async def index():
# return RedirectResponse(url="/docs")
@app.get("/")
async def root():
    """Root endpoint: report service health and whether a trained model is available."""
    local_exists = os.path.exists(f"{LOCAL_MODEL_DIR}/model.pkl")
    persistent_exists = os.path.exists(f"{PERSISTENT_MODEL_DIR}/model.pkl")

    # A model in either location is usable: /predict restores from
    # persistent storage on demand when the local copy is missing.
    if local_exists or persistent_exists:
        model_status = "✅ Ready"
    else:
        model_status = "⚠️ Not trained - call /train first"

    return {
        "status": "running",
        "service": "Network Security System - Phishing Detection",
        "model_status": model_status,
        "persistent_storage": persistent_exists,
        "mlflow_enabled": MLFLOW_AVAILABLE,
        "endpoints": {
            "docs": "/docs",
            "train": "/train (trains and saves to persistent storage)",
            "predict": "/predict (uses persistent models)"
        }
    }
@app.get("/train")
async def training_route():
    """Run the full training pipeline and invalidate the in-process model cache.

    Returns a plain-text confirmation; any pipeline failure is wrapped in
    NetworkSecurityException so it carries the original traceback.
    """
    try:
        logging.info("Starting training pipeline...")
        training_pipeline = Trainingpipeline()
        training_pipeline.run_pipeline()

        # Drop cached models so the next /predict picks up the new artifacts.
        MODEL_CACHE["model"] = None
        MODEL_CACHE["preprocessor"] = None

        return Response("✅ Training completed! Models logged to MLflow. Call /predict to use them.")
    except Exception as e:
        raise NetworkSecurityException(e, sys)
@app.post("/predict")  # predict route
async def predict_route(request: Request, file: UploadFile = File(...)):
    """Score an uploaded CSV of features and render the results as an HTML table.

    The uploaded file must be a CSV with the training feature columns; a
    'Result' target column, if present, is dropped before prediction.
    Returns HTTP 400 when no trained model is available anywhere.
    """
    try:
        model_path = f"{LOCAL_MODEL_DIR}/model.pkl"
        preprocessor_path = f"{LOCAL_MODEL_DIR}/preprocessor.pkl"

        # Models may be absent locally (fresh container); try persistent storage.
        if not (os.path.exists(model_path) and os.path.exists(preprocessor_path)):
            logging.info("Local models not found, restoring from persistent storage...")
            if not restore_models_from_persistent_storage():
                return Response(
                    "❌ No trained model available. Please call /train endpoint first.",
                    status_code=400
                )

        df = pd.read_csv(file.file)
        # Drop the target column if the caller uploaded labeled data.
        if 'Result' in df.columns:
            df = df.drop(columns=['Result'])

        # Reuse cached objects instead of unpickling on every request;
        # /train clears the cache, so a stale model is never served.
        if MODEL_CACHE["model"] is None or MODEL_CACHE["preprocessor"] is None:
            MODEL_CACHE["preprocessor"] = load_object(file_path=preprocessor_path)
            MODEL_CACHE["model"] = load_object(file_path=model_path)
        preprocessor = MODEL_CACHE["preprocessor"]
        model = MODEL_CACHE["model"]

        NSmodel = NetworkSecurityModel(preprocessing_object=preprocessor, trained_model_object=model)
        y_pred = NSmodel.predict(df)
        df['predicted_column'] = y_pred

        # Persist predictions for later inspection alongside the model artifacts.
        df.to_csv(f"{LOCAL_MODEL_DIR}/predicted.csv")
        table_html = df.to_html(classes='table table-striped')
        return templates.TemplateResponse("table.html", {"request": request, "table": table_html})
    except Exception as e:
        raise NetworkSecurityException(e, sys)
# Local/dev entry point; in the deployed Space the container launches uvicorn
# itself, so this guard only fires when the module is run directly.
if __name__ == "__main__":
    app_run(app, host="0.0.0.0", port=8080)
|