"""FastAPI service for the Network Security (phishing detection) system.

Exposes:
    GET  /        — service + model status
    GET  /train   — run the training pipeline (artifacts logged to MLflow)
    POST /predict — batch-predict on an uploaded CSV, rendered as an HTML table

Model artifacts live in three places, tried in this order at serve time:
local ``final_model/`` → persistent storage ``/data/models`` (HuggingFace
Spaces persistent disk) → MLflow (via ``load_models_from_mlflow``).
"""

import os
import sys
import shutil
import traceback
from contextlib import asynccontextmanager

import certifi
import mlflow
import pandas as pd
from dotenv import load_dotenv
from fastapi import FastAPI, File, UploadFile, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import Response
from fastapi.templating import Jinja2Templates
from starlette.responses import RedirectResponse
from uvicorn import run as app_run

from src.exception.exception import NetworkSecurityException
from src.logging.logger import logging
from src.pipeline.training_pipeline import Trainingpipeline
from src.utils.ml_utils.model.estimator import NetworkSecurityModel
from src.utils.main_utils.utils import load_object, save_object
from src.constant.training_pipeline import DATA_INGESTION_COLLECTION_NAME
# NOTE(review): constant name is misspelled where it is declared ("DATBASE_NANE");
# kept as-is here for compatibility — fix at the declaration site, not here.
from src.constant.training_pipeline import DATA_INGESTION_DATBASE_NANE

ca = certifi.where()
load_dotenv()
mongo_db_uri = os.getenv("MONGO_DB_URI")

# import pymongo
# client = pymongo.MongoClient(mongo_db_uri, tlsCAFile=ca)
# database = client[DATA_INGESTION_DATBASE_NANE]
# collection = database[DATA_INGESTION_COLLECTION_NAME]

templates = Jinja2Templates(directory="./templates")

# Persistent storage paths (/data is the HuggingFace Spaces persistent disk
# per the restore function's docstring; local dir is the working copy).
PERSISTENT_MODEL_DIR = "/data/models"
LOCAL_MODEL_DIR = "final_model"


def restore_models_from_persistent_storage() -> bool:
    """Restore models from HuggingFace persistent storage to local directory.

    Copies ``model.pkl`` and ``preprocessor.pkl`` from PERSISTENT_MODEL_DIR
    into LOCAL_MODEL_DIR.

    Returns:
        True when both artifacts existed and were copied; False when either
        is missing or any error occurred (never raises).
    """
    try:
        persistent_model = f"{PERSISTENT_MODEL_DIR}/model.pkl"
        persistent_preprocessor = f"{PERSISTENT_MODEL_DIR}/preprocessor.pkl"
        local_model = f"{LOCAL_MODEL_DIR}/model.pkl"
        local_preprocessor = f"{LOCAL_MODEL_DIR}/preprocessor.pkl"

        # Require BOTH artifacts: a model without its preprocessor (or vice
        # versa) cannot serve predictions.
        if os.path.exists(persistent_model) and os.path.exists(persistent_preprocessor):
            os.makedirs(LOCAL_MODEL_DIR, exist_ok=True)
            shutil.copy2(persistent_model, local_model)
            shutil.copy2(persistent_preprocessor, local_preprocessor)
            logging.info("✅ Models restored from persistent storage (/data/models)")
            return True

        logging.warning("⚠️ No models found in persistent storage")
        return False
    except Exception as e:
        logging.error(f"Error restoring models from persistent storage: {e}")
        return False


# Cache for loaded models; /train clears it so the next /predict reloads
# fresh artifacts from disk.
MODEL_CACHE = {"model": None, "preprocessor": None}
MLFLOW_AVAILABLE = True  # Assume available; model_trainer.py handles initialization


def load_models_from_mlflow() -> bool:
    """Load the latest model + preprocessor from MLflow into MODEL_CACHE.

    Finds the most recent run of the "Default" experiment, loads both
    sklearn artifacts, and also writes them to ``final_model/`` as a local
    backup.

    Returns:
        True on success; False (never raises) when MLflow is unavailable,
        no experiment/run exists yet, or loading fails.
    """
    try:
        if not MLFLOW_AVAILABLE:
            logging.error("MLflow not available")
            return False

        logging.info("Searching for latest MLflow run...")
        client = mlflow.tracking.MlflowClient()

        # If the experiment doesn't exist, no models have been trained yet.
        try:
            experiment = client.get_experiment_by_name("Default")
        except Exception as e:
            logging.warning(f"Could not get experiment: {e}")
            return False

        if experiment is None:
            logging.warning("No MLflow experiment found. Train model first.")
            return False

        runs = client.search_runs(
            experiment_ids=[experiment.experiment_id],
            order_by=["start_time DESC"],
            max_results=1,
        )
        if not runs:
            logging.warning("No MLflow runs found. Train model first.")
            return False

        latest_run = runs[0]
        run_id = latest_run.info.run_id
        logging.info(f"Loading models from MLflow run: {run_id}")

        # Load model and preprocessor from the run's artifact store.
        model_uri = f"runs:/{run_id}/model"
        preprocessor_uri = f"runs:/{run_id}/preprocessor"
        MODEL_CACHE["model"] = mlflow.sklearn.load_model(model_uri)
        MODEL_CACHE["preprocessor"] = mlflow.sklearn.load_model(preprocessor_uri)

        # Save to local directory as backup
        os.makedirs("final_model", exist_ok=True)
        save_object("final_model/model.pkl", MODEL_CACHE["model"])
        save_object("final_model/preprocessor.pkl", MODEL_CACHE["preprocessor"])

        logging.info("✅ Models loaded from MLflow and cached locally")
        return True
    except Exception as e:
        logging.error(f"Error loading models from MLflow: {e}")
        logging.error(traceback.format_exc())
        return False


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: make model artifacts available before serving.

    Prefers models already in LOCAL_MODEL_DIR; otherwise attempts a restore
    from persistent storage. Startup never fails on missing models — /train
    can produce them later.
    """
    logging.info("===== Application Startup =====")

    model_path = f"{LOCAL_MODEL_DIR}/model.pkl"
    preprocessor_path = f"{LOCAL_MODEL_DIR}/preprocessor.pkl"

    if os.path.exists(model_path) and os.path.exists(preprocessor_path):
        logging.info("✅ Models found in local directory")
    else:
        logging.info("Checking persistent storage for models...")
        if restore_models_from_persistent_storage():
            logging.info("✅ Models restored and ready for predictions")
        else:
            logging.warning("⚠️ No models available. Please call /train endpoint first.")

    logging.info("✅ Application ready to serve requests")
    yield
    logging.info("===== Application Shutdown =====")


app = FastAPI(lifespan=lifespan)

# NOTE(review): wildcard origins combined with allow_credentials=True is a
# permissive CORS setup — tighten the origin list before exposing publicly.
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# @app.get("/", tags=["authentication"])
# async def index():
#     return RedirectResponse(url="/docs")


@app.get("/")
async def root():
    """Root endpoint with system status."""
    local_exists = os.path.exists(f"{LOCAL_MODEL_DIR}/model.pkl")
    persistent_exists = os.path.exists(f"{PERSISTENT_MODEL_DIR}/model.pkl")

    if local_exists or persistent_exists:
        model_status = "✅ Ready"
    else:
        model_status = "⚠️ Not trained - call /train first"

    return {
        "status": "running",
        "service": "Network Security System - Phishing Detection",
        "model_status": model_status,
        "persistent_storage": persistent_exists,
        "mlflow_enabled": MLFLOW_AVAILABLE,
        "endpoints": {
            "docs": "/docs",
            "train": "/train (trains and saves to persistent storage)",
            "predict": "/predict (uses persistent models)",
        },
    }


@app.get("/train")
async def training_route():
    """Run the full training pipeline and invalidate the model cache."""
    try:
        logging.info("Starting training pipeline...")
        training_pipeline = Trainingpipeline()
        training_pipeline.run_pipeline()

        # Clear model cache so next prediction loads fresh models
        MODEL_CACHE["model"] = None
        MODEL_CACHE["preprocessor"] = None

        return Response("✅ Training completed! Models logged to MLflow. Call /predict to use them.")
    except Exception as e:
        raise NetworkSecurityException(e, sys)


@app.post("/predict")  # predict route
async def predict_route(request: Request, file: UploadFile = File(...)):
    """Predict on an uploaded CSV and render the result as an HTML table.

    Raises:
        NetworkSecurityException: wrapping any failure during load/predict.
    """
    try:
        model_path = f"{LOCAL_MODEL_DIR}/model.pkl"
        preprocessor_path = f"{LOCAL_MODEL_DIR}/preprocessor.pkl"

        # Check if models exist locally, if not try to restore from persistent storage
        if not (os.path.exists(model_path) and os.path.exists(preprocessor_path)):
            logging.info("Local models not found, restoring from persistent storage...")
            if not restore_models_from_persistent_storage():
                return Response(
                    "❌ No trained model available. Please call /train endpoint first.",
                    status_code=400,
                )

        df = pd.read_csv(file.file)

        # Remove target column if it exists
        if 'Result' in df.columns:
            df = df.drop(columns=['Result'])

        # Serve from MODEL_CACHE when populated; /train resets it to None so a
        # retrain forces a reload from disk. Avoids unpickling on every request.
        if MODEL_CACHE["model"] is None or MODEL_CACHE["preprocessor"] is None:
            MODEL_CACHE["preprocessor"] = load_object(file_path=preprocessor_path)
            MODEL_CACHE["model"] = load_object(file_path=model_path)
        preprocessor = MODEL_CACHE["preprocessor"]
        model = MODEL_CACHE["model"]

        NSmodel = NetworkSecurityModel(preprocessing_object=preprocessor, trained_model_object=model)
        y_pred = NSmodel.predict(df)
        df['predicted_column'] = y_pred

        # Save predictions
        df.to_csv(f"{LOCAL_MODEL_DIR}/predicted.csv")

        table_html = df.to_html(classes='table table-striped')
        return templates.TemplateResponse("table.html", {"request": request, "table": table_html})
    except Exception as e:
        raise NetworkSecurityException(e, sys)


if __name__ == "__main__":
    app_run(app, host="0.0.0.0", port=8080)