Kshitijk20 commited on
Commit
fc4ec5a
·
1 Parent(s): 679faff

lazy model loading

Browse files
Files changed (1) hide show
  1. app.py +65 -13
app.py CHANGED
@@ -29,6 +29,34 @@ from src.utils.main_utils.utils import load_object, save_object
29
  from fastapi.templating import Jinja2Templates
30
  templates = Jinja2Templates(directory="./templates")
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  # Cache for loaded models
33
  MODEL_CACHE = {"model": None, "preprocessor": None}
34
  MLFLOW_AVAILABLE = True # Assume available, model_trainer.py handles initialization
@@ -96,7 +124,22 @@ def load_models_from_mlflow():
96
  async def lifespan(app: FastAPI):
97
  """Initialize application on startup"""
98
  logging.info("===== Application Startup =====")
99
- logging.info("⚠️ Models will be loaded on first /train or /predict request")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  logging.info("✅ Application ready to serve requests")
101
 
102
  yield
@@ -122,17 +165,24 @@ app.add_middleware(
122
  @app.get("/")
123
  async def root():
124
  """Root endpoint with system status"""
125
- model_status = "✅ Ready" if MODEL_CACHE["model"] is not None else "⚠️ Not trained - call /train first"
 
 
 
 
 
 
126
 
127
  return {
128
  "status": "running",
129
  "service": "Network Security System - Phishing Detection",
130
  "model_status": model_status,
 
131
  "mlflow_enabled": MLFLOW_AVAILABLE,
132
  "endpoints": {
133
  "docs": "/docs",
134
- "train": "/train (trains and logs to MLflow)",
135
- "predict": "/predict (loads from MLflow)"
136
  }
137
  }
138
 
@@ -154,10 +204,13 @@ async def training_route():
154
  @app.post("/predict") # predict route
155
  async def predict_route(request: Request, file: UploadFile =File(...)):
156
  try:
157
- # Check if models are loaded
158
- if MODEL_CACHE["model"] is None or MODEL_CACHE["preprocessor"] is None:
159
- # Try to load from MLflow
160
- if not load_models_from_mlflow():
 
 
 
161
  return Response(
162
  "❌ No trained model available. Please call /train endpoint first.",
163
  status_code=400
@@ -168,17 +221,16 @@ async def predict_route(request: Request, file: UploadFile =File(...)):
168
  if 'Result' in df.columns:
169
  df = df.drop(columns=['Result'])
170
 
171
- # Use cached models from MLflow
172
- preprocessor = MODEL_CACHE["preprocessor"]
173
- model = MODEL_CACHE["model"]
174
 
175
  NSmodel = NetworkSecurityModel(preprocessing_object=preprocessor, trained_model_object=model)
176
  y_pred = NSmodel.predict(df)
177
  df['predicted_column'] = y_pred
178
 
179
  # Save predictions
180
- os.makedirs("final_model", exist_ok=True)
181
- df.to_csv("final_model/predicted.csv")
182
 
183
  table_html = df.to_html(classes='table table-striped')
184
  return templates.TemplateResponse("table.html", {"request": request, "table": table_html})
 
29
  from fastapi.templating import Jinja2Templates
30
  templates = Jinja2Templates(directory="./templates")
31
 
32
+ # Persistent storage paths
33
+ PERSISTENT_MODEL_DIR = "/data/models"
34
+ LOCAL_MODEL_DIR = "final_model"
35
+
36
def restore_models_from_persistent_storage(persistent_dir=None, local_dir=None):
    """Copy trained model artifacts from persistent storage into the local model dir.

    HuggingFace Spaces wipe the working directory on restart, so the model and
    preprocessor pickles are kept under /data and restored here on demand.

    Args:
        persistent_dir: Source directory; defaults to PERSISTENT_MODEL_DIR.
        local_dir: Destination directory; defaults to LOCAL_MODEL_DIR.

    Returns:
        bool: True if both artifacts were found and copied, False otherwise
        (including on any unexpected error, which is logged rather than raised).
    """
    # Local import kept (as in the original) to avoid touching the module
    # import block; hoisted out of the branch so it runs once per call path.
    import shutil

    # Resolve defaults at call time so module-level config changes are honored
    # and tests can supply their own directories.
    if persistent_dir is None:
        persistent_dir = PERSISTENT_MODEL_DIR
    if local_dir is None:
        local_dir = LOCAL_MODEL_DIR

    try:
        artifacts = ("model.pkl", "preprocessor.pkl")
        sources = [os.path.join(persistent_dir, name) for name in artifacts]

        # Both artifacts must exist; a model without its preprocessor is unusable.
        if not all(os.path.exists(src) for src in sources):
            logging.warning("⚠️ No models found in persistent storage")
            return False

        os.makedirs(local_dir, exist_ok=True)
        for name, src in zip(artifacts, sources):
            # copy2 preserves file metadata (timestamps) along with contents.
            shutil.copy2(src, os.path.join(local_dir, name))
        logging.info("✅ Models restored from persistent storage (/data/models)")
        return True
    except Exception as e:
        # Best-effort restore: log and report failure instead of crashing startup.
        logging.error(f"Error restoring models from persistent storage: {e}")
        return False
59
+
60
  # Cache for loaded models
61
  MODEL_CACHE = {"model": None, "preprocessor": None}
62
  MLFLOW_AVAILABLE = True # Assume available, model_trainer.py handles initialization
 
124
  async def lifespan(app: FastAPI):
125
  """Initialize application on startup"""
126
  logging.info("===== Application Startup =====")
127
+
128
+ # Try to restore models from persistent storage
129
+ model_path = f"{LOCAL_MODEL_DIR}/model.pkl"
130
+ preprocessor_path = f"{LOCAL_MODEL_DIR}/preprocessor.pkl"
131
+
132
+ # Check if local models exist
133
+ if os.path.exists(model_path) and os.path.exists(preprocessor_path):
134
+ logging.info("✅ Models found in local directory")
135
+ else:
136
+ # Try to restore from persistent storage
137
+ logging.info("Checking persistent storage for models...")
138
+ if restore_models_from_persistent_storage():
139
+ logging.info("✅ Models restored and ready for predictions")
140
+ else:
141
+ logging.warning("⚠️ No models available. Please call /train endpoint first.")
142
+
143
  logging.info("✅ Application ready to serve requests")
144
 
145
  yield
 
165
@app.get("/")
async def root():
    """Root endpoint with system status"""
    # A model counts as available if its pickle exists either locally or in
    # HuggingFace persistent storage (/data/models).
    local_exists = os.path.exists(f"{LOCAL_MODEL_DIR}/model.pkl")
    persistent_exists = os.path.exists(f"{PERSISTENT_MODEL_DIR}/model.pkl")

    model_status = (
        "✅ Ready"
        if local_exists or persistent_exists
        else "⚠️ Not trained - call /train first"
    )

    return {
        "status": "running",
        "service": "Network Security System - Phishing Detection",
        "model_status": model_status,
        "persistent_storage": persistent_exists,
        "mlflow_enabled": MLFLOW_AVAILABLE,
        "endpoints": {
            "docs": "/docs",
            "train": "/train (trains and saves to persistent storage)",
            "predict": "/predict (uses persistent models)",
        },
    }
188
 
 
204
  @app.post("/predict") # predict route
205
  async def predict_route(request: Request, file: UploadFile =File(...)):
206
  try:
207
+ model_path = f"{LOCAL_MODEL_DIR}/model.pkl"
208
+ preprocessor_path = f"{LOCAL_MODEL_DIR}/preprocessor.pkl"
209
+
210
+ # Check if models exist locally, if not try to restore from persistent storage
211
+ if not (os.path.exists(model_path) and os.path.exists(preprocessor_path)):
212
+ logging.info("Local models not found, restoring from persistent storage...")
213
+ if not restore_models_from_persistent_storage():
214
  return Response(
215
  "❌ No trained model available. Please call /train endpoint first.",
216
  status_code=400
 
221
  if 'Result' in df.columns:
222
  df = df.drop(columns=['Result'])
223
 
224
+ # Load models from local files
225
+ preprocessor = load_object(file_path=preprocessor_path)
226
+ model = load_object(file_path=model_path)
227
 
228
  NSmodel = NetworkSecurityModel(preprocessing_object=preprocessor, trained_model_object=model)
229
  y_pred = NSmodel.predict(df)
230
  df['predicted_column'] = y_pred
231
 
232
  # Save predictions
233
+ df.to_csv(f"{LOCAL_MODEL_DIR}/predicted.csv")
 
234
 
235
  table_html = df.to_html(classes='table table-striped')
236
  return templates.TemplateResponse("table.html", {"request": request, "table": table_html})