"""SuperKart Sales Predictor API.

Flask service that loads a trained regression model together with its
preprocessing artifacts (fitted scaler, training-time column layout, and
MRP bin edges) and exposes endpoints to predict product sales.
"""

import logging
import os  # Kept for compatibility; available for path checks.

import joblib  # For loading the serialized model and artifacts
import numpy as np
import pandas as pd
from flask import Flask, jsonify, request

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

logger.info("Starting SuperKart Sales Predictor API loading file...")

# Initialize the Flask application
superkart_sales_predictor_api = Flask("SuperKart Sales Predictor")

# Paths to the serialized artifacts produced during training.
model_path = "superkart_regression_model_v1.0.joblib"
SCALER_PATH = "scaler.joblib"
TRAINING_COLUMNS_PATH = "training_columns.joblib"
MRP_BINS_PATH = "mrp_bins.joblib"

# Fixed reference year used to derive Years_Since_Establishment.
# Must match the value that was used when the model was trained.
REFERENCE_YEAR = 2025

# Globals populated lazily by the load_* helpers below.
model = None
scaler = None
training_columns = None  # Exact column layout the model was trained on.
mrp_bins = None  # Fixed bin edges for the MRP_Category feature.


def load_model():
    """Load the trained regression model into the global ``model``.

    Safe to call repeatedly: the model is only loaded once. Failures are
    logged rather than raised so the app can still start and report status.
    """
    global model
    if model is not None:
        return
    try:
        logger.info("Loading model from %s...", model_path)
        model = joblib.load(model_path)
        logger.info("Model loaded successfully.")
    except FileNotFoundError:
        logger.error("Error: Model file not found at %s", model_path)
    except Exception as e:
        logger.error("An error occurred while loading the model: %s", e)


def load_scaler():
    """Load the fitted StandardScaler into the global ``scaler``.

    Safe to call repeatedly: the scaler is only loaded once; errors are
    logged, not raised.
    """
    global scaler
    if scaler is not None:
        return
    try:
        logger.info("Loading scaler...")
        scaler = joblib.load(SCALER_PATH)
        logger.info("Scaler loaded successfully.")
    except FileNotFoundError:
        logger.error("Error: Scaler file not found at %s", SCALER_PATH)
    except Exception as e:
        logger.error("An error occurred while loading the scaler: %s", e)


def load_training_artifacts():
    """Load training-time artifacts: column names and MRP bin edges.

    Populates the globals ``training_columns`` and ``mrp_bins``; errors
    are logged, not raised, leaving the globals as ``None`` on failure.
    """
    global training_columns, mrp_bins
    try:
        training_columns = joblib.load(TRAINING_COLUMNS_PATH)
        logger.info("Training column names loaded successfully.")
        mrp_bins = joblib.load(MRP_BINS_PATH)
        logger.info("MRP bin edges loaded successfully.")
    except FileNotFoundError as e:
        logger.error("Error loading training artifacts: %s", e)
    except Exception as e:
        logger.error("An error occurred while loading training artifacts: %s", e)


# Load model, scaler, and training artifacts at import time so the app is
# ready before the first request (this also covers WSGI servers such as
# Gunicorn that import the module and never run the __main__ block).
load_model()
load_scaler()
load_training_artifacts()


@superkart_sales_predictor_api.route('/')
def home():
    """Handle GET / — return a welcome message and artifact-loading status."""
    logger.info("Home page request")
    status_message = "Welcome to the SuperKart Sales Prediction API! "
    if model is None:
        status_message += "Model loading failed. "
    else:
        status_message += "Model loaded successfully. "
    if scaler is None:
        status_message += "Scaler loading failed. "
    else:
        status_message += "Scaler loaded successfully. "
    if training_columns is None or mrp_bins is None:
        status_message += "Training artifacts loading failed."
    else:
        status_message += "Training artifacts loaded successfully."
    return status_message


@superkart_sales_predictor_api.route('/predict_sales', methods=['POST'])
def predict_sales():
    """Handle POST /predict_sales.

    Expects a JSON payload with product and store details and returns the
    predicted sales as a JSON response.

    Returns:
        200 with ``{'predicted_sales': <float>}`` on success,
        400 with ``{'error': ...}`` on bad input or preprocessing failure,
        500 with ``{'error': ...}`` when required artifacts are not loaded.
    """
    if model is None or scaler is None or training_columns is None or mrp_bins is None:
        return jsonify({'error': 'Required artifacts (model, scaler, training columns, MRP bins) not loaded. Cannot make predictions.'}), 500
    try:
        input_data = request.get_json()
        if input_data is None:
            # get_json() yields None when the body is absent or not JSON;
            # fail fast with a clear message instead of a cryptic pandas error.
            return jsonify({'error': 'Request body must be JSON.'}), 400
        logger.info("Received input data: %s", input_data)

        # Single-row DataFrame so the preprocessing mirrors training exactly.
        input_df = pd.DataFrame([input_data])

        # --- 1. Feature engineering (must mirror the training pipeline) ---
        input_df['Years_Since_Establishment'] = (
            REFERENCE_YEAR - input_df['Store_Establishment_Year']
        )
        # The first two characters of Product_Id encode the broad category.
        input_df['Product_Broad_Category'] = input_df['Product_Id'].apply(lambda x: x[:2])
        # Bucket MRP using the exact bin edges saved from training.
        mrp_labels = ['Low', 'Medium', 'High']
        input_df['MRP_Category'] = pd.cut(
            input_df['Product_MRP'], bins=mrp_bins, labels=mrp_labels, include_lowest=True
        )

        # --- 2. One-hot encoding (drop_first matches training) ---
        # Product_Id itself is excluded; it is captured by Product_Broad_Category.
        categorical_cols = [
            'Product_Sugar_Content', 'Product_Type', 'Store_Id', 'Store_Size',
            'Store_Location_City_Type', 'Store_Type',
            'Product_Broad_Category', 'MRP_Category',
        ]
        input_encoded = pd.get_dummies(input_df, columns=categorical_cols, drop_first=True)

        # --- 3. Align with the training column layout ---
        # reindex adds any dummy columns this single row is missing (filled
        # with 0), drops unknown columns, and enforces training-time order.
        preprocessed_input = input_encoded.reindex(columns=training_columns, fill_value=0)

        # --- 4. Scale numerical features with the fitted scaler ---
        numerical_cols_to_scale = [
            'Product_Weight', 'Product_Allocated_Area',
            'Product_MRP', 'Years_Since_Establishment',
        ]
        preprocessed_input[numerical_cols_to_scale] = scaler.transform(
            preprocessed_input[numerical_cols_to_scale]
        )

        # The model was trained on log1p-transformed sales; expm1 inverts it.
        predicted_sales_log = model.predict(preprocessed_input)[0]
        # Cast to a plain float: numpy scalars are not reliably JSON-serializable.
        predicted_sales = float(np.expm1(predicted_sales_log))

        return jsonify({'predicted_sales': predicted_sales})
    except Exception as e:
        logger.error("Error during prediction: %s", e)
        return jsonify({'error': str(e)}), 400


# Version endpoint (POST); also retries model/scaler loading on demand.
@superkart_sales_predictor_api.post('/version')
def home_version():
    """Handle POST /version — report API version and model/scaler status.

    Retries loading the model and scaler if they are not loaded yet, then
    returns a plain-text status string including the version number.
    """
    logger.info("Version endpoint request")
    if model is None:
        load_model()
    if scaler is None:
        load_scaler()  # Load scaler on demand if not already loaded
    if model is None or scaler is None:
        return "Welcome to the SuperKart Sales Prediction API! Model loading failed version 1.0."
    return "Welcome to the SuperKart Sales Prediction API! Model loaded successfully version 1.0."


# To run the Flask app (for local testing)
if __name__ == '__main__':
    # In production, use a production-ready WSGI server such as Gunicorn
    # or uWSGI instead of Flask's built-in development server.
    logger.info("About to start the SuperKart Sales Predictor API...")
    # Artifacts were already loaded at import time; these calls are
    # harmless no-ops when loading succeeded, and retries when it failed.
    load_model()
    load_scaler()
    load_training_artifacts()
    # NOTE(review): debug=True enables the interactive Werkzeug debugger —
    # never expose it on host='0.0.0.0' outside a trusted network.
    superkart_sales_predictor_api.run(debug=True, host='0.0.0.0', port=7860)