# Hugging Face upload metadata (page-scrape residue, kept as comments):
# Retheesh's picture — Upload folder using huggingface_hub — commit 6312228 (verified)
import numpy as np
import joblib # For loading the serialized model
import pandas as pd # For data manipulation
from flask import Flask, request, jsonify # For creating the Flask API
import os # To check if the model file exists
import logging
# Configure logging
# Module-level logger so every handler below logs under this module's name.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.info("Starting SuperKart Sales Predictor API loading file...")
# Initialize the Flask application
superkart_sales_predictor_api = Flask("SuperKart Sales Predictor")
# Define the path to the trained machine learning model
model_path = "superkart_regression_model_v1.0.joblib"
# Globals populated by the load_* helpers below; None means "not loaded yet"
# (the endpoints check for None and report/refuse accordingly).
model = None
scaler = None # Initialize scaler
# Placeholder for training data columns and MRP min/max (replace with loading from saved files in production)
training_columns = None
mrp_bins = None # Use mrp_bins directly
def load_model():
    """Load the trained regression model into the module-level ``model`` global.

    Intended to be called once at app startup; a second call is a no-op when
    the model is already present. Failures are logged, never raised, so the
    app can still start and report its status from the endpoints.
    """
    global model
    if model is not None:
        return  # already loaded — nothing to do
    try:
        logger.info(f"Loading model from {model_path}...")
        model = joblib.load(model_path)
        logger.info("Model loaded successfully.")
    except FileNotFoundError:
        logger.error(f"Error: Model file not found at {model_path}")
    except Exception as e:
        logger.error(f"An error occurred while loading the model: {e}")
def load_scaler():
    """Load the fitted StandardScaler into the module-level ``scaler`` global.

    No-op when the scaler is already loaded; errors are logged rather than
    raised so startup never aborts on a missing artifact.
    """
    global scaler
    if scaler is not None:
        return  # already loaded
    scaler_path = "scaler.joblib"  # Define the path to your saved scaler
    try:
        logger.info("Loading scaler...")
        scaler = joblib.load(scaler_path)
        logger.info("Scaler loaded successfully.")
    except FileNotFoundError:
        logger.error(f"Error: Scaler file not found at {scaler_path}")
    except Exception as e:
        logger.error(f"An error occurred while loading the scaler: {e}")
def load_training_artifacts():
    """Load training-time artifacts: column order and MRP bin edges.

    Populates the module-level ``training_columns`` and ``mrp_bins`` globals,
    which predict_sales needs to reproduce the training preprocessing.
    Errors are logged, not raised.
    """
    global training_columns, mrp_bins
    training_columns_path = "training_columns.joblib"
    mrp_bins_path = "mrp_bins.joblib"
    try:
        # Column order must match training exactly for the model input.
        training_columns = joblib.load(training_columns_path)
        logger.info("Training column names loaded successfully.")
        # Fixed bin edges so MRP categories match those seen in training.
        mrp_bins = joblib.load(mrp_bins_path)
        logger.info("MRP bin edges loaded successfully.")
    except FileNotFoundError as e:
        logger.error(f"Error loading training artifacts: {e}")
    except Exception as e:
        logger.error(f"An error occurred while loading training artifacts: {e}")
# Load model, scaler, and training artifacts when the app starts
# (i.e. at module import time), so the first request doesn't pay the load cost.
load_model()
load_scaler()
load_training_artifacts()
# Define a route for the home page (GET request)
@superkart_sales_predictor_api.route('/')
def home():
    """Handle GET requests to the root URL ('/').

    Returns:
        A plain-text status message reporting whether the model, scaler,
        and training artifacts were successfully loaded at startup.
    """
    # Plain string (no placeholders) — an f-string here was a no-op.
    logger.info("Home page request")
    # No `global` needed: the module-level artifacts are only read, never bound.
    parts = ["Welcome to the SuperKart Sales Prediction API! "]
    if model is None:
        parts.append("Model loading failed. ")
    else:
        parts.append("Model loaded successfully. ")
    if scaler is None:
        parts.append("Scaler loading failed. ")
    else:
        parts.append("Scaler loaded successfully. ")
    if training_columns is None or mrp_bins is None:
        parts.append("Training artifacts loading failed.")
    else:
        parts.append("Training artifacts loaded successfully.")
    # join() instead of repeated += string concatenation
    return "".join(parts)
# Define an endpoint for single sales prediction (POST request)
@superkart_sales_predictor_api.route('/predict_sales', methods=['POST'])
def predict_sales():
    """Handle POST requests to '/predict_sales'.

    Expects a JSON payload with the raw product and store fields used at
    training time (Product_Id, Product_MRP, Store_Establishment_Year, etc.).

    Returns:
        200 with {'predicted_sales': <float>} on success,
        500 with {'error': ...} if a required artifact failed to load,
        400 with {'error': ...} if preprocessing/prediction fails on the input.
    """
    # Artifacts are only read here, so no `global` declaration is required.
    if model is None or scaler is None or training_columns is None or mrp_bins is None:
        return jsonify({'error': 'Required artifacts (model, scaler, training columns, MRP bins) not loaded. Cannot make predictions.'}), 500
    try:
        input_data = request.get_json()
        logger.info(f"Received input data: {input_data}")
        # Single-row DataFrame so the pandas preprocessing mirrors training.
        input_df = pd.DataFrame([input_data])

        # 1. Feature engineering — must mirror the training pipeline.
        # Age of the store relative to the fixed reference year 2025.
        input_df['Years_Since_Establishment'] = 2025 - input_df['Store_Establishment_Year']
        # First two characters of Product_Id encode the broad product category.
        input_df['Product_Broad_Category'] = input_df['Product_Id'].apply(lambda x: x[:2])
        # Bin MRP with the bin edges saved from training so categories match.
        mrp_labels = ['Low', 'Medium', 'High']
        input_df['MRP_Category'] = pd.cut(input_df['Product_MRP'], bins=mrp_bins, labels=mrp_labels, include_lowest=True)

        # 2. One-hot encoding (drop_first=True, as in training). Only categories
        # present in this single row produce columns; step 3 fills in the rest.
        categorical_cols = ['Product_Sugar_Content', 'Product_Type', 'Store_Id', 'Store_Size', 'Store_Location_City_Type', 'Store_Type', 'Product_Broad_Category', 'MRP_Category']
        input_encoded = pd.get_dummies(input_df, columns=categorical_cols, drop_first=True)

        # 3. Align with the training column set and order. reindex adds any
        # dummy columns missing from this row (filled with 0) and drops extras
        # in one step. The previous per-column loop assigned scalar 0 into an
        # *empty* DataFrame, which left NaN/object-dtype columns once the row
        # index was later established by a Series assignment.
        preprocessed_input = input_encoded.reindex(columns=training_columns, fill_value=0)

        # 4. Scale numeric features with the scaler fitted on training data.
        numerical_cols_to_scale = ['Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Years_Since_Establishment']
        preprocessed_input[numerical_cols_to_scale] = scaler.transform(preprocessed_input[numerical_cols_to_scale])

        # The model was trained on log1p(sales); invert with expm1.
        predicted_sales_log = model.predict(preprocessed_input)[0]
        # Cast to a native float: NumPy scalars are not reliably JSON-serializable.
        predicted_sales = float(np.expm1(predicted_sales_log))

        return jsonify({'predicted_sales': predicted_sales})
    except Exception as e:
        logger.error(f"Error during prediction: {e}")
        return jsonify({'error': str(e)}), 400
# Define an endpoint reporting API version and model status (POST request)
@superkart_sales_predictor_api.post('/version')
def home_version():
    """Handle POST requests to the '/version' endpoint.

    Retries loading the model/scaler if the startup attempt failed, then
    returns a plain-text status message including the API version.

    (Previous docstring/log were copy-pasted from home() and wrongly
    described this as a GET handler for '/'.)
    """
    logger.info("Version endpoint request")
    global model, scaler
    if model is None:
        load_model()  # retry if the startup load failed
    if scaler is None:
        load_scaler()  # Load scaler when the endpoint is called if not already loaded
    if model is None or scaler is None:
        return "Welcome to the SuperKart Sales Prediction API! Model loading failed version 1.0."
    else:
        return "Welcome to the SuperKart Sales Prediction API! Model loaded successfully version 1.0."
# To run the Flask app (for local testing)
if __name__ == '__main__':
    # In a production environment, you would typically use a production-ready WSGI server
    # such as Gunicorn or uWSGI.
    logger.info("About to start the SuperKart Sales Predictor API...")
    # Load the model and scaler when the app starts.
    # NOTE(review): load_* are no-ops if the import-time loading above already
    # succeeded; these calls retry only if an artifact was missing then.
    load_model()
    load_scaler()
    load_training_artifacts() # Load training artifacts as well
    # NOTE(review): debug=True enables the Werkzeug debugger/reloader — do not
    # use in production. Port 7860 is presumably the Hugging Face Spaces
    # convention — confirm against deployment config.
    superkart_sales_predictor_api.run(debug=True, host='0.0.0.0', port=7860)