# Hugging Face upload metadata (page-scrape residue, kept as comments):
# Retheesh's picture — Upload folder using huggingface_hub — commit 6312228 (verified)
import numpy as np
import joblib # For loading the serialized model
import pandas as pd # For data manipulation
from flask import Flask, request, jsonify # For creating the Flask API
import os # To check if the model file exists
import logging
# Configure logging
# Module-level logger so every handler below logs under this module's name.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.info("Starting SuperKart Sales Predictor API loading file...")
# Initialize the Flask application
superkart_sales_predictor_api = Flask("SuperKart Sales Predictor")
# Define the path to the trained machine learning model
model_path = "superkart_regression_model_v1.0.joblib"
# Globals populated by the load_* helpers below; None means "not loaded yet"
# (the endpoints check for None and report/refuse accordingly).
model = None
scaler = None # Initialize scaler
# Placeholder for training data columns and MRP min/max (replace with loading from saved files in production)
training_columns = None
mrp_bins = None # Use mrp_bins directly
def load_model():
    """Load the trained regression model into the module-level ``model`` global.

    Intended to be called once at app startup; a second call is a no-op when
    the model is already present. Failures are logged, never raised, so the
    app can still start and report its status from the endpoints.
    """
    global model
    if model is not None:
        return  # already loaded — nothing to do
    try:
        logger.info(f"Loading model from {model_path}...")
        model = joblib.load(model_path)
        logger.info("Model loaded successfully.")
    except FileNotFoundError:
        logger.error(f"Error: Model file not found at {model_path}")
    except Exception as e:
        logger.error(f"An error occurred while loading the model: {e}")
def load_scaler():
    """Load the fitted StandardScaler into the module-level ``scaler`` global.

    No-op when the scaler is already loaded; errors are logged rather than
    raised so startup never aborts on a missing artifact.
    """
    global scaler
    if scaler is not None:
        return  # already loaded
    scaler_path = "scaler.joblib"  # Define the path to your saved scaler
    try:
        logger.info("Loading scaler...")
        scaler = joblib.load(scaler_path)
        logger.info("Scaler loaded successfully.")
    except FileNotFoundError:
        logger.error(f"Error: Scaler file not found at {scaler_path}")
    except Exception as e:
        logger.error(f"An error occurred while loading the scaler: {e}")
def load_training_artifacts():
    """Load training-time artifacts: column order and MRP bin edges.

    Populates the module-level ``training_columns`` and ``mrp_bins`` globals,
    which predict_sales needs to reproduce the training preprocessing.
    Errors are logged, not raised.
    """
    global training_columns, mrp_bins
    training_columns_path = "training_columns.joblib"
    mrp_bins_path = "mrp_bins.joblib"
    try:
        # Column order must match training exactly for the model input.
        training_columns = joblib.load(training_columns_path)
        logger.info("Training column names loaded successfully.")
        # Fixed bin edges so MRP categories match those seen in training.
        mrp_bins = joblib.load(mrp_bins_path)
        logger.info("MRP bin edges loaded successfully.")
    except FileNotFoundError as e:
        logger.error(f"Error loading training artifacts: {e}")
    except Exception as e:
        logger.error(f"An error occurred while loading training artifacts: {e}")
# Load model, scaler, and training artifacts when the app starts
# (i.e. at module import time), so the first request doesn't pay the load cost.
load_model()
load_scaler()
load_training_artifacts()
# Define a route for the home page (GET request)
@superkart_sales_predictor_api.route('/')
def home():
    """Handle GET requests to the root URL ('/').

    Returns:
        A plain-text status message reporting whether the model, scaler,
        and training artifacts were successfully loaded at startup.
    """
    # Plain string (no placeholders) — an f-string here was a no-op.
    logger.info("Home page request")
    # No `global` needed: the module-level artifacts are only read, never bound.
    parts = ["Welcome to the SuperKart Sales Prediction API! "]
    if model is None:
        parts.append("Model loading failed. ")
    else:
        parts.append("Model loaded successfully. ")
    if scaler is None:
        parts.append("Scaler loading failed. ")
    else:
        parts.append("Scaler loaded successfully. ")
    if training_columns is None or mrp_bins is None:
        parts.append("Training artifacts loading failed.")
    else:
        parts.append("Training artifacts loaded successfully.")
    # join() instead of repeated += string concatenation
    return "".join(parts)
# Define an endpoint for single sales prediction (POST request)
@superkart_sales_predictor_api.route('/predict_sales', methods=['POST'])
def predict_sales():
    """Handle POST requests to '/predict_sales'.

    Expects a JSON payload with the raw product and store fields used at
    training time (Product_Id, Product_MRP, Store_Establishment_Year, etc.).

    Returns:
        200 with {'predicted_sales': <float>} on success,
        500 with {'error': ...} if a required artifact failed to load,
        400 with {'error': ...} if preprocessing/prediction fails on the input.
    """
    # Artifacts are only read here, so no `global` declaration is required.
    if model is None or scaler is None or training_columns is None or mrp_bins is None:
        return jsonify({'error': 'Required artifacts (model, scaler, training columns, MRP bins) not loaded. Cannot make predictions.'}), 500
    try:
        input_data = request.get_json()
        logger.info(f"Received input data: {input_data}")
        # Single-row DataFrame so the pandas preprocessing mirrors training.
        input_df = pd.DataFrame([input_data])

        # 1. Feature engineering — must mirror the training pipeline.
        # Age of the store relative to the fixed reference year 2025.
        input_df['Years_Since_Establishment'] = 2025 - input_df['Store_Establishment_Year']
        # First two characters of Product_Id encode the broad product category.
        input_df['Product_Broad_Category'] = input_df['Product_Id'].apply(lambda x: x[:2])
        # Bin MRP with the bin edges saved from training so categories match.
        mrp_labels = ['Low', 'Medium', 'High']
        input_df['MRP_Category'] = pd.cut(input_df['Product_MRP'], bins=mrp_bins, labels=mrp_labels, include_lowest=True)

        # 2. One-hot encoding (drop_first=True, as in training). Only categories
        # present in this single row produce columns; step 3 fills in the rest.
        categorical_cols = ['Product_Sugar_Content', 'Product_Type', 'Store_Id', 'Store_Size', 'Store_Location_City_Type', 'Store_Type', 'Product_Broad_Category', 'MRP_Category']
        input_encoded = pd.get_dummies(input_df, columns=categorical_cols, drop_first=True)

        # 3. Align with the training column set and order. reindex adds any
        # dummy columns missing from this row (filled with 0) and drops extras
        # in one step. The previous per-column loop assigned scalar 0 into an
        # *empty* DataFrame, which left NaN/object-dtype columns once the row
        # index was later established by a Series assignment.
        preprocessed_input = input_encoded.reindex(columns=training_columns, fill_value=0)

        # 4. Scale numeric features with the scaler fitted on training data.
        numerical_cols_to_scale = ['Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Years_Since_Establishment']
        preprocessed_input[numerical_cols_to_scale] = scaler.transform(preprocessed_input[numerical_cols_to_scale])

        # The model was trained on log1p(sales); invert with expm1.
        predicted_sales_log = model.predict(preprocessed_input)[0]
        # Cast to a native float: NumPy scalars are not reliably JSON-serializable.
        predicted_sales = float(np.expm1(predicted_sales_log))

        return jsonify({'predicted_sales': predicted_sales})
    except Exception as e:
        logger.error(f"Error during prediction: {e}")
        return jsonify({'error': str(e)}), 400
# Define an endpoint reporting API version and model status (POST request)
@superkart_sales_predictor_api.post('/version')
def home_version():
    """Handle POST requests to the '/version' endpoint.

    Retries loading the model/scaler if the startup attempt failed, then
    returns a plain-text status message including the API version.

    (Previous docstring/log were copy-pasted from home() and wrongly
    described this as a GET handler for '/'.)
    """
    logger.info("Version endpoint request")
    global model, scaler
    if model is None:
        load_model()  # retry if the startup load failed
    if scaler is None:
        load_scaler()  # Load scaler when the endpoint is called if not already loaded
    if model is None or scaler is None:
        return "Welcome to the SuperKart Sales Prediction API! Model loading failed version 1.0."
    else:
        return "Welcome to the SuperKart Sales Prediction API! Model loaded successfully version 1.0."
# To run the Flask app (for local testing)
if __name__ == '__main__':
    # In a production environment, you would typically use a production-ready WSGI server
    # such as Gunicorn or uWSGI.
    logger.info("About to start the SuperKart Sales Predictor API...")
    # Load the model and scaler when the app starts.
    # NOTE(review): load_* are no-ops if the import-time loading above already
    # succeeded; these calls retry only if an artifact was missing then.
    load_model()
    load_scaler()
    load_training_artifacts() # Load training artifacts as well
    # NOTE(review): debug=True enables the Werkzeug debugger/reloader — do not
    # use in production. Port 7860 is presumably the Hugging Face Spaces
    # convention — confirm against deployment config.
    superkart_sales_predictor_api.run(debug=True, host='0.0.0.0', port=7860)