Spaces:
Sleeping
Sleeping
File size: 9,042 Bytes
32e67e2 0a63305 6312228 32e67e2 6312228 32e67e2 6312228 32e67e2 0a63305 6312228 0a63305 6312228 0a63305 6312228 0a63305 32e67e2 c4f97e2 6312228 32e67e2 6312228 0a63305 6312228 9a844f5 6312228 9a844f5 6312228 9a844f5 c4f97e2 6312228 c4f97e2 32e67e2 6312228 32e67e2 6312228 0a63305 6312228 32e67e2 0a63305 32e67e2 0a63305 6312228 0a63305 6312228 0a63305 6312228 0a63305 6312228 0a63305 6312228 0a63305 6312228 0a63305 6312228 0a63305 6312228 32e67e2 0a63305 6312228 0a63305 32e67e2 0a63305 32e67e2 6312228 32e67e2 0a63305 6312228 32e67e2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 |
import numpy as np
import joblib # For loading the serialized model
import pandas as pd # For data manipulation
from flask import Flask, request, jsonify # For creating the Flask API
import os # To check if the model file exists
import logging
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.info("Starting SuperKart Sales Predictor API loading file...")
# Initialize the Flask application
superkart_sales_predictor_api = Flask("SuperKart Sales Predictor")
# Define the path to the trained machine learning model
model_path = "superkart_regression_model_v1.0.joblib"
# Global artifacts, populated lazily by the load_* helpers below.
# All remain None if their corresponding file is missing or unreadable,
# and the route handlers check for None before predicting.
model = None
scaler = None # Initialize scaler
# Placeholder for training data columns and MRP min/max (replace with loading from saved files in production)
training_columns = None
mrp_bins = None # Use mrp_bins directly
def load_model():
    """
    Load the trained regression model from ``model_path`` into the global
    ``model``.

    Idempotent: returns immediately once the model is loaded. On failure
    the error is logged and ``model`` stays ``None`` (callers must check).
    """
    global model
    if model is not None:
        return
    try:
        logger.info(f"Loading model from {model_path}...")
        model = joblib.load(model_path)
        logger.info("Model loaded successfully.")
    except FileNotFoundError:
        logger.error(f"Error: Model file not found at {model_path}")
    except Exception as e:
        logger.error(f"An error occurred while loading the model: {e}")
def load_scaler():
    """
    Load the fitted StandardScaler from disk into the global ``scaler``.

    Idempotent: returns immediately once the scaler is loaded. On failure
    the error is logged and ``scaler`` stays ``None``.
    """
    global scaler
    if scaler is not None:
        return
    scaler_path = "scaler.joblib"  # Path to the serialized fitted scaler
    try:
        logger.info("Loading scaler...")
        scaler = joblib.load(scaler_path)
        logger.info("Scaler loaded successfully.")
    except FileNotFoundError:
        logger.error(f"Error: Scaler file not found at {scaler_path}")
    except Exception as e:
        logger.error(f"An error occurred while loading the scaler: {e}")
def load_training_artifacts():
    """
    Load the preprocessing artifacts saved at training time into the
    globals ``training_columns`` (expected feature-column order) and
    ``mrp_bins`` (fixed bin edges for the MRP_Category feature).

    Unlike the model/scaler loaders this re-reads the files on every call.
    On failure the error is logged and the globals keep their prior values.
    """
    global training_columns, mrp_bins
    try:
        # Feature column names used during training (order matters).
        cols_path = "training_columns.joblib"
        training_columns = joblib.load(cols_path)
        logger.info("Training column names loaded successfully.")
        # Bin edges fitted on the training Product_MRP distribution.
        bins_path = "mrp_bins.joblib"
        mrp_bins = joblib.load(bins_path)
        logger.info("MRP bin edges loaded successfully.")
    except FileNotFoundError as e:
        logger.error(f"Error loading training artifacts: {e}")
    except Exception as e:
        logger.error(f"An error occurred while loading training artifacts: {e}")
# Load model, scaler, and training artifacts when the app starts
# (runs at import time so the module works under a WSGI server too; failures
# are logged, not raised, and the route handlers check for None artifacts)
load_model()
load_scaler()
load_training_artifacts()
# Define a route for the home page (GET request)
@superkart_sales_predictor_api.route('/')
def home():
    """
    Handle GET requests to the root URL ('/').

    Returns a plain-text welcome message reporting whether the model,
    scaler, and training artifacts were loaded successfully at startup.
    """
    logger.info(f"Home page request")
    global model, scaler, training_columns, mrp_bins
    parts = ["Welcome to the SuperKart Sales Prediction API! "]
    parts.append(
        "Model loaded successfully. " if model is not None else "Model loading failed. "
    )
    parts.append(
        "Scaler loaded successfully. " if scaler is not None else "Scaler loading failed. "
    )
    if training_columns is not None and mrp_bins is not None:
        parts.append("Training artifacts loaded successfully.")
    else:
        parts.append("Training artifacts loading failed.")
    return "".join(parts)
# Define an endpoint for single sales prediction (POST request)
@superkart_sales_predictor_api.route('/predict_sales', methods=['POST'])
def predict_sales():
    """
    Handle POST requests to the '/predict_sales' endpoint.

    Expects a JSON payload with one record of product and store fields
    (Product_Id, Product_MRP, Store_Establishment_Year, ...), applies the
    same preprocessing used at training time, and returns
    ``{'predicted_sales': <float>}``. Returns ``{'error': ...}`` with
    status 500 if required artifacts are missing, or 400 on any
    preprocessing/prediction failure.
    """
    global model, scaler, training_columns, mrp_bins
    if model is None or scaler is None or training_columns is None or mrp_bins is None:
        return jsonify({'error': 'Required artifacts (model, scaler, training columns, MRP bins) not loaded. Cannot make predictions.'}), 500
    try:
        # Get the JSON data from the request body
        input_data = request.get_json()
        logger.info(f"Received input data: {input_data}")
        # Single record -> one-row DataFrame
        input_df = pd.DataFrame([input_data])

        # 1. Feature engineering (must mirror the training pipeline)
        # NOTE(review): reference year 2025 is hard-coded — confirm it matches
        # the year used when the model was trained.
        input_df['Years_Since_Establishment'] = 2025 - input_df['Store_Establishment_Year']
        # First two characters of Product_Id encode the broad category.
        input_df['Product_Broad_Category'] = input_df['Product_Id'].apply(lambda x: x[:2])
        # Bucket MRP using the fixed bin edges saved from training.
        mrp_labels = ['Low', 'Medium', 'High']
        input_df['MRP_Category'] = pd.cut(input_df['Product_MRP'], bins=mrp_bins, labels=mrp_labels, include_lowest=True)

        # 2. One-hot encoding of categorical features (drop_first as in training)
        categorical_cols = ['Product_Sugar_Content', 'Product_Type', 'Store_Id', 'Store_Size', 'Store_Location_City_Type', 'Store_Type', 'Product_Broad_Category', 'MRP_Category']
        input_encoded = pd.get_dummies(input_df, columns=categorical_cols, drop_first=True)

        # 3. Align columns with the training feature set.
        # reindex adds any training-time dummy columns missing from this
        # single-row input (filled with 0), drops columns the model never
        # saw, and enforces the training column order — all in one step.
        # (The previous per-column copy into an empty DataFrame silently
        # produced a zero-row frame whenever the first training column was
        # absent from the input, because scalar assignment to an empty
        # DataFrame adds no rows.)
        preprocessed_input = input_encoded.reindex(columns=training_columns, fill_value=0)

        # 4. Scale numerical features with the scaler fitted on training data
        numerical_cols_to_scale = ['Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Years_Since_Establishment']
        preprocessed_input[numerical_cols_to_scale] = scaler.transform(preprocessed_input[numerical_cols_to_scale])

        # The model was trained on log1p-transformed sales; invert with expm1.
        predicted_sales_log = model.predict(preprocessed_input)[0]
        # Convert np.float64 to a plain float — Flask's JSON encoder cannot
        # serialize numpy scalars.
        predicted_sales = float(np.expm1(predicted_sales_log))

        return jsonify({'predicted_sales': predicted_sales})
    except Exception as e:
        logger.error(f"Error during prediction: {e}")
        return jsonify({'error': str(e)}), 400
# Define an endpoint reporting the API version and artifact status (POST request)
@superkart_sales_predictor_api.post('/version')
def home_version():
    """
    Handle POST requests to the '/version' endpoint.

    Retries loading the model and scaler if either failed at startup,
    then returns a plain-text message with the API version and the
    current load status. (Docstring previously copy-pasted from the
    home route; it incorrectly described GET requests to '/').
    """
    logger.info("Version endpoint request")
    global model, scaler
    if model is None:
        load_model()  # retry; no-op if a later call already succeeded
    if scaler is None:
        load_scaler()  # Load scaler when the endpoint is called if not already loaded
    if model is None or scaler is None:
        return "Welcome to the SuperKart Sales Prediction API! Model loading failed version 1.0."
    else:
        return "Welcome to the SuperKart Sales Prediction API! Model loaded successfully version 1.0."
# To run the Flask app (for local testing)
if __name__ == '__main__':
    # In a production environment, you would typically use a production-ready WSGI server
    # such as Gunicorn or uWSGI.
    logger.info("About to start the SuperKart Sales Predictor API...")
    # Load the model and scaler when the app starts.
    # load_model/load_scaler are no-ops if the module-level calls above already
    # succeeded; load_training_artifacts re-reads its files unconditionally.
    load_model()
    load_scaler()
    load_training_artifacts() # Load training artifacts as well
    # debug=True enables the interactive debugger/reloader — development only;
    # binds all interfaces on port 7860.
    superkart_sales_predictor_api.run(debug=True, host='0.0.0.0', port=7860)
|