File size: 9,042 Bytes
32e67e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a63305
6312228
 
 
 
32e67e2
 
 
 
 
 
 
 
 
 
 
 
 
6312228
32e67e2
6312228
32e67e2
0a63305
 
 
 
 
 
 
 
 
 
6312228
 
 
0a63305
6312228
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a63305
6312228
 
 
 
0a63305
32e67e2
 
 
 
 
 
 
 
c4f97e2
6312228
 
32e67e2
6312228
0a63305
6312228
9a844f5
6312228
9a844f5
6312228
 
 
 
 
9a844f5
c4f97e2
6312228
c4f97e2
 
32e67e2
6312228
32e67e2
 
 
 
 
 
6312228
0a63305
6312228
 
32e67e2
 
 
 
0a63305
32e67e2
 
 
 
 
 
 
 
0a63305
6312228
0a63305
 
 
6312228
0a63305
6312228
0a63305
 
 
 
 
 
 
 
6312228
0a63305
 
 
6312228
0a63305
6312228
0a63305
6312228
 
 
 
0a63305
 
6312228
32e67e2
 
0a63305
 
6312228
0a63305
32e67e2
 
 
 
 
 
 
 
 
 
 
 
 
0a63305
32e67e2
 
6312228
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32e67e2
 
 
 
 
0a63305
 
 
6312228
 
32e67e2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
import numpy as np
import joblib  # For loading the serialized model
import pandas as pd  # For data manipulation
from flask import Flask, request, jsonify  # For creating the Flask API
import os # To check if the model file exists
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

logger.info("Starting SuperKart Sales Predictor API loading file...")
# Initialize the Flask application
superkart_sales_predictor_api = Flask("SuperKart Sales Predictor")

# Define the path to the trained machine learning model
model_path = "superkart_regression_model_v1.0.joblib"
model = None
scaler = None # Initialize scaler
# Placeholder for training data columns and MRP min/max (replace with loading from saved files in production)
# training_columns: ordered feature-column list captured at training time,
# used by predict_sales() to align the one-hot-encoded input.
training_columns = None
# mrp_bins: fixed bin edges for Product_MRP saved at training time,
# consumed by pd.cut in predict_sales().
mrp_bins = None # Use mrp_bins directly


def load_model():
    """
    Load the trained regression model from ``model_path`` into the
    module-level ``model`` global.

    Idempotent: returns immediately if the model is already in memory.
    Load failures are logged rather than raised so the app can still start.
    """
    global model
    if model is not None:
        return
    try:
        logger.info(f"Loading model from {model_path}...")
        model = joblib.load(model_path)
        logger.info("Model loaded successfully.")
    except FileNotFoundError:
        logger.error(f"Error: Model file not found at {model_path}")
    except Exception as exc:
        logger.error(f"An error occurred while loading the model: {exc}")

def load_scaler():
    """
    Load the fitted StandardScaler into the module-level ``scaler`` global.

    Idempotent: returns immediately if the scaler is already in memory.
    Load failures are logged rather than raised.
    """
    global scaler
    if scaler is not None:
        return
    scaler_path = "scaler.joblib" # Define the path to your saved scaler
    try:
        logger.info("Loading scaler...")
        scaler = joblib.load(scaler_path)
        logger.info("Scaler loaded successfully.")
    except FileNotFoundError:
        logger.error(f"Error: Scaler file not found at {scaler_path}")
    except Exception as exc:
        logger.error(f"An error occurred while loading the scaler: {exc}")

def load_training_artifacts():
    """
    Load training-time artifacts into module globals: the ordered feature
    column names (``training_columns``) and the fixed MRP bin edges
    (``mrp_bins``).

    Failures are logged rather than raised; a missing file leaves the
    corresponding global(s) as ``None``.
    """
    global training_columns, mrp_bins
    try:
        # Column order the model was trained on.
        training_columns = joblib.load("training_columns.joblib")
        logger.info("Training column names loaded successfully.")

        # Bin edges used to discretize Product_MRP during training.
        mrp_bins = joblib.load("mrp_bins.joblib")
        logger.info("MRP bin edges loaded successfully.")
    except FileNotFoundError as err:
        logger.error(f"Error loading training artifacts: {err}")
    except Exception as err:
        logger.error(f"An error occurred while loading training artifacts: {err}")


# Load model, scaler, and training artifacts when the app starts
# (i.e. at import time, so WSGI servers like Gunicorn get warm workers).
load_model()
load_scaler()
load_training_artifacts()


# Define a route for the home page (GET request)
@superkart_sales_predictor_api.route('/')
def home():
    """
    Handle GET requests to the root URL ('/').

    Returns:
        str: A welcome message reporting whether the model, the scaler,
        and the training artifacts were loaded successfully at startup.
    """
    # Plain string: no placeholders, so no f-string needed.
    logger.info("Home page request")
    # Module globals are only read here, so no `global` declaration is needed.
    status_message = "Welcome to the SuperKart Sales Prediction API! "
    if model is None:
        status_message += "Model loading failed. "
    else:
        status_message += "Model loaded successfully. "
    if scaler is None:
        status_message += "Scaler loading failed. "
    else:
        status_message += "Scaler loaded successfully. "
    if training_columns is None or mrp_bins is None:
        status_message += "Training artifacts loading failed."
    else:
        status_message += "Training artifacts loaded successfully."

    return status_message


# Define an endpoint for single sales prediction (POST request)
@superkart_sales_predictor_api.route('/predict_sales', methods=['POST'])
def predict_sales():
    """
    Handle POST requests to the '/predict_sales' endpoint.

    Expects a JSON payload with product and store fields (e.g. 'Product_Id',
    'Product_MRP', 'Store_Establishment_Year', ...), applies the same
    preprocessing pipeline used in training, and returns the prediction.

    Returns:
        flask.Response: 200 with {'predicted_sales': <float>},
        500 if required artifacts are not loaded,
        400 on any preprocessing/prediction error.
    """
    if model is None or scaler is None or training_columns is None or mrp_bins is None:
        return jsonify({'error': 'Required artifacts (model, scaler, training columns, MRP bins) not loaded. Cannot make predictions.'}), 500

    try:
        # Get the JSON data from the request body
        input_data = request.get_json()
        logger.info(f"Received input data: {input_data}")

        # Single-row DataFrame so the pandas preprocessing used in training
        # can be applied unchanged.
        input_df = pd.DataFrame([input_data])

        # 1. Feature Engineering (must mirror training).
        # NOTE(review): reference year 2025 is hard-coded; confirm it matches
        # the year used when the model was trained.
        input_df['Years_Since_Establishment'] = 2025 - input_df['Store_Establishment_Year']
        # First two characters of Product_Id are treated as the broad category.
        input_df['Product_Broad_Category'] = input_df['Product_Id'].apply(lambda x: x[:2])

        # Create 'MRP_Category' using the fixed bin edges loaded from training.
        # An MRP outside the training range yields NaN, producing no dummy
        # column — all MRP_Category dummies then stay 0 after reindexing.
        mrp_labels = ['Low', 'Medium', 'High']
        input_df['MRP_Category'] = pd.cut(input_df['Product_MRP'], bins=mrp_bins, labels=mrp_labels, include_lowest=True)

        # 2. One-Hot Encoding of the categorical features
        # (Product_Id itself is excluded; it is captured by Product_Broad_Category).
        categorical_cols = ['Product_Sugar_Content', 'Product_Type', 'Store_Id', 'Store_Size', 'Store_Location_City_Type', 'Store_Type', 'Product_Broad_Category', 'MRP_Category']
        input_encoded = pd.get_dummies(input_df, columns=categorical_cols, drop_first=True)

        # 3. Align columns with the training feature matrix.
        # reindex adds any dummy columns absent from this single row (filled
        # with 0), drops unseen extras, and enforces the training column order
        # in one step — replaces the previous manual per-column copy loop,
        # which also triggered pandas fragmentation warnings.
        preprocessed_input = input_encoded.reindex(columns=training_columns, fill_value=0)

        # 4. Scale numerical features with the scaler fitted on training data.
        numerical_cols_to_scale = ['Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Years_Since_Establishment']
        preprocessed_input[numerical_cols_to_scale] = scaler.transform(preprocessed_input[numerical_cols_to_scale])

        # The model was trained on log1p-transformed sales, so invert the
        # transform with expm1 to recover the actual sales value.
        predicted_sales_log = model.predict(preprocessed_input)[0]
        predicted_sales = np.expm1(predicted_sales_log)

        # Cast to a builtin float so jsonify never fails on a NumPy scalar
        # type that is not a float subclass (e.g. np.float32).
        return jsonify({'predicted_sales': float(predicted_sales)})

    except Exception as e:
        logger.error(f"Error during prediction: {e}")
        return jsonify({'error': str(e)}), 400

# Define a version/health endpoint (POST request)
@superkart_sales_predictor_api.post('/version')
def home_version():
    """
    Handle POST requests to the '/version' endpoint.

    Retries loading the model and scaler lazily if startup loading failed,
    then reports the API version together with the model load status.

    Returns:
        str: A welcome message with the API version and load status.
    """
    # Log message corrected: this is the version endpoint, not the home page.
    logger.info("Version endpoint request")
    global model, scaler
    if model is None:
        load_model()
    if scaler is None:
        load_scaler() # Load scaler when the endpoint is called if not already loaded

    if model is None or scaler is None:
        return "Welcome to the SuperKart Sales Prediction API! Model loading failed version 1.0."
    else:
        return "Welcome to the SuperKart Sales Prediction API! Model loaded successfully version 1.0."


# To run the Flask app (for local testing)
if __name__ == '__main__':
    # In a production environment, you would typically use a production-ready WSGI server
    # such as Gunicorn or uWSGI.
    logger.info("About to start the SuperKart Sales Predictor API...")
    # Load the model and scaler when the app starts.
    # NOTE(review): these already ran at import time above; load_model() and
    # load_scaler() return early once their globals are set, but
    # load_training_artifacts() re-reads its files each call.
    load_model()
    load_scaler()
    load_training_artifacts() # Load training artifacts as well

    # debug=True and 0.0.0.0 binding are for local testing only.
    superkart_sales_predictor_api.run(debug=True, host='0.0.0.0', port=7860)