File size: 4,726 Bytes
1e9e683
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118

import sys
import joblib
import pandas as pd
import numpy as np
from flask import Flask, request, jsonify

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor # Included for compatibility if you switch models

class FeatureEngineer(BaseEstimator, TransformerMixin):
    """Feature-engineering step used inside the pickled sklearn pipeline.

    Derives Product_Id_Cd (2-char product-id prefix), a corrected sugar-content
    column, and Operation_Years, then label-encodes the categorical columns.

    NOTE(review): this class definition must stay byte-compatible with the one
    used at training time — the fitted instance inside the joblib file only has
    the attributes `le_prod` and `le_store`, so do not restructure without
    retraining/re-serializing the pipeline.
    """

    def __init__(self):
        # Two shared LabelEncoders for ALL categorical columns — see the
        # NOTE(review) in fit() about le_prod being refit per column.
        self.le_prod = LabelEncoder()
        self.le_store = LabelEncoder()

    def fit(self, X, y=None):
        """Fit the label encoders on the raw input frame; returns self."""
        X_copy = X.copy()
        # First two characters of Product_Id act as a coarse product category.
        X_copy['Product_Id_Cd'] = X_copy['Product_Id'].apply(lambda x: x[:2])
        # Normalize the 'reg' spelling variant to 'Regular'.
        X_copy['Product_Sugar_Content_Corr'] = X_copy['Product_Sugar_Content'].str.replace('reg', 'Regular', regex=True)
        # NOTE(review): reference year is 2025 here but 2013 in transform() —
        # the two should presumably agree; confirm against the training code.
        X_copy['Operation_Years'] = 2025 - X_copy['Store_Establishment_Year']
        
        self.le_prod.fit(X_copy['Product_Id_Cd'])
        le_feat=['Product_Sugar_Content_Corr','Store_Size','Store_Location_City_Type','Store_Type','Product_Id_Cd']
        # NOTE(review): each .fit() call REPLACES le_prod.classes_, so after
        # this loop le_prod only knows the categories of the last column
        # ('Product_Id_Cd'). transform() then applies that single encoder to
        # every column — this looks like a bug (one encoder per column was
        # probably intended), but it matches the serialized training state.
        for i in le_feat:
            self.le_prod.fit(X_copy[i])
        
        self.le_store.fit(X_copy['Store_Id'])
        return self

    def transform(self, X):
        """Apply the derived-column and label-encoding steps to X."""
        X_copy = X.copy()
        X_copy['Product_Id_Cd'] = X_copy['Product_Id'].apply(lambda x: x[:2])
        X_copy['Product_Sugar_Content_Corr'] = X_copy['Product_Sugar_Content'].str.replace('reg', 'Regular', regex=True)
        # NOTE(review): 2013 here vs 2025 in fit() — see fit().
        X_copy['Operation_Years'] = 2013 - X_copy['Store_Establishment_Year']
        
        try:
            le_feat=['Product_Sugar_Content_Corr','Store_Size','Store_Location_City_Type','Store_Type','Product_Id_Cd']
            for i in le_feat:
                X_copy[i] = self.le_prod.transform(X_copy[i])
        except ValueError:
            # NOTE(review): an unseen label in ANY of the five columns lands
            # here, but only Product_Id_Cd is reset to -1; columns encoded
            # before the failure keep their transformed values — likely a bug.
            X_copy['Product_Id_Cd'] = -1
        
        try:
            X_copy['Store'] = self.le_store.transform(X_copy['Store_Id'])
        except ValueError:
            # Unseen store id maps to the -1 sentinel.
            X_copy['Store'] = -1
        
        # Drop raw columns superseded by the engineered ones above.
        rem_feat=['Product_Id','Store_Id','Product_Sugar_Content','Product_Type', 'Store_Establishment_Year']
        X_copy.drop(rem_feat, axis=1, inplace=True)
        
        return X_copy

# The pipeline was trained in a script/notebook where FeatureEngineer lived in
# __main__; registering it there lets joblib's pickle resolve the saved
# class reference when the pipeline is loaded below.
sys.modules['__main__'].FeatureEngineer = FeatureEngineer

# Initialize the Flask app with a display name.
app = Flask("SuperKart Sales Predictor")

# Load the trained sales-prediction pipeline (preprocessing + regressor).
# NOTE(review): joblib.load unpickles arbitrary code — only load model files
# from a trusted source.
model = joblib.load("XGBoostRegressor_BEST_Pipeline.joblib")

# Define a route for the home page
@app.get('/')
def home():
    """Landing page: confirms the API is up and reachable."""
    welcome_message = "Welcome to the SuperKart Sales Prediction API"
    return welcome_message

# Endpoint to predict sales for a single product/store record.
@app.post('/v1/product')
def predict_sales():
    """Predict sales for one product/store record posted as JSON.

    Expects a JSON object containing every key in ``required_keys``.

    Returns:
        200 with {'Prediction': <float>} on success.
        400 with {'error': ...} when the body is not valid JSON or a
        required key is missing/null.
    """
    # silent=True makes a missing or non-JSON body yield None instead of
    # raising, so we can answer with a clean 400 rather than a 500.
    customer_data = request.get_json(silent=True)
    if not customer_data:
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    required_keys = ['Product_Id', 'Product_Weight', 'Product_Sugar_Content', 'Product_Allocated_Area',
                     'Product_Type', 'Product_MRP', 'Store_Id', 'Store_Establishment_Year',
                     'Store_Size', 'Store_Location_City_Type', 'Store_Type']

    # Validate and collect the features in a single pass; .get() avoids a
    # KeyError and lets us report the first missing key explicitly.
    sample = {}
    for key in required_keys:
        value = customer_data.get(key)
        if value is None:
            return jsonify({'error': f'Missing key: {key}'}), 400
        sample[key] = value

    # The pipeline expects a single-row DataFrame with the raw feature columns.
    input_data = pd.DataFrame([sample])

    # Predict and unwrap the single value so it is JSON-serializable.
    prediction = model.predict(input_data).tolist()[0]

    return jsonify({'Prediction': prediction})


# Run the Flask development server when executed directly.
# NOTE(review): debug=True enables the interactive debugger and auto-reload —
# fine for local development, but must be disabled (and a WSGI server such as
# gunicorn used instead) in production.
if __name__ == '__main__':
    app.run(debug=True)