File size: 7,237 Bytes
28a53ce
 
 
 
 
 
 
 
 
 
 
 
90cb5b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28a53ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90cb5b3
 
28a53ce
 
90cb5b3
28a53ce
 
90cb5b3
28a53ce
90cb5b3
 
 
 
28a53ce
90cb5b3
 
 
 
 
 
 
 
 
28a53ce
90cb5b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28a53ce
ab25dac
28a53ce
 
 
 
 
 
 
 
 
 
 
 
 
 
a7574fb
28a53ce
51d3daf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28a53ce
51d3daf
a7574fb
28a53ce
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# Import necessary libraries
import numpy as np
import joblib  # For loading the serialized model
import pandas as pd  # For data manipulation
from flask import Flask, request, jsonify  # For creating the Flask API

# Initialize the Flask application.
# NOTE(review): the string below is passed as Flask's `import_name`; Flask's
# documentation recommends `__name__` there — confirm a display-style name
# with spaces is intentional.
product_sales_predictor_api = Flask("SuperKart Product Sales Predictor")

# Load the trained machine learning model once, at import time; both prediction
# endpoints below call `model.predict` on this shared object.
# The path is relative, so it is resolved against the process's current working
# directory rather than this file's location.
# NOTE(review): joblib artifacts are pickle-based — only load files from a trusted source.
model = joblib.load("product_sales_prediction_model_v1_0.joblib")

# Known levels of each categorical input feature. The order inside each list
# fixes the order of that feature's one-hot columns in `model_columns`.
# NOTE(review): presumably these mirror the levels used when the model was
# trained — confirm against the training pipeline before editing.
store_id_categories = ['OUT001', 'OUT002', 'OUT003', 'OUT004']
store_size_categories = ['High', 'Medium', 'Small']
city_type_categories = ['Tier 1', 'Tier 2', 'Tier 3']
store_type_categories = ['Departmental Store', 'Food Mart', 'Supermarket Type1', 'Supermarket Type2']
product_group_categories = ['Non-Food/Household', 'Packaged/Processed Foods', 'Perishable Foods']
sugar_content_categories = ['Low Sugar', 'No Sugar', 'Regular']

# Column order of the frame handed to `model.predict`: the four numeric inputs
# first, then one indicator column per (feature, level) pair named
# '<feature>_<level>'. The indicator names are derived from the category lists
# above so the two can never drift apart.
model_columns = [
    'Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Store_Age',
    *(f'Product_Sugar_Content_{level}' for level in sugar_content_categories),
    *(f'Store_Id_{level}' for level in store_id_categories),
    *(f'Store_Size_{level}' for level in store_size_categories),
    *(f'Store_Location_City_Type_{level}' for level in city_type_categories),
    *(f'Store_Type_{level}' for level in store_type_categories),
    *(f'Product_Group_{level}' for level in product_group_categories),
]

# Home page route (GET request)
@product_sales_predictor_api.get('/')
def home():
    """
    Respond to GET requests on the root URL ('/').

    Returns a fixed welcome string; no request data is read.
    """
    welcome_message = "Welcome to the SuperKart Product Sales Prediction API!"
    return welcome_message

# Define an endpoint for single product prediction (POST request)
@product_sales_predictor_api.post('/v1/sales')
def predict_product_sales():
    """
    Handle POST requests to the '/v1/sales' endpoint.

    Expects a JSON object containing the numeric fields 'Product_Weight',
    'Product_Allocated_Area', 'Product_MRP' and 'Store_Age', plus the
    categorical fields 'Product_Sugar_Content', 'Store_Id', 'Store_Size',
    'Store_Location_City_Type', 'Store_Type' and 'Product_Group'.

    Returns:
        On success, a JSON response of the form
        {'Predicted Sales': <prediction rounded to 2 decimals>}.
        If the body is not a JSON object, or a required field is missing,
        a JSON response {'error': <message>} with HTTP status 400.
    """
    numeric_fields = ('Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Store_Age')
    categorical_fields = (
        'Product_Sugar_Content', 'Store_Id', 'Store_Size',
        'Store_Location_City_Type', 'Store_Type', 'Product_Group',
    )

    # Get the JSON data from the request body
    product_data = request.get_json()

    # Reject anything that is not a JSON object (None, list, scalar) up front,
    # instead of failing later with an unhandled TypeError (HTTP 500).
    if not isinstance(product_data, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    # Report every missing field at once rather than raising KeyError on the first.
    missing_fields = [
        field for field in numeric_fields + categorical_fields
        if field not in product_data
    ]
    if missing_fields:
        return jsonify({'error': f"Missing required fields: {', '.join(missing_fields)}"}), 400

    # Numeric inputs are forwarded to the model as received.
    numeric_features = {field: product_data[field] for field in numeric_fields}

    # Start with every indicator column switched off.
    one_hot_features = {col: 0 for col in model_columns if col not in numeric_features}

    # Switch on the indicator matching each categorical value. A value with no
    # matching column is tolerated: all indicators of that feature stay 0.
    for field in categorical_fields:
        indicator = f"{field}_{product_data[field]}"
        if indicator in one_hot_features:
            one_hot_features[indicator] = 1

    # Single-row frame whose columns follow model_columns order
    input_df = pd.DataFrame([{**numeric_features, **one_hot_features}], columns=model_columns)

    # Predict with the model; only the first (and only) prediction is used
    predicted_sale = model.predict(input_df).tolist()[0]
    predicted_sale = round(float(predicted_sale), 2)

    # Return the predicted sales value
    return jsonify({'Predicted Sales': predicted_sale})


# Define an endpoint for batch prediction (POST request)
@product_sales_predictor_api.post('/v1/salesbatch')
def predict_product_sales_batch():
    """
    Handle POST requests to the '/v1/salesbatch' endpoint.

    Expects a multipart upload with a CSV under the form field 'file'. The CSV
    must contain a 'Product_Id' column, the numeric columns 'Product_Weight',
    'Product_Allocated_Area', 'Product_MRP' and 'Store_Age', and the
    categorical columns 'Product_Sugar_Content', 'Store_Id', 'Store_Size',
    'Store_Location_City_Type', 'Store_Type' and 'Product_Group'.

    Returns:
        On success, a JSON object mapping each Product_Id to its (unrounded)
        predicted sales. Rows sharing a Product_Id collapse to one entry,
        keeping the prediction of the last such row. A CSV with a header but
        no rows yields an empty JSON object.
        If the file is absent, cannot be parsed, or lacks required columns,
        a JSON response {'error': <message>} with HTTP status 400.
    """
    numeric_fields = ['Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Store_Age']
    # Categorical column -> its known levels (module-level lists, shared with model_columns).
    categorical_levels = {
        'Product_Sugar_Content': sugar_content_categories,
        'Store_Id': store_id_categories,
        'Store_Size': store_size_categories,
        'Store_Location_City_Type': city_type_categories,
        'Store_Type': store_type_categories,
        'Product_Group': product_group_categories,
    }

    # Get the uploaded CSV file from the request
    file = request.files.get('file')
    if file is None:
        return jsonify({'error': "No CSV file uploaded under the 'file' form field."}), 400

    # Read the CSV file into a Pandas DataFrame; malformed uploads are a client error
    try:
        df = pd.read_csv(file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        return jsonify({'error': f'Could not parse the uploaded CSV: {exc}'}), 400

    # Report every missing column at once instead of failing with a KeyError (HTTP 500).
    required_columns = ['Product_Id', *numeric_fields, *categorical_levels]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        return jsonify({'error': f"Missing required columns: {', '.join(missing_columns)}"}), 400

    # Nothing to predict: skip calling the model with zero rows.
    if df.empty:
        return jsonify({})

    # Build one 0/1 indicator column per (feature, level) pair. Rows whose value
    # matches no known level get 0 in every indicator of that feature.
    indicator_columns = {}
    for field, levels in categorical_levels.items():
        for level in levels:
            indicator_columns[f'{field}_{level}'] = (df[field] == level).astype(int)
    one_hot_df = pd.DataFrame(indicator_columns, index=df.index)

    # Concatenate numeric and one-hot features, then force model column order;
    # any model column not produced above is filled with 0.
    input_df = pd.concat([df[numeric_fields], one_hot_df], axis=1)
    input_df = input_df.reindex(columns=model_columns, fill_value=0)

    # Make predictions for all products in the DataFrame
    predicted_sales = model.predict(input_df).tolist()
    prod_id_list = df['Product_Id'].tolist()

    # Create a dictionary of predictions with product IDs as keys
    output_dict = dict(zip(prod_id_list, predicted_sales))

    # Return the predictions dictionary as a JSON response
    return jsonify(output_dict)

# Run the Flask development server when this script is executed directly
if __name__ == '__main__':
    import os

    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Keep it off by default
    # and make it an explicit opt-in for local development: FLASK_DEBUG=1.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}
    product_sales_predictor_api.run(debug=debug_enabled)