File size: 6,965 Bytes
3c58142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# -------------------------------------------------------
# Flask Web Framework for Product Store Sales Prediction
# -------------------------------------------------------

# Import necessary libraries
import os
import numpy as np
import pandas as pd
import joblib
from flask import Flask, request, jsonify

# Initialize the Flask application
# NOTE: the first argument is used as the app's import name/identifier.
product_sales_api = Flask("SuperKart Product Sales Predictor")

# Define the path to the model file - it will be at the root of the Space
# (relative path: resolved against the process working directory at startup)
model_path_in_space = "random_forest_pipeline.joblib"

# Load the trained RandomForest model pipeline once at import time so all
# request handlers share the same in-memory model.
try:
    model = joblib.load(model_path_in_space)
    print(f"Model loaded successfully from {model_path_in_space}")
except Exception as e:
    # Broad catch is deliberate: any load failure (missing file, version
    # mismatch, corrupt pickle) must not crash the app at startup; the
    # route handlers check `model is None` and answer with HTTP 500.
    print(f"Error loading model: {e}")
    model = None # Set model to None to indicate loading failure


# -------------------------------------------------------
# Define a route for the home page (GET request)
# -------------------------------------------------------
@product_sales_api.route('/')
def home():
    """Serve GET / as a lightweight health/welcome endpoint.

    Returns a plain-text greeting when the model is available, or an
    error message with HTTP status 500 when it failed to load at startup.
    """
    if model is not None:
        return "Welcome to the SuperKart Product Store Sales Prediction API!"
    return "Error: Model could not be loaded. Please check the logs.", 500

# -------------------------------------------------------
# Define an endpoint for single product prediction (POST request)
# -------------------------------------------------------
@product_sales_api.route('/v1/sales', methods=['POST'])
def predict_sales():
    """
    Handle POST requests to '/v1/sales' for a single-product prediction.

    Expects a JSON object whose keys match the ORIGINAL (pre-encoding)
    training feature names; the loaded pipeline performs its own
    preprocessing, so no one-hot encoding is done here.

    Responses:
        200 -- {'Predicted_Product_Store_Sales_Total': <float>}
        400 -- body is not a JSON object, or required feature keys missing
        500 -- model unavailable, or unexpected prediction failure
    """
    if model is None:
        return jsonify({'error': 'Model not loaded'}), 500

    # silent=True makes a missing or non-JSON body yield None instead of
    # raising, so malformed requests get a proper 400 (previously a 500).
    product_data = request.get_json(silent=True)
    if not isinstance(product_data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    # Original training features BEFORE one-hot encoding; keys in the
    # incoming JSON must match these column names exactly.
    original_feature_cols = [
        'Product_Weight', 'Product_Allocated_Area', 'Product_MRP',
        'Store_Establishment_Year', 'Product_Sugar_Content', 'Product_Type',
        'Store_Id', 'Store_Size', 'Store_Location_City_Type', 'Store_Type'
    ]

    # Reject requests that omit required features rather than silently
    # feeding None into the model (which previously surfaced as a 500).
    missing = [col for col in original_feature_cols if col not in product_data]
    if missing:
        return jsonify(
            {'error': f"Missing required fields: {', '.join(missing)}"}
        ), 400

    try:
        # Single-row DataFrame in the training column order.
        input_df = pd.DataFrame(
            [{col: product_data[col] for col in original_feature_cols}]
        )

        # Mark categorical columns so the pipeline's preprocessor treats
        # them consistently with training.
        categorical_cols = [
            'Product_Sugar_Content', 'Product_Type', 'Store_Id',
            'Store_Size', 'Store_Location_City_Type', 'Store_Type'
        ]
        for col in categorical_cols:
            input_df[col] = input_df[col].astype('category')

        # The pipeline handles preprocessing + regression in one call.
        prediction = model.predict(input_df)[0]

        # Return the predicted sales total as JSON.
        return jsonify({'Predicted_Product_Store_Sales_Total': float(prediction)})

    except Exception as e:
        # Log the error for debugging; anything reaching here is an
        # unexpected server-side failure (e.g. bad value types).
        print(f"Error during single prediction: {e}")
        return jsonify({'error': str(e)}), 500

# -------------------------------------------------------
# Define an endpoint for batch predictions (CSV upload)
# -------------------------------------------------------
@product_sales_api.route('/v1/salesbatch', methods=['POST'])
def predict_sales_batch():
    """
    Handle POST requests to '/v1/salesbatch' for batch predictions.

    Expects a multipart upload named 'file' containing a CSV whose
    columns match the original training features. Returns the uploaded
    rows with a 'Predicted_Product_Store_Sales_Total' column appended,
    serialized as a JSON array of records.

    Responses:
        200 -- JSON records with predictions appended
        400 -- file part missing, or no file selected
        500 -- model unavailable, or prediction failure
    """
    if model is None:
        return jsonify({'error': 'Model not loaded'}), 500

    # Validate the upload before doing any work.
    if 'file' not in request.files:
        return jsonify({'error': 'No file part in the request'}), 400

    file = request.files['file']

    # If the user does not select a file, the browser submits an
    # empty file without a filename.
    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400

    # BUG FIX: the original guarded the body with `if file:` and had no
    # else-branch, so a falsy file object made the handler implicitly
    # return None (a server error). Every path now returns a response.
    try:
        # Read the CSV into a DataFrame; columns are assumed to match
        # the original training features.
        data = pd.read_csv(file)

        # Restore categorical dtypes lost in the CSV round-trip so the
        # pipeline's preprocessor sees them as during training.
        categorical_cols = [
            'Product_Sugar_Content', 'Product_Type', 'Store_Id',
            'Store_Size', 'Store_Location_City_Type', 'Store_Type'
        ]
        for col in categorical_cols:
            if col in data.columns:
                data[col] = data[col].astype('category')

        # The pipeline handles preprocessing + prediction in one call.
        data['Predicted_Product_Store_Sales_Total'] = model.predict(data)

        # Return the results as JSON records.
        return data.to_json(orient='records')

    except Exception as e:
        # Log the error for debugging.
        print(f"Error during batch prediction: {e}")
        return jsonify({'error': str(e)}), 500

# -------------------------------------------------------
# Run the Flask API (typically not run in deployment, Gunicorn handles this)
# -------------------------------------------------------
# This part is mainly for local testing. In a Docker deployment with Gunicorn,
# Gunicorn will call the 'product_sales_api' application directly.
# if __name__ == '__main__':
#     product_sales_api.run(host='0.0.0.0', port=5000, debug=True)