File size: 3,465 Bytes
49bd160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import flask
from flask import Flask, request, jsonify
import joblib
import pandas as pd
import numpy as np
import sys # Import sys for stdout redirection

# Startup progress is written to stdout after each initialisation step so the
# hosting environment's log shows how far module import got.
print('Starting Superkart Sales Predictor Flask app...', file=sys.stdout)

# Instantiate the Flask application; the string is passed as Flask's import name.
app = Flask("Superkart Sales Predictor")
print('Flask app instantiated.', file=sys.stdout)

# Load the pre-trained model once, at import time; the /predict handler reuses
# this object for every request. The path is relative, so it is resolved
# against the process's current working directory.
# NOTE(review): joblib artifacts are pickle-based — only load files from a
# trusted source.
print('Attempting to load model...', file=sys.stdout)
loaded_model = joblib.load('best_rf_model.joblib')
print('Model loaded successfully!', file=sys.stdout)

# Feature columns, in the order the model is fed them (mirrors the notebook's
# X_train layout). Hard-coded here for demonstration; a production service
# should load this list from an artifact saved alongside the model so the two
# cannot drift apart.
expected_features_list = [
    'Product_Weight',
    'Product_Sugar_Content',
    'Product_Allocated_Area',
    'Product_Type',
    'Product_MRP',
    'Store_Size',
    'Store_Location_City_Type',
    'Store_Type',
    'yr_since_store_estab',
]

# Define the prediction endpoint
@app.route('/predict', methods=['POST'])
def predict():
    """Predict sales for one or more product/store records sent as JSON.

    The request body must be a JSON object (a single record) or a non-empty
    JSON array of objects (several records). Record keys are matched against
    ``expected_features_list``: keys not in that list are dropped and
    missing ones are filled with NaN before the frame is passed to
    ``loaded_model.predict``.

    Returns:
        On success, a JSON array with one prediction per record (HTTP 200).
        A JSON object ``{'error': ...}`` with HTTP 400 when the body is not
        valid JSON, is not a dict / non-empty list of dicts; or with HTTP 500
        when building the frame or running the model fails.
    """
    print('Prediction request received.', file=sys.stdout)
    try:
        # silent=True makes get_json return None for an unparsable body
        # instead of raising, so malformed JSON falls into the 400 branch
        # below rather than being reported as a server error by the generic
        # handler at the bottom.
        print('Attempting to get JSON data from request...', file=sys.stdout)
        data = request.get_json(force=True, silent=True)
        print(f'Received data: {data}', file=sys.stdout)

        # Normalise the payload to a list of record dicts. Empty lists and
        # lists containing non-dict items are rejected up front: they cannot
        # be mapped onto named feature columns.
        if isinstance(data, dict):
            records = [data]
        elif (isinstance(data, list) and data
              and all(isinstance(item, dict) for item in data)):
            records = data
        else:
            print('Invalid input data format.', file=sys.stdout)
            return jsonify({'error': 'Invalid input data format, expected dict or list of dicts'}), 400

        # Reindex so every expected column is present, in the expected order
        # (column order matters to the preprocessor); absent features become NaN.
        print('Reindexing input DataFrame and converting dtypes...', file=sys.stdout)
        input_df = pd.DataFrame(records).reindex(
            columns=expected_features_list, fill_value=np.nan
        )

        # The preprocessor expects these columns as 'category' dtype. After
        # the reindex above they are guaranteed to exist, so no membership
        # check is needed.
        categorical_cols_expected = ['Product_Sugar_Content', 'Product_Type', 'Store_Size',
                                     'Store_Location_City_Type', 'Store_Type']
        input_df = input_df.astype(
            {col: 'category' for col in categorical_cols_expected}
        )
        print('Input DataFrame prepared for prediction.', file=sys.stdout)

        # Make prediction using the loaded model pipeline
        print('Making prediction...', file=sys.stdout)
        predictions = loaded_model.predict(input_df)
        print('Prediction successful.', file=sys.stdout)

        # numpy arrays are not JSON-serialisable; convert to a plain list.
        return jsonify(predictions.tolist())

    except Exception as e:
        # Last-resort boundary so the client always receives a JSON body.
        # NOTE(review): the raw exception text is echoed to the caller, which
        # may expose internal details — consider a generic message.
        print(f'Error during prediction: {str(e)}', file=sys.stderr) # Log errors to stderr
        return jsonify({'error': str(e)}), 500

# This part is for local testing and should be commented out or protected for deployment
# if __name__ == '__main__':
#     app.run(debug=True, host='0.0.0.0', port=5000)