File size: 3,368 Bytes
2f4c5e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import flask
from flask import Flask, request, jsonify
import joblib
import pandas as pd
import numpy as np

# Initialize the Flask application
app = Flask(__name__)

# Load the trained model pipeline once at startup so every request reuses it.
# Ensure the path to your joblib file is correct.
try:
    model_pipeline = joblib.load('best_random_forest_pipeline.joblib')
except FileNotFoundError:
    # Abort startup with a clear message. `raise SystemExit(msg)` writes the
    # message to stderr and exits with a nonzero status; the bare `exit()`
    # builtin is meant for interactive sessions only (it is injected by the
    # `site` module and may be absent under `python -S`).
    raise SystemExit(
        "Error: Model file not found. Make sure "
        "'best_random_forest_pipeline.joblib' is in the same directory."
    )

@app.route('/')
def home():
    """Health-check route: confirms the service is up and points at /predict."""
    message = "Flask app is running. Use the /predict endpoint to get predictions."
    return message

# Define an endpoint for a single prediction
@app.route('/predict', methods=['POST'])
def predict():
    if request.method == 'POST':
        try:
            # Get the data from the POST request
            # This endpoint expects a single JSON object representing one data point
            data = request.get_json()

            # Convert the incoming JSON data to a pandas DataFrame
            # Ensure the column names and order match the training data
            # It's crucial that the keys in the incoming JSON match the original feature names
            # expected by your preprocessor and model.

            # Example: Assuming the incoming JSON has keys matching the original column names
            # before one-hot encoding and dropping the target/log_sales.
            # You might need to add validation here to ensure all required keys are present.

            input_df = pd.DataFrame([data])

            # Ensure categorical columns in input_df are of 'category' dtype
            # This is important because the preprocessor expects this dtype for categorical columns
            categorical_cols = ['Product_Sugar_Content', 'Product_Type', 'Store_Id', 'Store_Size', 'Store_Location_City_Type', 'Store_Type'] # List your actual categorical columns
            for col in categorical_cols:
                 if col in input_df.columns:
                     input_df[col] = input_df[col].astype('category')
                     # If your original data had specific categories, you might need to set them here
                     # input_df[col] = input_df[col].cat.set_categories(your_original_data[col].cat.categories)


            # Make a prediction using the loaded model pipeline
            # The pipeline handles preprocessing internally
            prediction = model_pipeline.predict(input_df)

            # Return the prediction as a JSON response
            # Since this is a single prediction, return the first (and only) element
            return jsonify({'prediction': prediction[0]})

        except Exception as e:
            return jsonify({'error': str(e)})

if __name__ == '__main__':
    # Local testing: save this file as app_flask.py, place
    # 'best_random_forest_pipeline.joblib' next to it, then run
    # `python app_flask.py`.
    #
    # In Colab, expose the local server through a tunnel such as ngrok
    # (a separate file is still the standard deployment shape):
    #   from flask_ngrok2 import run_with_ngrok
    #   run_with_ngrok(app)
    #   app.run()
    #
    # debug=True enables the auto-reloader and the interactive debugger —
    # convenient for development, never for production.
    app.run(debug=True)