import flask
from flask import Flask, request, jsonify
import joblib
import pandas as pd
import numpy as np

# Initialize the Flask application.
app = Flask(__name__)

# Load the trained model pipeline once at startup.
# The joblib file must sit in the same directory as this script.
try:
    model_pipeline = joblib.load('best_random_forest_pipeline.joblib')
except FileNotFoundError:
    print("Error: Model file not found. Make sure 'best_random_forest_pipeline.joblib' is in the same directory.")
    exit()  # Exit if the model file is not found

# Columns the preprocessor expects to carry pandas 'category' dtype.
CATEGORICAL_COLS = [
    'Product_Sugar_Content', 'Product_Type', 'Store_Id',
    'Store_Size', 'Store_Location_City_Type', 'Store_Type',
]


@app.route('/')
def home():
    """Health-check endpoint confirming the server is running."""
    return "Flask app is running. Use the /predict endpoint to get predictions."


@app.route('/predict', methods=['POST'])
def predict():
    """Return a model prediction for a single JSON-encoded data point.

    Expects a POST body that is one JSON object whose keys match the
    original (pre-encoding) feature names the pipeline was trained on.
    Responds with ``{'prediction': <float>}`` on success, ``{'error': ...}``
    with HTTP 400 for a missing/invalid JSON body, or 500 on any other
    failure.
    """
    # The route only accepts POST, so re-checking request.method is
    # unnecessary; Flask answers other verbs with 405 automatically.
    data = request.get_json(silent=True)
    if data is None:
        # Reject a missing or malformed JSON body with a clean 400
        # instead of crashing into the generic handler below.
        return jsonify({'error': 'Request body must be a JSON object.'}), 400
    try:
        # Single data point -> one-row DataFrame. Keys in the incoming
        # JSON must match the original feature names the preprocessor
        # expects (before one-hot encoding / target dropping).
        input_df = pd.DataFrame([data])

        # The preprocessor expects these columns as 'category' dtype.
        # NOTE(review): if the training data used fixed category sets,
        # they may need to be re-applied here via
        # input_df[col].cat.set_categories(...) - confirm against training.
        for col in CATEGORICAL_COLS:
            if col in input_df.columns:
                input_df[col] = input_df[col].astype('category')

        # The pipeline handles preprocessing internally.
        prediction = model_pipeline.predict(input_df)

        # prediction[0] is a numpy scalar (e.g. np.float64), which Flask's
        # default JSON encoder cannot serialize - convert to a plain float.
        return jsonify({'prediction': float(prediction[0])})
    except Exception as e:
        # Surface the failure with a 500 status rather than a misleading 200.
        return jsonify({'error': str(e)}), 500


if __name__ == '__main__':
    # Local testing: run `python app_flask.py` with the joblib file in the
    # same directory, then POST JSON to /predict. In Colab, expose the
    # local server with a tunnel such as ngrok.
    app.run(debug=True)