# Flask deployment script: serves single-record sales predictions from a
# trained random-forest pipeline ('best_random_forest_pipeline.joblib').
import flask
from flask import Flask, request, jsonify
import joblib
import numpy as np
import pandas as pd
# Initialize the Flask application.
app = Flask(__name__)

# Load the trained model pipeline once at startup.
# Ensure the path to your joblib file is correct.
try:
    model_pipeline = joblib.load('best_random_forest_pipeline.joblib')
except FileNotFoundError:
    print("Error: Model file not found. Make sure 'best_random_forest_pipeline.joblib' is in the same directory.")
    # BUG FIX: bare exit() relies on the `site` module and is meant for the
    # interactive shell; SystemExit terminates reliably in any context.
    raise SystemExit(1)


# BUG FIX: the original home() was never registered with Flask — without
# @app.route('/') the root URL returned 404.
@app.route('/')
def home():
    """Health-check endpoint confirming the service is up."""
    return "Flask app is running. Use the /predict endpoint to get predictions."
# Define an endpoint for a single prediction.
# BUG FIX: the original function was never registered with Flask — without
# @app.route the /predict URL returned 404. Restricting to methods=['POST']
# makes Flask reject other verbs with 405, so no manual method check is
# needed (the original returned None for non-POST requests).
@app.route('/predict', methods=['POST'])
def predict():
    """Predict sales for one record supplied as a JSON object.

    The request body must be a single JSON object whose keys match the
    original feature names the pipeline was trained on (before one-hot
    encoding / dropping the target). Returns ``{'prediction': <float>}``
    on success, or ``{'error': <message>}`` with HTTP 400 on failure.
    """
    try:
        # Parse the JSON body of the POST request (one data point).
        data = request.get_json()

        # Convert the incoming JSON object to a one-row DataFrame.
        # The keys must match the column names the preprocessor expects;
        # add validation here if required keys may be missing.
        input_df = pd.DataFrame([data])

        # The preprocessor inside the pipeline expects these columns to
        # carry the 'category' dtype.
        categorical_cols = ['Product_Sugar_Content', 'Product_Type', 'Store_Id', 'Store_Size', 'Store_Location_City_Type', 'Store_Type']
        for col in categorical_cols:
            if col in input_df.columns:
                input_df[col] = input_df[col].astype('category')
                # If training used a fixed category set, restore it here:
                # input_df[col] = input_df[col].cat.set_categories(original_categories[col])

        # The pipeline handles all preprocessing internally.
        prediction = model_pipeline.predict(input_df)

        # BUG FIX: prediction[0] is a numpy scalar, which jsonify cannot
        # serialize on all Flask versions — convert to a plain float.
        return jsonify({'prediction': float(prediction[0])})
    except Exception as e:
        # BUG FIX: report failures with HTTP 400 instead of a misleading 200.
        return jsonify({'error': str(e)}), 400
if __name__ == '__main__':
    # Local testing:
    #   1. Save this file as app_flask.py
    #   2. Keep 'best_random_forest_pipeline.joblib' in the same directory
    #   3. Run `python app_flask.py`
    #
    # From Colab you would normally expose the local server with ngrok, e.g.:
    #   from flask_ngrok2 import run_with_ngrok
    #   run_with_ngrok(app)
    #   app.run()
    # A standalone file like this one is the standard deployment shape.
    #
    # Start the Flask development server.
    app.run(debug=True)