# app_flask.py — Flask REST API serving single-record predictions from a
# pre-trained random-forest pipeline (artifact uploaded via huggingface_hub).
import sys

import flask
from flask import Flask, request, jsonify
import joblib
import numpy as np
import pandas as pd
# Initialize the Flask application.
app = Flask(__name__)

# Path to the serialized scikit-learn pipeline (preprocessing + estimator).
MODEL_PATH = 'best_random_forest_pipeline.joblib'

# Load the trained model pipeline once at startup so every request reuses it.
try:
    model_pipeline = joblib.load(MODEL_PATH)
except FileNotFoundError:
    # Fail fast: the service is useless without its model artifact.
    print(f"Error: Model file not found. Make sure '{MODEL_PATH}' is in the same directory.")
    # exit() is intended for interactive sessions; sys.exit(1) sets a
    # non-zero process status so supervisors/CI can detect the failure.
    sys.exit(1)
@app.route('/')
def home():
    """Health-check endpoint confirming the service is up."""
    return "Flask app is running. Use the /predict endpoint to get predictions."
# Define an endpoint for a single prediction.
@app.route('/predict', methods=['POST'])
def predict():
    """Predict for one record.

    Expects a JSON object whose keys match the original (pre-encoding)
    feature column names the pipeline was trained on. Returns
    ``{"prediction": <float>}`` on success, or ``{"error": <message>}``
    with an appropriate HTTP error status on failure.
    """
    # methods=['POST'] already restricts the route, so no explicit
    # request.method check is needed (the original's check was dead code).
    try:
        # silent=True makes get_json return None (instead of raising) on a
        # missing or malformed body, so we can answer with a clean 400.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object.'}), 400

        # Single record -> one-row DataFrame. Column names must match the
        # features the pipeline's preprocessor was fit on.
        input_df = pd.DataFrame([data])

        # The preprocessor expects these columns as 'category' dtype.
        # NOTE(review): if training used fixed category sets, the categories
        # may need to be aligned here as well — confirm against the pipeline.
        categorical_cols = ['Product_Sugar_Content', 'Product_Type', 'Store_Id',
                            'Store_Size', 'Store_Location_City_Type', 'Store_Type']
        for col in categorical_cols:
            if col in input_df.columns:
                input_df[col] = input_df[col].astype('category')

        # The pipeline handles preprocessing internally.
        prediction = model_pipeline.predict(input_df)

        # prediction[0] is typically a NumPy scalar, which jsonify cannot
        # serialize directly; convert it to a plain Python float.
        return jsonify({'prediction': float(prediction[0])})
    except Exception as e:
        # Surface failures with a proper HTTP status instead of a 200 OK
        # carrying an error payload (the original's behavior).
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    # Local testing: put 'best_random_forest_pipeline.joblib' next to this
    # file and run `python app_flask.py`.
    #
    # From Colab, expose the local server with a tunnel such as ngrok, e.g.:
    #   from flask_ngrok2 import run_with_ngrok
    #   run_with_ngrok(app)
    #   app.run()
    #
    # Standard development entry point (debug=True enables the debugger and
    # auto-reloader; do not use it in production).
    app.run(debug=True)