# app_flask.py — Flask REST API serving single-record predictions from a
# pre-trained random-forest pipeline (artifact uploaded via huggingface_hub).
import sys

import flask
from flask import Flask, request, jsonify
import joblib
import numpy as np
import pandas as pd
# Initialize the Flask application.
app = Flask(__name__)

# Path to the serialized scikit-learn pipeline (preprocessing + estimator).
MODEL_PATH = 'best_random_forest_pipeline.joblib'

# Load the trained model pipeline once at startup so every request reuses it.
try:
    model_pipeline = joblib.load(MODEL_PATH)
except FileNotFoundError:
    # Fail fast: the service is useless without its model artifact.
    print(f"Error: Model file not found. Make sure '{MODEL_PATH}' is in the same directory.")
    # exit() is intended for interactive sessions; sys.exit(1) sets a
    # non-zero process status so supervisors/CI can detect the failure.
    sys.exit(1)
@app.route('/')
def home():
    """Health-check endpoint confirming the service is up."""
    return "Flask app is running. Use the /predict endpoint to get predictions."
# Define an endpoint for a single prediction.
@app.route('/predict', methods=['POST'])
def predict():
    """Predict for one record.

    Expects a JSON object whose keys match the original (pre-encoding)
    feature column names the pipeline was trained on. Returns
    ``{"prediction": <float>}`` on success, or ``{"error": <message>}``
    with an appropriate HTTP error status on failure.
    """
    # methods=['POST'] already restricts the route, so no explicit
    # request.method check is needed (the original's check was dead code).
    try:
        # silent=True makes get_json return None (instead of raising) on a
        # missing or malformed body, so we can answer with a clean 400.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object.'}), 400

        # Single record -> one-row DataFrame. Column names must match the
        # features the pipeline's preprocessor was fit on.
        input_df = pd.DataFrame([data])

        # The preprocessor expects these columns as 'category' dtype.
        # NOTE(review): if training used fixed category sets, the categories
        # may need to be aligned here as well — confirm against the pipeline.
        categorical_cols = ['Product_Sugar_Content', 'Product_Type', 'Store_Id',
                            'Store_Size', 'Store_Location_City_Type', 'Store_Type']
        for col in categorical_cols:
            if col in input_df.columns:
                input_df[col] = input_df[col].astype('category')

        # The pipeline handles preprocessing internally.
        prediction = model_pipeline.predict(input_df)

        # prediction[0] is typically a NumPy scalar, which jsonify cannot
        # serialize directly; convert it to a plain Python float.
        return jsonify({'prediction': float(prediction[0])})
    except Exception as e:
        # Surface failures with a proper HTTP status instead of a 200 OK
        # carrying an error payload (the original's behavior).
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    # Local testing: put 'best_random_forest_pipeline.joblib' next to this
    # file and run `python app_flask.py`.
    #
    # From Colab, expose the local server with a tunnel such as ngrok, e.g.:
    #   from flask_ngrok2 import run_with_ngrok
    #   run_with_ngrok(app)
    #   app.run()
    #
    # Standard development entry point (debug=True enables the debugger and
    # auto-reloader; do not use it in production).
    app.run(debug=True)