# Uploaded by SharleyK via huggingface_hub (commit 71c7b12, verified)
# Import necessary libraries
import numpy as np
import joblib  # For loading the serialized model
import pandas as pd  # For data manipulation
from flask import Flask, request, jsonify  # For creating the Flask API

# Initialize the Flask application.
# App name reflects the SuperKart Sales Prediction task.
superkart_sales_api = Flask("SuperKart Sales Prediction API")

# Load the trained machine learning model at import time so every request
# reuses the same in-memory pipeline. Filename matches the saved SuperKart model.
try:
    model = joblib.load("best_sales_forecasting_model.joblib")
except FileNotFoundError:
    # Abort startup: without the model the API cannot serve predictions.
    # SystemExit with a message prints to stderr and exits with status 1
    # (a plain exit() would report success, status 0, on a fatal error).
    raise SystemExit(
        "Error: 'best_sales_forecasting_model.joblib' not found. "
        "Make sure the model file is in the same directory as app.py."
    )
# Route for the home page (GET request)
@superkart_sales_api.get('/')
def home():
    """Respond to GET '/' with a short welcome string identifying the API."""
    greeting = "Welcome to the SuperKart Sales Prediction API!"
    return greeting
# Endpoint for single product sales prediction (POST request)
@superkart_sales_api.post('/predict_sales')
def predict_sales():
    """
    Handle POST requests to '/predict_sales'.

    Expects a JSON payload containing product and store details and returns
    the predicted sales total as a JSON response.

    Returns:
        200 with {'Predicted_Product_Store_Sales_Total': <float>} on success,
        400 with {'error': ...} for a missing/invalid body or missing field,
        500 with {'error': ...} for unexpected server-side failures.
    """
    # silent=True yields None for a missing/malformed body instead of raising,
    # so we can answer with an explicit 400 rather than a generic error.
    product_store_data = request.get_json(silent=True)
    if not product_store_data:
        return jsonify({'error': 'Request body must be a non-empty JSON object.'}), 400
    try:
        # Single record -> one-row DataFrame so it matches training layout.
        input_df = pd.DataFrame([product_store_data])

        # --- Feature Engineering: 'Store_Age' ---
        # Must use the same reference year as during training.
        current_year = 2025
        input_df['Store_Age'] = current_year - input_df['Store_Establishment_Year']

        # --- Feature Engineering: 'Product_Id_Prefix' ---
        # First two characters of the product id; 'NA' when the id is absent.
        if 'Product_Id' in input_df:
            input_df['Product_Id_Prefix'] = input_df['Product_Id'].apply(lambda x: x[:2])
        else:
            input_df['Product_Id_Prefix'] = 'NA'

        # --- Feature Engineering: 'Product_Perishability' ---
        perishable_types = ['Fruits and Vegetables', 'Dairy', 'Meat', 'Seafood', 'Breakfast']
        non_perishable_types = ['Snack Foods', 'Frozen Foods', 'Household', 'Baking Goods', 'Canned',
                                'Health and Hygiene', 'Soft Drinks', 'Breads', 'Hard Drinks', 'Others',
                                'Starchy Foods']
        input_df['Product_Perishability'] = input_df['Product_Type'].apply(
            lambda x: 'Perishable' if x in perishable_types
            else ('Non-Perishable' if x in non_perishable_types else 'Unknown')
        )

        # Drop identifier columns ('Product_Id', 'Store_Id') — they were
        # dropped before fitting the preprocessor and model as well.
        processed_input_df = input_df.drop(['Product_Id', 'Store_Id'], axis=1, errors='ignore')

        # Predict with the loaded pipeline; cast the numpy scalar to a plain
        # Python float so jsonify can serialize it.
        predicted_sales_total = round(float(model.predict(processed_input_df)[0]), 2)
        return jsonify({'Predicted_Product_Store_Sales_Total': predicted_sales_total})
    except KeyError as e:
        # A required input field (e.g. 'Store_Establishment_Year' or
        # 'Product_Type') was absent from the payload — client error.
        return jsonify({'error': f'Missing required field: {e}'}), 400
    except Exception as e:
        # Anything else (model/preprocessor failure, bad dtypes, ...) is a
        # server-side failure; report it with an explicit 500 status.
        return jsonify({'error': str(e)}), 500
# Define an endpoint for batch sales prediction (POST request with CSV)
# This endpoint is commented out as the original task focused on a single prediction endpoint.
# If batch prediction is needed, this section can be adapted.
# @superkart_sales_api.post('/predict_sales_batch')
# def predict_sales_batch():
# """
# This function handles POST requests to the '/predict_sales_batch' endpoint.
# It expects a CSV file containing product and store details for multiple entries
# and returns the predicted sales totals as a dictionary in the JSON response.
# """
# try:
# # Get the uploaded CSV file from the request
# file = request.files['file']
#
# # Read the CSV file into a Pandas DataFrame
# input_data = pd.read_csv(file)
#
# # Drop identifier columns if they are present in the input CSV
# input_data = input_data.drop(['Product_Id', 'Store_Id'], axis=1, errors='ignore')
#
# # Make predictions for all entries in the DataFrame
# predicted_sales_totals = model.predict(input_data).tolist()
#
# # If you need to associate predictions with original identifiers, you would need
# # to handle that before dropping 'Product_Id' and 'Store_Id'. For simplicity
# # here, we return a list of predictions.
# return jsonify({'predictions': predicted_sales_totals})
#
# except Exception as e:
# # Return an error message if something goes wrong
# return jsonify({'error': str(e)})
# Standard Flask run configuration
if __name__ == '__main__':
    # Run the Flask development server.
    # In a production environment, use a production-ready WSGI server like Gunicorn.
    # host='0.0.0.0' makes the server reachable externally (required for
    # Hugging Face Spaces). debug must stay False here: Flask's debug mode
    # exposes the Werkzeug interactive debugger, which allows arbitrary code
    # execution when the server is bound to all interfaces.
    superkart_sales_api.run(debug=False, host='0.0.0.0', port=7860)  # Listen on all interfaces, port 7860