# Uploaded by SharleyK via huggingface_hub (commit 71c7b12, verified)
# Import necessary libraries
import numpy as np
import joblib  # For loading the serialized model
import pandas as pd  # For data manipulation
from flask import Flask, request, jsonify  # For creating the Flask API

# Initialize the Flask application.
# App name reflects the SuperKart Sales Prediction task.
superkart_sales_api = Flask("SuperKart Sales Prediction API")

# Load the trained machine learning model at import time so every request
# reuses the same in-memory pipeline. Filename matches the saved SuperKart model.
try:
    model = joblib.load("best_sales_forecasting_model.joblib")
except FileNotFoundError:
    # Abort startup: without the model the API cannot serve predictions.
    # SystemExit with a message prints to stderr and exits with status 1
    # (a plain exit() would report success, status 0, on a fatal error).
    raise SystemExit(
        "Error: 'best_sales_forecasting_model.joblib' not found. "
        "Make sure the model file is in the same directory as app.py."
    )
# Route for the home page (GET request)
@superkart_sales_api.get('/')
def home():
    """Respond to GET '/' with a short welcome string identifying the API."""
    greeting = "Welcome to the SuperKart Sales Prediction API!"
    return greeting
# Endpoint for single product sales prediction (POST request)
@superkart_sales_api.post('/predict_sales')
def predict_sales():
    """
    Handle POST requests to '/predict_sales'.

    Expects a JSON payload containing product and store details and returns
    the predicted sales total as a JSON response.

    Returns:
        200 with {'Predicted_Product_Store_Sales_Total': <float>} on success,
        400 with {'error': ...} for a missing/invalid body or missing field,
        500 with {'error': ...} for unexpected server-side failures.
    """
    # silent=True yields None for a missing/malformed body instead of raising,
    # so we can answer with an explicit 400 rather than a generic error.
    product_store_data = request.get_json(silent=True)
    if not product_store_data:
        return jsonify({'error': 'Request body must be a non-empty JSON object.'}), 400
    try:
        # Single record -> one-row DataFrame so it matches training layout.
        input_df = pd.DataFrame([product_store_data])

        # --- Feature Engineering: 'Store_Age' ---
        # Must use the same reference year as during training.
        current_year = 2025
        input_df['Store_Age'] = current_year - input_df['Store_Establishment_Year']

        # --- Feature Engineering: 'Product_Id_Prefix' ---
        # First two characters of the product id; 'NA' when the id is absent.
        if 'Product_Id' in input_df:
            input_df['Product_Id_Prefix'] = input_df['Product_Id'].apply(lambda x: x[:2])
        else:
            input_df['Product_Id_Prefix'] = 'NA'

        # --- Feature Engineering: 'Product_Perishability' ---
        perishable_types = ['Fruits and Vegetables', 'Dairy', 'Meat', 'Seafood', 'Breakfast']
        non_perishable_types = ['Snack Foods', 'Frozen Foods', 'Household', 'Baking Goods', 'Canned',
                                'Health and Hygiene', 'Soft Drinks', 'Breads', 'Hard Drinks', 'Others',
                                'Starchy Foods']
        input_df['Product_Perishability'] = input_df['Product_Type'].apply(
            lambda x: 'Perishable' if x in perishable_types
            else ('Non-Perishable' if x in non_perishable_types else 'Unknown')
        )

        # Drop identifier columns ('Product_Id', 'Store_Id') — they were
        # dropped before fitting the preprocessor and model as well.
        processed_input_df = input_df.drop(['Product_Id', 'Store_Id'], axis=1, errors='ignore')

        # Predict with the loaded pipeline; cast the numpy scalar to a plain
        # Python float so jsonify can serialize it.
        predicted_sales_total = round(float(model.predict(processed_input_df)[0]), 2)
        return jsonify({'Predicted_Product_Store_Sales_Total': predicted_sales_total})
    except KeyError as e:
        # A required input field (e.g. 'Store_Establishment_Year' or
        # 'Product_Type') was absent from the payload — client error.
        return jsonify({'error': f'Missing required field: {e}'}), 400
    except Exception as e:
        # Anything else (model/preprocessor failure, bad dtypes, ...) is a
        # server-side failure; report it with an explicit 500 status.
        return jsonify({'error': str(e)}), 500
# Define an endpoint for batch sales prediction (POST request with CSV)
# This endpoint is commented out as the original task focused on a single prediction endpoint.
# If batch prediction is needed, this section can be adapted.
# @superkart_sales_api.post('/predict_sales_batch')
# def predict_sales_batch():
# """
# This function handles POST requests to the '/predict_sales_batch' endpoint.
# It expects a CSV file containing product and store details for multiple entries
# and returns the predicted sales totals as a dictionary in the JSON response.
# """
# try:
# # Get the uploaded CSV file from the request
# file = request.files['file']
#
# # Read the CSV file into a Pandas DataFrame
# input_data = pd.read_csv(file)
#
# # Drop identifier columns if they are present in the input CSV
# input_data = input_data.drop(['Product_Id', 'Store_Id'], axis=1, errors='ignore')
#
# # Make predictions for all entries in the DataFrame
# predicted_sales_totals = model.predict(input_data).tolist()
#
# # If you need to associate predictions with original identifiers, you would need
# # to handle that before dropping 'Product_Id' and 'Store_Id'. For simplicity
# # here, we return a list of predictions.
# return jsonify({'predictions': predicted_sales_totals})
#
# except Exception as e:
# # Return an error message if something goes wrong
# return jsonify({'error': str(e)})
# Standard Flask run configuration
if __name__ == '__main__':
    # Run the Flask development server.
    # In a production environment, use a production-ready WSGI server like Gunicorn.
    # host='0.0.0.0' makes the server reachable externally (required for
    # Hugging Face Spaces). debug must stay False here: Flask's debug mode
    # exposes the Werkzeug interactive debugger, which allows arbitrary code
    # execution when the server is bound to all interfaces.
    superkart_sales_api.run(debug=False, host='0.0.0.0', port=7860)  # Listen on all interfaces, port 7860