# Standard library imports
import sys

# Third-party imports
import joblib  # For loading the serialized model
import numpy as np
import pandas as pd  # For data manipulation
from flask import Flask, jsonify, request  # For creating the Flask API

# Initialize the Flask application for the SuperKart sales-prediction service.
superkart_sales_api = Flask("SuperKart Sales Prediction API")

# Load the trained machine learning model pipeline once at startup.
try:
    model = joblib.load("best_sales_forecasting_model.joblib")
except FileNotFoundError:
    print(
        "Error: 'best_sales_forecasting_model.joblib' not found. "
        "Make sure the model file is in the same directory as app.py."
    )
    # Abort startup explicitly: the API cannot serve predictions without a
    # model. sys.exit(1) signals failure to the supervising process, unlike
    # the bare exit() builtin which is a site-module convenience.
    sys.exit(1)

# --- Feature-engineering constants (must match the values used at training time) ---

# Reference year used to derive 'Store_Age'; same value as during training.
CURRENT_YEAR = 2025

# Product types treated as perishable vs. non-perishable during training.
# frozenset gives O(1) membership tests and makes the intent (closed sets)
# explicit; contents are identical to the lists used at training time.
PERISHABLE_TYPES = frozenset(
    ['Fruits and Vegetables', 'Dairy', 'Meat', 'Seafood', 'Breakfast']
)
NON_PERISHABLE_TYPES = frozenset(
    ['Snack Foods', 'Frozen Foods', 'Household', 'Baking Goods', 'Canned',
     'Health and Hygiene', 'Soft Drinks', 'Breads', 'Hard Drinks', 'Others',
     'Starchy Foods']
)


def _engineer_features(input_df):
    """
    Recreate the engineered features used at training time.

    Parameters
    ----------
    input_df : pd.DataFrame
        Raw request data; expected to contain at least
        'Store_Establishment_Year' and 'Product_Type' columns
        (schema assumed from training — TODO confirm against the pipeline).

    Returns
    -------
    pd.DataFrame
        A new DataFrame with 'Store_Age', 'Product_Id_Prefix' and
        'Product_Perishability' added and the identifier columns
        ('Product_Id', 'Store_Id') dropped, matching the preprocessor's
        training-time input. The input frame is not mutated.
    """
    df = input_df.copy()

    # 'Store_Age': years since establishment, relative to the training year.
    df['Store_Age'] = CURRENT_YEAR - df['Store_Establishment_Year']

    # 'Product_Id_Prefix': first two characters of the product id; fall back
    # to a sentinel when the id was not supplied in the payload.
    if 'Product_Id' in df:
        df['Product_Id_Prefix'] = df['Product_Id'].apply(lambda x: x[:2])
    else:
        df['Product_Id_Prefix'] = 'NA'

    # 'Product_Perishability': bucket the product type; unseen types map to
    # 'Unknown' rather than raising.
    df['Product_Perishability'] = df['Product_Type'].apply(
        lambda x: 'Perishable' if x in PERISHABLE_TYPES
        else ('Non-Perishable' if x in NON_PERISHABLE_TYPES else 'Unknown')
    )

    # Identifier columns were dropped before training the preprocessor/model.
    return df.drop(['Product_Id', 'Store_Id'], axis=1, errors='ignore')


@superkart_sales_api.get('/')
def home():
    """Handle GET requests to the root URL and return a welcome message."""
    return "Welcome to the SuperKart Sales Prediction API!"


@superkart_sales_api.post('/predict_sales')
def predict_sales():
    """
    Handle POST requests to the '/predict_sales' endpoint.

    Expects a JSON payload containing product and store details for a single
    product/store combination and returns the predicted sales total as JSON.

    Returns
    -------
    flask.Response
        200 with {'Predicted_Product_Store_Sales_Total': float} on success;
        400 with {'error': ...} for a missing/empty JSON body;
        500 with {'error': ...} for any other failure.
    """
    try:
        product_store_data = request.get_json()
        if not product_store_data:
            # Reject empty or non-JSON bodies up front instead of failing
            # with an opaque error deep inside the feature engineering.
            return jsonify({'error': 'Request body must be a non-empty JSON object.'}), 400

        # One request = one row.
        input_df = pd.DataFrame([product_store_data])
        processed_input_df = _engineer_features(input_df)

        # The loaded model is a full pipeline (preprocessing + regressor).
        predicted_sales_total = model.predict(processed_input_df)[0]

        # Convert numpy scalar to a JSON-serializable Python float.
        predicted_sales_total = round(float(predicted_sales_total), 2)

        return jsonify({'Predicted_Product_Store_Sales_Total': predicted_sales_total})
    except Exception as e:
        # Surface the failure with an explicit server-error status so API
        # clients can detect it (previously this implicitly returned 200 OK).
        return jsonify({'error': str(e)}), 500
# return jsonify({'predictions': predicted_sales_totals}) # # except Exception as e: # # Return an error message if something goes wrong # return jsonify({'error': str(e)}) # Standard Flask run configuration if __name__ == '__main__': # Run the Flask app # In a production environment, use a production-ready WSGI server like Gunicorn # Using host='0.0.0.0' makes the server accessible externally (important for Hugging Face Spaces) superkart_sales_api.run(debug=True, host='0.0.0.0', port=7860) # Listen on all interfaces and port 5000