# -------------------------------------------------------
# Flask Web Framework for Product Store Sales Prediction
# -------------------------------------------------------
# Import necessary libraries
import os
import numpy as np
import pandas as pd
import joblib
from flask import Flask, request, jsonify
# Create the Flask application instance that Gunicorn will serve.
product_sales_api = Flask("SuperKart Product Sales Predictor")

# The serialized RandomForest pipeline sits at the root of the Space.
model_path_in_space = "random_forest_pipeline.joblib"

# Load the trained pipeline up front; on failure keep the app importable
# and let each route report the problem instead of crashing at startup.
try:
    model = joblib.load(model_path_in_space)
    print(f"Model loaded successfully from {model_path_in_space}")
except Exception as e:
    print(f"Error loading model: {e}")
    model = None  # Sentinel: routes check this and respond with HTTP 500.
# -------------------------------------------------------
# Define a route for the home page (GET request)
# -------------------------------------------------------
@product_sales_api.route('/')
def home():
    """Serve the root URL ('/') via GET.

    Returns a plain-text welcome banner, or a 500 error string when the
    model failed to load at startup (module-global ``model`` is ``None``).
    """
    if model is not None:
        return "Welcome to the SuperKart Product Store Sales Prediction API!"
    return "Error: Model could not be loaded. Please check the logs.", 500
# -------------------------------------------------------
# Define an endpoint for single product prediction (POST request)
# -------------------------------------------------------
@product_sales_api.route('/v1/sales', methods=['POST'])
def predict_sales():
    """Predict sales for a single product via POST '/v1/sales'.

    Expects a JSON payload whose keys match the original (pre-encoding)
    training feature names; the pipeline's internal preprocessor handles
    the one-hot encoding.

    Returns:
        200 with ``{'Predicted_Product_Store_Sales_Total': float}`` on success,
        400 when the request body is missing or not valid JSON,
        500 when the model is unavailable or prediction fails.
    """
    if model is None:
        return jsonify({'error': 'Model not loaded'}), 500
    # silent=True yields None (instead of raising) on a missing/malformed
    # body, letting us return a clear 400 rather than a confusing 500.
    product_data = request.get_json(silent=True)
    if product_data is None:
        return jsonify({'error': 'Request body must be valid JSON'}), 400
    try:
        # Original feature columns the pipeline was trained on; incoming
        # JSON keys must match these names exactly.
        original_feature_cols = [
            'Product_Weight', 'Product_Allocated_Area', 'Product_MRP',
            'Store_Establishment_Year', 'Product_Sugar_Content', 'Product_Type',
            'Store_Id', 'Store_Size', 'Store_Location_City_Type', 'Store_Type'
        ]
        # .get() leaves absent keys as None; downstream handling (imputer
        # or a raised error) surfaces through the except branch below.
        input_df = pd.DataFrame(
            [{col: product_data.get(col) for col in original_feature_cols}]
        )
        # Cast categorical columns so the preprocessor treats them as such.
        categorical_cols = [
            'Product_Sugar_Content', 'Product_Type', 'Store_Id',
            'Store_Size', 'Store_Location_City_Type', 'Store_Type'
        ]
        for col in categorical_cols:
            input_df[col] = input_df[col].astype('category')
        # The pipeline performs preprocessing + regression in one call.
        prediction = model.predict(input_df)[0]
        return jsonify({'Predicted_Product_Store_Sales_Total': float(prediction)})
    except Exception as e:
        # Log the error for debugging and surface the message to the caller.
        print(f"Error during single prediction: {e}")
        return jsonify({'error': str(e)}), 500
# -------------------------------------------------------
# Define an endpoint for batch predictions (CSV upload)
# -------------------------------------------------------
@product_sales_api.route('/v1/salesbatch', methods=['POST'])
def predict_sales_batch():
    """Predict sales for many products via POST '/v1/salesbatch'.

    Expects a multipart upload under the 'file' key containing a CSV whose
    columns match the original training feature names. Responds with the
    uploaded records plus a 'Predicted_Product_Store_Sales_Total' column,
    serialized as a JSON array.

    Returns:
        200 with the enriched records as JSON on success,
        400 when the file part is missing or has an empty filename,
        500 when the model is unavailable or prediction fails.
    """
    if model is None:
        return jsonify({'error': 'Model not loaded'}), 500
    # Guard clauses: every validation failure returns explicitly. (The
    # original fell through an `if file:` with no else, implicitly
    # returning None and triggering a Flask 500.)
    if 'file' not in request.files:
        return jsonify({'error': 'No file part in the request'}), 400
    file = request.files['file']
    # Browsers submit an empty-filename part when no file was selected.
    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400
    try:
        # CSV columns are assumed to match the original training features.
        data = pd.read_csv(file)
        # Cast categorical columns (read back as object dtype from CSV).
        categorical_cols = [
            'Product_Sugar_Content', 'Product_Type', 'Store_Id',
            'Store_Size', 'Store_Location_City_Type', 'Store_Type'
        ]
        for col in categorical_cols:
            if col in data.columns:
                data[col] = data[col].astype('category')
        # The pipeline handles preprocessing; attach predictions per row.
        data['Predicted_Product_Store_Sales_Total'] = model.predict(data)
        return data.to_json(orient='records')
    except Exception as e:
        # Log the error for debugging and surface the message to the caller.
        print(f"Error during batch prediction: {e}")
        return jsonify({'error': str(e)}), 500
# -------------------------------------------------------
# Run the Flask API (typically not run in deployment, Gunicorn handles this)
# -------------------------------------------------------
# This part is mainly for local testing. In a Docker deployment with Gunicorn,
# Gunicorn will call the 'product_sales_api' application directly.
# if __name__ == '__main__':
# product_sales_api.run(host='0.0.0.0', port=5000, debug=True)
|