Spaces:
Sleeping
Sleeping
# -------------------------------------------------------
# Flask Web Framework for Product Store Sales Prediction
# -------------------------------------------------------

import os
import numpy as np
import pandas as pd
import joblib
from flask import Flask, request, jsonify

# Initialize the Flask application.
product_sales_api = Flask("SuperKart Product Sales Predictor")

# Path to the serialized model; it lives at the root of the Space.
model_path_in_space = "random_forest_pipeline.joblib"

# Load the trained RandomForest pipeline once at import time. On failure
# the app still starts, but `model` is left as None so each endpoint can
# report the problem instead of crashing here.
try:
    model = joblib.load(model_path_in_space)
    print(f"Model loaded successfully from {model_path_in_space}")
except Exception as e:
    print(f"Error loading model: {e}")
    model = None  # Set model to None to indicate loading failure
# -------------------------------------------------------
# Home page route (GET)
# -------------------------------------------------------
@product_sales_api.route("/")
def home():
    """Handle GET requests to the root URL ('/').

    Returns:
        A plain-text welcome message, or a (message, 500) tuple when the
        model failed to load at startup.
    """
    if model is None:
        return "Error: Model could not be loaded. Please check the logs.", 500
    return "Welcome to the SuperKart Product Store Sales Prediction API!"
# -------------------------------------------------------
# Single product prediction endpoint (POST)
# -------------------------------------------------------
@product_sales_api.route("/v1/sales", methods=["POST"])
def predict_sales():
    """Handle POST requests to the '/v1/sales' endpoint.

    Expects a JSON payload whose keys match the original (pre-encoding)
    training feature names; the loaded pipeline performs its own
    preprocessing.

    Returns:
        JSON {'Predicted_Product_Store_Sales_Total': float} on success,
        or {'error': ...} with status 400 (bad request) / 500 (failure).
    """
    if model is None:
        return jsonify({'error': 'Model not loaded'}), 500
    try:
        product_data = request.get_json()
        if not product_data:
            # get_json() returns None when the body is missing or is not
            # valid JSON; fail fast with a 400 instead of an opaque 500.
            return jsonify({'error': 'Request body must be JSON'}), 400

        # Original feature columns the pipeline was trained on. Incoming
        # JSON keys must match these names; the pipeline handles any
        # encoding/transformation itself.
        original_feature_cols = [
            'Product_Weight', 'Product_Allocated_Area', 'Product_MRP',
            'Store_Establishment_Year', 'Product_Sugar_Content', 'Product_Type',
            'Store_Id', 'Store_Size', 'Store_Location_City_Type', 'Store_Type'
        ]
        # .get() tolerates missing keys: absent features become None/NaN
        # and are left for the pipeline to handle.
        input_sample = {col: product_data.get(col) for col in original_feature_cols}
        input_df = pd.DataFrame([input_sample])

        # Ensure categorical columns carry the dtype the pipeline expects.
        categorical_cols = [
            'Product_Sugar_Content', 'Product_Type', 'Store_Id',
            'Store_Size', 'Store_Location_City_Type', 'Store_Type'
        ]
        for col in categorical_cols:
            if col in input_df.columns:
                input_df[col] = input_df[col].astype('category')

        # The pipeline handles preprocessing; take the single prediction.
        prediction = model.predict(input_df)[0]
        return jsonify({'Predicted_Product_Store_Sales_Total': float(prediction)})
    except Exception as e:
        # Log the error for debugging.
        print(f"Error during single prediction: {e}")
        return jsonify({'error': str(e)}), 500
# -------------------------------------------------------
# Batch prediction endpoint (CSV upload, POST)
# -------------------------------------------------------
@product_sales_api.route("/v1/salesbatch", methods=["POST"])
def predict_sales_batch():
    """Handle POST requests to the '/v1/salesbatch' endpoint.

    Expects a multipart upload under the 'file' field containing a CSV
    whose columns match the original training features.

    Returns:
        A JSON array of the input records with a
        'Predicted_Product_Store_Sales_Total' column appended, or
        {'error': ...} with status 400 (bad upload) / 500 (failure).
    """
    if model is None:
        return jsonify({'error': 'Model not loaded'}), 500
    try:
        # Guard clauses: every invalid-upload path returns an explicit 400
        # (the original `if file:` block had no else branch, so a falsy
        # file fell through and returned None).
        if 'file' not in request.files:
            return jsonify({'error': 'No file part in the request'}), 400
        file = request.files['file']
        # If the user does not select a file, the browser submits an
        # empty file without a filename.
        if file.filename == '':
            return jsonify({'error': 'No selected file'}), 400

        # Read the CSV directly from the file stream; columns are assumed
        # to match the original training features.
        data = pd.read_csv(file)

        # Restore categorical dtypes lost when reading from CSV.
        categorical_cols = [
            'Product_Sugar_Content', 'Product_Type', 'Store_Id',
            'Store_Size', 'Store_Location_City_Type', 'Store_Type'
        ]
        for col in categorical_cols:
            if col in data.columns:
                data[col] = data[col].astype('category')

        # The pipeline handles preprocessing for the whole batch.
        predictions = model.predict(data)
        data['Predicted_Product_Store_Sales_Total'] = predictions
        return data.to_json(orient='records')
    except Exception as e:
        # Log the error for debugging.
        print(f"Error during batch prediction: {e}")
        return jsonify({'error': str(e)}), 500
# -------------------------------------------------------
# Run the Flask API (typically not run in deployment; Gunicorn handles this)
# -------------------------------------------------------
# This part is mainly for local testing. In a Docker deployment with Gunicorn,
# Gunicorn will call the 'product_sales_api' application directly.
# if __name__ == '__main__':
#     product_sales_api.run(host='0.0.0.0', port=5000, debug=True)