# (Hugging Face Spaces status banner — "Spaces: Sleeping" — captured with the
# paste; not part of the application code.)
| import numpy as np | |
| import joblib # For loading the serialized model | |
| import pandas as pd # For data manipulation | |
| from flask import Flask, request, jsonify # For creating the Flask API | |
| import os # To check if the model file exists | |
| import logging | |
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)  # module-level logger used by all functions below
logger.info("Starting SuperKart Sales Predictor API loading file...")
# Initialize the Flask application
superkart_sales_predictor_api = Flask("SuperKart Sales Predictor")
# Define the path to the trained machine learning model
model_path = "superkart_regression_model_v1.0.joblib"
# Globals populated by the load_* helpers at startup; None means "not loaded yet / load failed".
model = None
scaler = None  # Initialize scaler
# Placeholder for training data columns and MRP min/max (replace with loading from saved files in production)
training_columns = None
mrp_bins = None  # Use mrp_bins directly
def load_model():
    """Load the serialized regression model into the module-level ``model`` global.

    Intended to run once at app startup; a repeat call is a no-op while the
    model is already loaded. Failures are logged and leave ``model`` as None,
    which the endpoints check before predicting.
    """
    global model
    if model is not None:
        # Already loaded — nothing to do.
        return
    try:
        logger.info(f"Loading model from {model_path}...")
        model = joblib.load(model_path)
        logger.info("Model loaded successfully.")
    except FileNotFoundError:
        logger.error(f"Error: Model file not found at {model_path}")
    except Exception as e:
        logger.error(f"An error occurred while loading the model: {e}")
def load_scaler():
    """Load the fitted StandardScaler into the module-level ``scaler`` global.

    No-op when the scaler is already present. Failures are logged and leave
    ``scaler`` as None.
    """
    global scaler
    if scaler is not None:
        return
    # Path to the saved scaler artifact (hoisted out of the try: a plain
    # assignment cannot raise, and the except handler needs the name).
    scaler_path = "scaler.joblib"
    try:
        logger.info("Loading scaler...")
        scaler = joblib.load(scaler_path)
        logger.info("Scaler loaded successfully.")
    except FileNotFoundError:
        logger.error(f"Error: Scaler file not found at {scaler_path}")
    except Exception as e:
        logger.error(f"An error occurred while loading the scaler: {e}")
def load_training_artifacts():
    """Load training-time artifacts: the training column list and MRP bin edges.

    Populates the module-level ``training_columns`` and ``mrp_bins`` globals.
    Errors are logged; on failure the corresponding global stays None and the
    prediction endpoint refuses to run.
    """
    global training_columns, mrp_bins
    training_columns_path = "training_columns.joblib"
    mrp_bins_path = "mrp_bins.joblib"
    try:
        # Column names the model was trained on (used to align inference input).
        training_columns = joblib.load(training_columns_path)
        logger.info("Training column names loaded successfully.")
        # Fixed bin edges for Product_MRP so inference bins match training bins.
        mrp_bins = joblib.load(mrp_bins_path)
        logger.info("MRP bin edges loaded successfully.")
    except FileNotFoundError as e:
        logger.error(f"Error loading training artifacts: {e}")
    except Exception as e:
        logger.error(f"An error occurred while loading training artifacts: {e}")
# Load model, scaler, and training artifacts when the app starts
# (import-time side effects so the module is also ready when served by a
# WSGI server that imports it rather than running __main__).
load_model()
load_scaler()
load_training_artifacts()
# Define a route for the home page (GET request)
# NOTE(review): the pasted source had no decorator despite this comment — without
# it the endpoint is never registered with Flask. Confirm the intended URL rule.
@superkart_sales_predictor_api.route("/", methods=["GET"])
def home():
    """
    Handle GET requests to the root URL ('/') of the API.

    Returns:
        str: a plain-text welcome message reporting the loading status of the
        model, the scaler, and the training artifacts.
    """
    logger.info("Home page request")  # plain string: no placeholders, no f-string needed
    parts = ["Welcome to the SuperKart Sales Prediction API! "]
    parts.append("Model loading failed. " if model is None else "Model loaded successfully. ")
    parts.append("Scaler loading failed. " if scaler is None else "Scaler loaded successfully. ")
    if training_columns is None or mrp_bins is None:
        parts.append("Training artifacts loading failed.")
    else:
        parts.append("Training artifacts loaded successfully.")
    return "".join(parts)
# Define an endpoint for single sales prediction (POST request)
# NOTE(review): the pasted source had no decorator — without it the endpoint is
# never registered with Flask. Confirm the intended URL rule.
@superkart_sales_predictor_api.route("/predict_sales", methods=["POST"])
def predict_sales():
    """
    Handle POST requests to the '/predict_sales' endpoint.

    Expects a JSON payload containing product and store details, applies the
    same preprocessing used at training time (feature engineering, one-hot
    encoding, column alignment, scaling), and returns the predicted sales.

    Returns:
        200 with {'predicted_sales': <float>} on success,
        500 when required artifacts are not loaded,
        400 on malformed input or any preprocessing/prediction error.
    """
    if model is None or scaler is None or training_columns is None or mrp_bins is None:
        return jsonify({'error': 'Required artifacts (model, scaler, training columns, MRP bins) not loaded. Cannot make predictions.'}), 500
    try:
        # Get the JSON data from the request body and wrap it as a one-row frame.
        input_data = request.get_json()
        logger.info(f"Received input data: {input_data}")
        input_df = pd.DataFrame([input_data])

        # 1. Feature engineering — must mirror the training-time transforms.
        # Years_Since_Establishment is computed relative to a fixed reference year (2025).
        input_df['Years_Since_Establishment'] = 2025 - input_df['Store_Establishment_Year']
        # First two characters of Product_Id encode the broad product category.
        input_df['Product_Broad_Category'] = input_df['Product_Id'].apply(lambda x: x[:2])
        # Bin Product_MRP with the bin edges saved at training time so the
        # resulting categories match the training categories exactly.
        mrp_labels = ['Low', 'Medium', 'High']
        input_df['MRP_Category'] = pd.cut(input_df['Product_MRP'], bins=mrp_bins, labels=mrp_labels, include_lowest=True)

        # 2. One-hot encoding (Product_Id itself is represented via
        # Product_Broad_Category, so it is not in the categorical list).
        categorical_cols = ['Product_Sugar_Content', 'Product_Type', 'Store_Id', 'Store_Size', 'Store_Location_City_Type', 'Store_Type', 'Product_Broad_Category', 'MRP_Category']
        input_encoded = pd.get_dummies(input_df, columns=categorical_cols, drop_first=True)

        # 3. Align with the training column set in one step: reindex adds any
        # dummy columns a single row cannot produce (filled with 0), drops
        # extras, and enforces the training column order.
        preprocessed_input = input_encoded.reindex(columns=training_columns, fill_value=0)

        # 4. Scale numerical features with the scaler fitted at training time.
        numerical_cols_to_scale = ['Product_Weight', 'Product_Allocated_Area', 'Product_MRP', 'Years_Since_Establishment']
        preprocessed_input[numerical_cols_to_scale] = scaler.transform(preprocessed_input[numerical_cols_to_scale])

        # The model was trained on log1p-transformed sales; invert with expm1.
        predicted_sales_log = model.predict(preprocessed_input)[0]
        # Cast to a builtin float: numpy scalars are not JSON-serializable by
        # Flask's default JSON provider.
        predicted_sales = float(np.expm1(predicted_sales_log))

        return jsonify({'predicted_sales': predicted_sales})
    except Exception as e:
        logger.error(f"Error during prediction: {e}")
        return jsonify({'error': str(e)}), 400
# Version-tagged status endpoint (GET): reports model/scaler load state.
# NOTE(review): no @route decorator appears in this source, so this function is
# never registered with Flask — confirm the intended URL rule.
def home_version():
    """
    Return a version-tagged welcome message.

    Lazily loads the model and the scaler if either is missing, then reports
    whether both are available.
    """
    logger.info(f"Home page request")
    global model, scaler
    if model is None:
        load_model()
    if scaler is None:
        # Load scaler when the endpoint is called if not already loaded.
        load_scaler()
    both_ready = model is not None and scaler is not None
    if both_ready:
        return "Welcome to the SuperKart Sales Prediction API! Model loaded successfully version 1.0."
    return "Welcome to the SuperKart Sales Prediction API! Model loading failed version 1.0."
# To run the Flask app (for local testing)
if __name__ == '__main__':
    # In a production environment, you would typically use a production-ready WSGI server
    # such as Gunicorn or uWSGI.
    logger.info("About to start the SuperKart Sales Predictor API...")
    # Load the model and scaler when the app starts
    # (redundant with the import-time calls above, but harmless: each loader
    # is a no-op when its artifact is already loaded).
    load_model()
    load_scaler()
    load_training_artifacts()  # Load training artifacts as well
    # NOTE(review): debug=True enables the Werkzeug interactive debugger —
    # never expose it on 0.0.0.0 outside local testing.
    superkart_sales_predictor_api.run(debug=True, host='0.0.0.0', port=7860)