"""Flask service exposing a pre-trained Superkart sales model over HTTP.

Importing this module instantiates the Flask app and loads the model from
'best_rf_model.joblib' (resolved relative to the current working directory).
A single POST endpoint, /predict, scores one record or a batch of records.
"""

import sys  # sys.stdout / sys.stderr are passed explicitly as print targets

import flask
from flask import Flask, request, jsonify
import joblib
import pandas as pd
import numpy as np

print('Starting Superkart Sales Predictor Flask app...', file=sys.stdout)

# Instantiate Flask app.
# NOTE(review): Flask documents the first argument as the import name of the
# module/package (usually __name__); left unchanged here to keep app.name stable.
app = Flask("Superkart Sales Predictor")
print('Flask app instantiated.', file=sys.stdout)

# Load the pre-trained model once at import time so every request reuses it.
print('Attempting to load model...', file=sys.stdout)
loaded_model = joblib.load('best_rf_model.joblib')
print('Model loaded successfully!', file=sys.stdout)

# Feature columns, in the order they are handed to the model.
# Per the original author these mirror the notebook's X_train structure; in a
# real app this list should be loaded from a saved artifact to stay consistent.
expected_features_list = [
    'Product_Weight',
    'Product_Sugar_Content',
    'Product_Allocated_Area',
    'Product_Type',
    'Product_MRP',
    'Store_Size',
    'Store_Location_City_Type',
    'Store_Type',
    'yr_since_store_estab',
]

# Columns cast to pandas 'category' dtype before prediction. Every entry is
# also in expected_features_list, so each is present after the reindex below.
categorical_cols_expected = [
    'Product_Sugar_Content',
    'Product_Type',
    'Store_Size',
    'Store_Location_City_Type',
    'Store_Type',
]


# Define the prediction endpoint
@app.route('/predict', methods=['POST'])
def predict():
    """Score the posted record(s) with the loaded model.

    The request body must be JSON: either one object (a single record) or a
    non-empty array of objects (a batch). Keys not listed in
    ``expected_features_list`` are dropped; listed keys that are absent are
    filled with NaN.

    Returns:
        A JSON array of predictions with status 200 on success;
        ``{'error': ...}`` with status 400 when the body is not valid JSON or
        is not an object / non-empty array of objects;
        ``{'error': ...}`` with status 500 when anything else fails.
    """
    print('Prediction request received.', file=sys.stdout)
    try:
        # Get data from POST request
        print('Attempting to get JSON data from request...', file=sys.stdout)
        # force=True ignores the Content-Type header. silent=True makes an
        # unparseable body come back as None instead of raising, so malformed
        # JSON is answered with 400 below rather than 500 by the catch-all.
        data = request.get_json(force=True, silent=True)
        print(f'Received data: {data}', file=sys.stdout)

        # Normalise the payload to a list of record dicts.
        if isinstance(data, dict):
            records = [data]
        elif (
            isinstance(data, list)
            and data
            and all(isinstance(record, dict) for record in data)
        ):
            records = data
        else:
            # Covers malformed JSON (None), scalars, empty lists and lists
            # holding anything other than objects. Such lists would otherwise
            # become all-NaN or zero-row frames and still be sent to the model.
            print('Invalid input data format.', file=sys.stdout)
            return jsonify({'error': 'Invalid input data format, expected dict or list of dicts'}), 400

        input_df = pd.DataFrame(records)

        # Reindex so that exactly the expected columns are present, in the
        # expected order; anything the client omitted becomes NaN.
        print('Reindexing input DataFrame and converting dtypes...', file=sys.stdout)
        input_df = input_df.reindex(columns=expected_features_list, fill_value=np.nan)

        # Cast categorical columns to 'category' dtype (the original author
        # notes the model's preprocessor expects this). No existence check is
        # needed: the reindex above guarantees every column is present.
        for col in categorical_cols_expected:
            input_df[col] = input_df[col].astype('category')
        print('Input DataFrame prepared for prediction.', file=sys.stdout)

        # Make prediction using the loaded model pipeline
        print('Making prediction...', file=sys.stdout)
        predictions = loaded_model.predict(input_df)
        print('Prediction successful.', file=sys.stdout)

        # Convert the prediction array to a plain list so it is JSON-serialisable.
        return jsonify(predictions.tolist())

    except Exception as e:
        # Top-level boundary: report the failure as JSON instead of letting
        # the request fall through to Flask's default error page.
        print(f'Error during prediction: {str(e)}', file=sys.stderr)  # Log errors to stderr
        # NOTE(review): str(e) is returned to the client and may expose
        # internal details; kept because callers may display this message.
        return jsonify({'error': str(e)}), 500


# This part is for local testing and should be commented out or protected for deployment
# if __name__ == '__main__':
#     app.run(debug=True, host='0.0.0.0', port=5000)