# Backend / app.py
# HF-Sum's picture
# Upload 4 files
# 49bd160 verified
# Raw
# History Blame Contribute Delete
# 3.47 kB
import flask
from flask import Flask, request, jsonify
import joblib
import pandas as pd
import numpy as np
import sys # Import sys for stdout redirection
# ---------------------------------------------------------------------------
# Start-up: everything below runs once, when the module is imported.
# Progress messages go to standard output (print's default stream).
# ---------------------------------------------------------------------------
print('Starting Superkart Sales Predictor Flask app...')

# The Flask application object that the route decorators register against.
app = Flask("Superkart Sales Predictor")
print('Flask app instantiated.')

# Load the pre-trained model. The path is relative, so it is resolved against
# the process's current working directory. joblib files are pickle-based:
# only load a model file that comes from a trusted source.
print('Attempting to load model...')
loaded_model = joblib.load('best_rf_model.joblib')
print('Model loaded successfully!')

# Feature columns the model is fed, in order. They are written out by hand
# here (mirroring the notebook's X_train); a production app should load the
# list from a file saved alongside the model so the two cannot drift apart.
expected_features_list = [
    'Product_Weight',
    'Product_Sugar_Content',
    'Product_Allocated_Area',
    'Product_Type',
    'Product_MRP',
    'Store_Size',
    'Store_Location_City_Type',
    'Store_Type',
    'yr_since_store_estab',
]
# Define the prediction endpoint
@app.route('/predict', methods=['POST'])
def predict():
    """Run the loaded model on one record or a batch of records sent as JSON.

    The request body must be a JSON object (a single record) or a non-empty
    JSON array of objects (a batch), each keyed by feature name. Keys that
    are not in ``expected_features_list`` are dropped; expected features
    that are absent are filled with NaN before the frame reaches the model.

    Returns:
        * 200 and the model's predictions as a JSON list on success.
        * 400 and ``{'error': ...}`` when the body is not parseable JSON, or
          is anything other than a dict or a non-empty list of dicts.
        * 500 and ``{'error': ...}`` when building the input frame or the
          prediction itself raises.
    """
    print('Prediction request received.', file=sys.stdout)

    # silent=True turns a malformed body into None instead of a raised
    # BadRequest. Without it the parse error would land in the generic
    # handler below and be reported as a 500 although the client is at fault.
    print('Attempting to get JSON data from request...', file=sys.stdout)
    data = request.get_json(force=True, silent=True)
    print(f'Received data: {data}', file=sys.stdout)

    # Normalise to a list of row dicts. An empty list or a list holding
    # non-dict items cannot form a meaningful feature frame, so reject it
    # here with a 400 rather than letting the model fail on it.
    if isinstance(data, dict):
        records = [data]
    elif isinstance(data, list) and data and all(isinstance(row, dict) for row in data):
        records = data
    else:
        print('Invalid input data format.', file=sys.stdout)
        return jsonify({'error': 'Invalid input data format, expected dict or list of dicts'}), 400

    try:
        # Reindex so that every expected column is present, in the expected
        # order; missing features become NaN and unknown keys are discarded.
        print('Reindexing input DataFrame and converting dtypes...', file=sys.stdout)
        input_df = pd.DataFrame(records).reindex(
            columns=expected_features_list, fill_value=np.nan
        )

        # Cast the categorical features to pandas 'category' dtype, which is
        # what the model's preprocessing was given at training time
        # (per the original notebook -- confirm if the pipeline changes).
        categorical_cols_expected = ['Product_Sugar_Content', 'Product_Type', 'Store_Size',
                                     'Store_Location_City_Type', 'Store_Type']
        input_df = input_df.astype(
            {col: 'category' for col in categorical_cols_expected if col in input_df.columns}
        )
        print('Input DataFrame prepared for prediction.', file=sys.stdout)

        print('Making prediction...', file=sys.stdout)
        predictions = loaded_model.predict(input_df)
        print('Prediction successful.', file=sys.stdout)

        # tolist() converts the prediction array into plain Python values
        # that jsonify can serialise.
        return jsonify(predictions.tolist())
    except Exception as e:
        # Top-level boundary for the endpoint: log to stderr and answer with
        # a JSON 500 rather than Flask's default HTML error page.
        print(f'Error during prediction: {str(e)}', file=sys.stderr)  # Log errors to stderr
        return jsonify({'error': str(e)}), 500
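# Local testing only: keep this commented out (or otherwise protected) when
# deploying. Note that debug=True enables the interactive debugger and must
# never be combined with host='0.0.0.0' on a publicly reachable machine.
# if __name__ == '__main__':
#     app.run(debug=True, host='0.0.0.0', port=5000)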