Spaces:

HF-Sum
/

Backend

Runtime error

App Files Files Community

Backend / app.py

HF-Sum

Upload 4 files

49bd160 verified 5 months ago

Raw

History Blame Contribute Delete

3.47 kB

	import flask
	from flask import Flask, request, jsonify
	import joblib
	import pandas as pd
	import numpy as np
	import sys # Import sys for stdout redirection

	def _log_startup(message):
	    """Print one startup progress line to standard output."""
	    print(message, file=sys.stdout)


	_log_startup('Starting Superkart Sales Predictor Flask app...')

	# Application object that the route decorator further down registers on.
	app = Flask("Superkart Sales Predictor")
	_log_startup('Flask app instantiated.')

	# The estimator is deserialized once, at import time; the path is relative,
	# so it is resolved against the process's working directory.
	_log_startup('Attempting to load model...')
	loaded_model = joblib.load('best_rf_model.joblib')
	_log_startup('Model loaded successfully!')

	# Feature columns, in the order the request DataFrame is reindexed to.
	# This list is maintained by hand (per the original author, mirroring the
	# training notebook's X_train); loading it from an artifact saved next to
	# the model would keep the two from drifting apart.
	expected_features_list = [
	    'Product_Weight',
	    'Product_Sugar_Content',
	    'Product_Allocated_Area',
	    'Product_Type',
	    'Product_MRP',
	    'Store_Size',
	    'Store_Location_City_Type',
	    'Store_Type',
	    'yr_since_store_estab',
	]

	# Define the prediction endpoint
	@app.route('/predict', methods=['POST'])
	def predict():
	    """Handle POST /predict: run the loaded model on the JSON request body.

	    The body must be a JSON object (one record) or a non-empty JSON array
	    of objects (several records). Keys are matched against
	    ``expected_features_list``; unknown keys are dropped and missing
	    features are passed to the model as NaN.

	    Returns:
	        200 with a JSON list of predictions, one entry per record.
	        400 with ``{'error': ...}`` when the body is not valid JSON, is not
	        a dict or list, is an empty list, or is a list containing non-dict
	        items.
	        500 with ``{'error': ...}`` when building the frame or predicting
	        raises.
	    """
	    print('Prediction request received.', file=sys.stdout)
	    try:
	        print('Attempting to get JSON data from request...', file=sys.stdout)
	        # silent=True turns an unparseable body into None instead of an
	        # exception, so it is rejected below as a client error (400) rather
	        # than being caught by the broad handler and reported as a 500.
	        data = request.get_json(force=True, silent=True)
	        print(f'Received data: {data}', file=sys.stdout)

	        # Normalize the payload to a list of record dicts.
	        if isinstance(data, dict):
	            records = [data]
	        elif (isinstance(data, list) and data
	              and all(isinstance(item, dict) for item in data)):
	            # An empty list or a list of scalars would otherwise reach the
	            # model as zero rows or as rows made entirely of NaN.
	            records = data
	        else:
	            print('Invalid input data format.', file=sys.stdout)
	            return jsonify({'error': 'Invalid input data format, expected dict or list of dicts'}), 400

	        # Reindex so every expected column is present, in the expected
	        # order; features absent from the request become NaN.
	        print('Reindexing input DataFrame and converting dtypes...', file=sys.stdout)
	        input_df = pd.DataFrame(records).reindex(
	            columns=expected_features_list, fill_value=np.nan
	        )

	        # Columns cast to pandas 'category' dtype before prediction.
	        categorical_cols_expected = (
	            'Product_Sugar_Content',
	            'Product_Type',
	            'Store_Size',
	            'Store_Location_City_Type',
	            'Store_Type',
	        )
	        input_df = input_df.astype({
	            col: 'category'
	            for col in categorical_cols_expected
	            if col in input_df.columns
	        })
	        print('Input DataFrame prepared for prediction.', file=sys.stdout)

	        print('Making prediction...', file=sys.stdout)
	        predictions = loaded_model.predict(input_df)
	        print('Prediction successful.', file=sys.stdout)

	        # tolist() converts the prediction array into plain Python values
	        # that jsonify can serialize.
	        return jsonify(predictions.tolist())

	    except Exception as e:
	        # Top-level boundary for the request: log to stderr and answer 500.
	        # NOTE(review): the raw exception text is returned to the caller;
	        # consider a generic message if this endpoint is publicly reachable.
	        print(f'Error during prediction: {str(e)}', file=sys.stderr)
	        return jsonify({'error': str(e)}), 500

	# This part is for local testing and should be commented out or protected for deployment
	# if __name__ == '__main__':
	# app.run(debug=True, host='0.0.0.0', port=5000)