# SKBE/app.py: Flask API for SuperKart sales prediction
# Import necessary libraries
import numpy as np
import joblib # For loading the serialized model
import pandas as pd # For data manipulation
from flask import Flask, request, jsonify # For creating the Flask API
import os
# Initialize the Flask application
superkart_sales_predictor_api = Flask("SuperKart Sales Predictor")
# Load the trained machine learning model
model = joblib.load("best_sales_prediction_model.joblib")
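# Optional sanity check (a sketch, assuming the serialized object is a
# scikit-learn estimator or Pipeline fitted on a DataFrame): when available,
# feature_names_in_ lists the column names the model expects at predict time,
# which should line up with the required_columns built in prepare_input_data.
expected_features = getattr(model, "feature_names_in_", None)
if expected_features is not None:
    print("Model expects features:", list(expected_features))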
def prepare_input_data(data):
"""Prepare input data with proper feature engineering and categorical value validation"""
df = pd.DataFrame([data] if isinstance(data, dict) else data)
# Define known categorical values from training data
KNOWN_VALUES = {
'Product_Sugar_Content': ['Low Sugar', 'Regular', 'No Sugar', 'reg'],
'Product_Type': ['Dairy', 'Meat', 'Snack Foods', 'Fruits and Vegetables',
'Frozen Foods', 'Baking Goods', 'Health and Hygiene', 'Canned',
'Hard Drinks', 'Household', 'Soft Drinks', 'Starchy Foods',
'Breakfast', 'Seafood', 'Bread', 'Others'],
'Store_Id': ['OUT001', 'OUT002', 'OUT003', 'OUT004'],
'Store_Size': ['Small', 'Medium', 'High'],
'Store_Location_City_Type': ['Tier 1', 'Tier 2', 'Tier 3'],
'Store_Type': ['Supermarket Type1', 'Supermarket Type2', 'Departmental Store', 'Food Mart'],
'Product_Category_Code': ['DR', 'FD', 'NC']
}
# Feature engineering - same as in training
if 'Product_Id' in df.columns:
df['Product_Category_Code'] = df['Product_Id'].str[:2]
df['Product_Number'] = df['Product_Id'].str[2:].astype(int)
# Validate Product_Category_Code and fix invalid ones
invalid_codes = ~df['Product_Category_Code'].isin(KNOWN_VALUES['Product_Category_Code'])
if invalid_codes.any():
df.loc[invalid_codes, 'Product_Category_Code'] = 'DR' # Default to DR for invalid codes
# Create store age feature
if 'Store_Establishment_Year' in df.columns:
current_year = 2024
df['Store_Age'] = current_year - df['Store_Establishment_Year']
# Handle Fat_Content -> Sugar_Content mapping with validation
if 'Product_Fat_Content' in df.columns:
fat_to_sugar_map = {
'Regular': 'Regular',
'Low Fat': 'Low Sugar',
'reg': 'reg',
'LF': 'Low Sugar'
}
df['Product_Sugar_Content'] = df['Product_Fat_Content'].map(fat_to_sugar_map).fillna('Regular')
df = df.drop('Product_Fat_Content', axis=1)
# Remove Product_Id column for prediction
if 'Product_Id' in df.columns:
df = df.drop('Product_Id', axis=1)
# Required columns in exact training order
required_columns = [
'Product_Weight', 'Product_Sugar_Content', 'Product_Allocated_Area',
'Product_Type', 'Product_MRP', 'Store_Id', 'Store_Establishment_Year',
'Store_Size', 'Store_Location_City_Type', 'Store_Type',
'Product_Category_Code', 'Product_Number', 'Store_Age'
]
# Add missing columns with proper defaults
for col in required_columns:
if col not in df.columns:
if col == 'Product_Allocated_Area':
df[col] = 1000.0
elif col == 'Store_Establishment_Year':
df[col] = 2000
elif col == 'Product_Sugar_Content':
df[col] = 'Regular'
elif col == 'Product_Type':
df[col] = 'Dairy'
elif col == 'Store_Id':
df[col] = 'OUT001'
elif col == 'Store_Size':
df[col] = 'Medium'
elif col == 'Store_Location_City_Type':
df[col] = 'Tier 1'
elif col == 'Store_Type':
df[col] = 'Supermarket Type1'
else:
df[col] = 0
# Validate and fix categorical values to prevent unknown category errors
for col in KNOWN_VALUES:
if col in df.columns:
# Replace unknown values with the first known value
df[col] = df[col].where(df[col].isin(KNOWN_VALUES[col]), KNOWN_VALUES[col][0])
# Reorder columns to match exact training order
df = df[required_columns]
return df
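# Illustrative local use of prepare_input_data (a sketch; the field values below
# are made up and only meant to show the expected input shape):
#   sample = {
#       "Product_Id": "FD001",
#       "Product_Weight": 12.5,
#       "Product_Fat_Content": "Low Fat",
#       "Product_MRP": 120.0,
#       "Store_Id": "OUT002",
#       "Store_Establishment_Year": 2009,
#   }
#   prepare_input_data(sample)  # -> single-row DataFrame in required_columns order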
# Define a route for the home page (GET request)
@superkart_sales_predictor_api.get('/')
def home():
"""
This function handles GET requests to the root URL ('/') of the API.
It returns a welcome message and API information.
"""
return jsonify({
"message": "Welcome to the SuperKart Sales Prediction API!",
"version": "v1.0",
"endpoints": {
"/": "GET - API information",
"/health": "GET - Health check",
"/v1/sales": "POST - Single product sales prediction",
"/v1/salesbatch": "POST - Batch sales prediction"
}
})
# Define a route for health check (GET request)
@superkart_sales_predictor_api.get('/health')
def health():
"""
Health check endpoint to verify API status.
"""
return jsonify({
"status": "healthy",
"model_status": "loaded",
"api_version": "v1.0"
})
# Define an endpoint for single product sales prediction (POST request)
@superkart_sales_predictor_api.post('/v1/sales')
def predict_sales():
"""
This function handles POST requests to the '/v1/sales' endpoint.
It expects a JSON payload containing product and store details and returns
the predicted sales amount as a JSON response.
"""
try:
        # Get the JSON data from the request body; silent=True returns None
        # instead of raising when the payload is missing or not valid JSON
        product_data = request.get_json(silent=True)
        if not product_data:
            return jsonify({'error': 'No data provided'}), 400
# Prepare input data with proper preprocessing
input_df = prepare_input_data(product_data)
# Make prediction
predicted_sales = model.predict(input_df)[0]
# Convert to Python float and round to 2 decimal places
predicted_sales = round(float(predicted_sales), 2)
# Return the predicted sales
return jsonify({
'Predicted Sales (in dollars)': predicted_sales,
'status': 'success'
})
except Exception as e:
return jsonify({'error': str(e), 'status': 'error'}), 500
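# Example call to /v1/sales (illustrative only; the field values are made up and
# the URL assumes a local run on the default port 7860):
#   curl -X POST http://localhost:7860/v1/sales \
#        -H "Content-Type: application/json" \
#        -d '{"Product_Id": "FD001", "Product_Weight": 12.5,
#             "Product_Fat_Content": "Low Fat", "Product_Type": "Dairy",
#             "Product_MRP": 120.0, "Store_Id": "OUT002",
#             "Store_Establishment_Year": 2009, "Store_Size": "Medium",
#             "Store_Location_City_Type": "Tier 2",
#             "Store_Type": "Supermarket Type1"}'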
# Define an endpoint for batch prediction (POST request)
@superkart_sales_predictor_api.post('/v1/salesbatch')
def predict_sales_batch():
"""
This function handles POST requests to the '/v1/salesbatch' endpoint.
It expects a CSV file containing product and store details for multiple products
and returns the predicted sales amounts as a dictionary in the JSON response.
"""
try:
        # Get the uploaded CSV file from the request; .get() avoids an unhandled
        # BadRequestKeyError when the 'file' field is missing from the form data
        file = request.files.get('file')
        if not file:
            return jsonify({'error': 'No file provided'}), 400
# Read the CSV file into a Pandas DataFrame
input_data = pd.read_csv(file)
# Process each row through the preprocessing pipeline
processed_rows = []
for _, row in input_data.iterrows():
row_dict = row.to_dict()
processed_df = prepare_input_data(row_dict)
processed_rows.append(processed_df)
# Combine all processed rows
batch_input_df = pd.concat(processed_rows, ignore_index=True)
# Make predictions for all products in the DataFrame
predicted_sales_list = model.predict(batch_input_df).tolist()
# Round predictions to 2 decimal places
predicted_sales_list = [round(float(sales), 2) for sales in predicted_sales_list]
# Create a dictionary of predictions with indices or IDs as keys
if 'Product_Id' in input_data.columns:
product_ids = input_data['Product_Id'].tolist()
output_dict = dict(zip(product_ids, predicted_sales_list))
else:
# Use row indices if no Product_Id column
indices = [f"Product_{i+1}" for i in range(len(predicted_sales_list))]
output_dict = dict(zip(indices, predicted_sales_list))
# Return the predictions dictionary as a JSON response
return jsonify({
'predictions': output_dict,
'total_products': len(predicted_sales_list),
'status': 'success'
})
except Exception as e:
return jsonify({'error': str(e), 'status': 'error'}), 500
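# Example call to /v1/salesbatch (illustrative; 'products.csv' is a hypothetical
# file whose columns match the fields shown in the single-prediction example):
#   curl -X POST http://localhost:7860/v1/salesbatch -F "file=@products.csv"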
# Run the Flask application if this script is executed directly
if __name__ == '__main__':
port = int(os.environ.get('PORT', 7860)) # Hugging Face uses port 7860
superkart_sales_predictor_api.run(host='0.0.0.0', port=port, debug=False)
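# Running locally (a sketch, assuming the Space's dependencies are installed):
#   python app.py
# The same Flask app object can also be served by a WSGI server such as gunicorn:
#   gunicorn -b 0.0.0.0:7860 app:superkart_sales_predictor_api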