# sk_model / app.py
# Provenance (Hugging Face Hub page artifacts, commented out so the file parses):
#   grkavi0912's picture
#   Upload folder using huggingface_hub
#   5b9431d verified
import streamlit as st
import pandas as pd
import numpy as np
import joblib  # fix: was imported twice in the original
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
from flask import Flask, request, jsonify

# NOTE(review): a Flask app object is created but no routes are defined in
# this file and the UI below is pure Streamlit — confirm whether this Flask
# layer is leftover from an earlier deployment attempt.
app = Flask(__name__)

# Load the trained model pipeline once at import time.
# NOTE(review): `load_model()` below reads a *different* artifact path
# ('deployment_files/best_random_forest_pipeline.joblib') — confirm which
# file is the intended production model.
model_pipeline = joblib.load("best_random_forest_model.joblib")
# Load the trained model pipeline
@st.cache_resource
def load_model():
    """Load and cache the trained Random Forest pipeline from disk.

    Returns:
        The fitted sklearn pipeline deserialized from
        'deployment_files/best_random_forest_pipeline.joblib'. Streamlit's
        resource cache ensures the artifact is loaded once per process.
    """
    return joblib.load('deployment_files/best_random_forest_pipeline.joblib')


# Bug fix: the original wrote `model = load_model` (no parentheses), which
# bound the function object itself — `model` was never a fitted pipeline.
model = load_model()
# ---------------------------------------------------------------------------
# Streamlit UI: page header plus one sidebar widget per model feature.
#   Numerical:   Product_Weight, Product_Allocated_Area, Product_MRP,
#                Store_Establishment_Year
#   Categorical: Product_Sugar_Content, Product_Type, Store_Id, Store_Size,
#                Store_Location_City_Type, Store_Type
# ---------------------------------------------------------------------------
st.title('SuperKart Sales Forecasting App')
st.write('Enter the product and store details to get a sales forecast.')

st.sidebar.header('Product and Store Details')

# Numerical features.
product_weight = st.sidebar.number_input('Product Weight', min_value=0.0, value=10.0)
product_allocated_area = st.sidebar.number_input('Product Allocated Area', min_value=0.0, value=0.05)
product_mrp = st.sidebar.number_input('Product MRP', min_value=0.0, value=100.0)
store_establishment_year = st.sidebar.number_input('Store Establishment Year', min_value=1900, max_value=2024, value=2000)

# Categorical features — each option list mirrors the unique values observed
# in the training data.
sugar_content_options = ['Low Sugar', 'Regular', 'No Sugar']
product_type_options = ['Frozen Foods', 'Dairy', 'Canned', 'Baking Goods', 'Health and Hygiene', 'Snack Foods', 'Household', 'Meat', 'Soft Drinks', 'Breads', 'Hard Drinks', 'Others', 'Starchy Foods', 'Breakfast', 'Seafood', 'Fruits and Vegetables']
store_id_options = ['OUT004', 'OUT003', 'OUT001', 'OUT002']
store_size_options = ['Medium', 'High', 'Small']
store_location_options = ['Tier 2', 'Tier 1', 'Tier 3']
store_type_options = ['Supermarket Type2', 'Departmental Store', 'Supermarket Type1', 'Food Mart']

_select = st.sidebar.selectbox
product_sugar_content = _select('Product Sugar Content', sugar_content_options)
product_type = _select('Product Type', product_type_options)
store_id = _select('Store ID', store_id_options)
store_size = _select('Store Size', store_size_options)
store_location_city_type = _select('Store Location City Type', store_location_options)
store_type = _select('Store Type', store_type_options)
# Assemble the widget values into a single-row DataFrame whose column names
# match the features the pipeline's preprocessor was fitted on.
input_df = pd.DataFrame([{
    'Product_Weight': product_weight,
    'Product_Allocated_Area': product_allocated_area,
    'Product_MRP': product_mrp,
    'Store_Establishment_Year': store_establishment_year,
    'Product_Sugar_Content': product_sugar_content,
    'Product_Type': product_type,
    'Store_Id': store_id,
    'Store_Size': store_size,
    'Store_Location_City_Type': store_location_city_type,
    'Store_Type': store_type,
}])

# Echo the collected inputs back to the user.
st.subheader('Input Details:')
st.write(input_df)
# Run inference on demand. The raw input_df is handed straight to the
# pipeline: its fitted preprocessor is responsible for encoding and column
# transformation, so only the input column *names* need to match the
# training frame — no manual one-hot alignment is done here.
if st.button('Predict Sales'):
    try:
        forecast = model_pipeline.predict(input_df)
        st.subheader('Predicted Product Store Sales Total:')
        st.write(f'{forecast[0]:,.2f}')
    except Exception as e:
        # Surface the failure in the UI rather than crashing the app.
        st.error(f"An error occurred during prediction: {e}")
        st.write("Please ensure the input features are correct and match the expected format.")