# sk_model / app.py
# Provenance (Hugging Face Hub page artifacts, commented out so the file parses):
#   grkavi0912's picture
#   Upload folder using huggingface_hub
#   5b9431d verified
import streamlit as st
import pandas as pd
import numpy as np
import joblib  # fix: was imported twice in the original
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
from flask import Flask, request, jsonify

# NOTE(review): a Flask app object is created but no routes are defined in
# this file and the UI below is pure Streamlit — confirm whether this Flask
# layer is leftover from an earlier deployment attempt.
app = Flask(__name__)

# Load the trained model pipeline once at import time.
# NOTE(review): `load_model()` below reads a *different* artifact path
# ('deployment_files/best_random_forest_pipeline.joblib') — confirm which
# file is the intended production model.
model_pipeline = joblib.load("best_random_forest_model.joblib")
# Load the trained model pipeline
@st.cache_resource
def load_model():
    """Load and cache the trained Random Forest pipeline from disk.

    Returns:
        The fitted sklearn pipeline deserialized from
        'deployment_files/best_random_forest_pipeline.joblib'. Streamlit's
        resource cache ensures the artifact is loaded once per process.
    """
    return joblib.load('deployment_files/best_random_forest_pipeline.joblib')


# Bug fix: the original wrote `model = load_model` (no parentheses), which
# bound the function object itself — `model` was never a fitted pipeline.
model = load_model()
# ---------------------------------------------------------------------------
# Streamlit UI: page header plus one sidebar widget per model feature.
#   Numerical:   Product_Weight, Product_Allocated_Area, Product_MRP,
#                Store_Establishment_Year
#   Categorical: Product_Sugar_Content, Product_Type, Store_Id, Store_Size,
#                Store_Location_City_Type, Store_Type
# ---------------------------------------------------------------------------
st.title('SuperKart Sales Forecasting App')
st.write('Enter the product and store details to get a sales forecast.')

st.sidebar.header('Product and Store Details')

# Numerical features.
product_weight = st.sidebar.number_input('Product Weight', min_value=0.0, value=10.0)
product_allocated_area = st.sidebar.number_input('Product Allocated Area', min_value=0.0, value=0.05)
product_mrp = st.sidebar.number_input('Product MRP', min_value=0.0, value=100.0)
store_establishment_year = st.sidebar.number_input('Store Establishment Year', min_value=1900, max_value=2024, value=2000)

# Categorical features — each option list mirrors the unique values observed
# in the training data.
sugar_content_options = ['Low Sugar', 'Regular', 'No Sugar']
product_type_options = ['Frozen Foods', 'Dairy', 'Canned', 'Baking Goods', 'Health and Hygiene', 'Snack Foods', 'Household', 'Meat', 'Soft Drinks', 'Breads', 'Hard Drinks', 'Others', 'Starchy Foods', 'Breakfast', 'Seafood', 'Fruits and Vegetables']
store_id_options = ['OUT004', 'OUT003', 'OUT001', 'OUT002']
store_size_options = ['Medium', 'High', 'Small']
store_location_options = ['Tier 2', 'Tier 1', 'Tier 3']
store_type_options = ['Supermarket Type2', 'Departmental Store', 'Supermarket Type1', 'Food Mart']

_select = st.sidebar.selectbox
product_sugar_content = _select('Product Sugar Content', sugar_content_options)
product_type = _select('Product Type', product_type_options)
store_id = _select('Store ID', store_id_options)
store_size = _select('Store Size', store_size_options)
store_location_city_type = _select('Store Location City Type', store_location_options)
store_type = _select('Store Type', store_type_options)
# Assemble the widget values into a single-row DataFrame whose column names
# match the features the pipeline's preprocessor was fitted on.
input_df = pd.DataFrame([{
    'Product_Weight': product_weight,
    'Product_Allocated_Area': product_allocated_area,
    'Product_MRP': product_mrp,
    'Store_Establishment_Year': store_establishment_year,
    'Product_Sugar_Content': product_sugar_content,
    'Product_Type': product_type,
    'Store_Id': store_id,
    'Store_Size': store_size,
    'Store_Location_City_Type': store_location_city_type,
    'Store_Type': store_type,
}])

# Echo the collected inputs back to the user.
st.subheader('Input Details:')
st.write(input_df)
# Run inference on demand. The raw input_df is handed straight to the
# pipeline: its fitted preprocessor is responsible for encoding and column
# transformation, so only the input column *names* need to match the
# training frame — no manual one-hot alignment is done here.
if st.button('Predict Sales'):
    try:
        forecast = model_pipeline.predict(input_df)
        st.subheader('Predicted Product Store Sales Total:')
        st.write(f'{forecast[0]:,.2f}')
    except Exception as e:
        # Surface the failure in the UI rather than crashing the app.
        st.error(f"An error occurred during prediction: {e}")
        st.write("Please ensure the input features are correct and match the expected format.")