File size: 5,544 Bytes
63d0f3b
 
 
 
f812f6f
5b9431d
f812f6f
 
 
 
 
 
63d0f3b
 
83d2673
 
 
 
 
 
63d0f3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import streamlit as st
import pandas as pd
import numpy as np
import joblib
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
from flask import Flask, request, jsonify

# NOTE(review): a Flask app is instantiated but no routes are registered and
# it is never run in this Streamlit script — confirm whether it is needed.
app = Flask(__name__)

# ✅ Load your trained model here
# NOTE(review): two different model artifacts are referenced in this file
# ("best_random_forest_model.joblib" here and a deployment_files path in
# load_model below). The prediction code uses `model_pipeline`; confirm
# which artifact is canonical and drop the other load.
model_pipeline = joblib.load("best_random_forest_model.joblib")

# Load the trained model pipeline, cached so that Streamlit reruns
# (triggered by every widget interaction) do not re-deserialize the
# model from disk each time.
@st.cache_resource
def load_model():
    """Return the trained sklearn pipeline loaded from the deployment files."""
    return joblib.load('deployment_files/best_random_forest_pipeline.joblib')

# Bug fix: the original assigned the function object itself
# (`model = load_model`); the loader must be called to obtain the model.
model = load_model()

# ---- App header ----
st.title('SuperKart Sales Forecasting App')
st.write('Enter the product and store details to get a sales forecast.')

st.sidebar.header('Product and Store Details')

# ---- Numerical feature inputs ----
# One widget per numeric feature consumed by the model pipeline.
product_weight = st.sidebar.number_input('Product Weight', min_value=0.0, value=10.0)
product_allocated_area = st.sidebar.number_input('Product Allocated Area', min_value=0.0, value=0.05)
product_mrp = st.sidebar.number_input('Product MRP', min_value=0.0, value=100.0)
store_establishment_year = st.sidebar.number_input('Store Establishment Year', min_value=1900, max_value=2024, value=2000)

# ---- Categorical feature inputs ----
# Choice lists mirror the unique category values seen in the training data;
# update them if the training data's categories change.
SUGAR_CONTENT_CHOICES = ['Low Sugar', 'Regular', 'No Sugar']
PRODUCT_TYPE_CHOICES = ['Frozen Foods', 'Dairy', 'Canned', 'Baking Goods', 'Health and Hygiene', 'Snack Foods', 'Household', 'Meat', 'Soft Drinks', 'Breads', 'Hard Drinks', 'Others', 'Starchy Foods', 'Breakfast', 'Seafood', 'Fruits and Vegetables']
STORE_ID_CHOICES = ['OUT004', 'OUT003', 'OUT001', 'OUT002']
STORE_SIZE_CHOICES = ['Medium', 'High', 'Small']
CITY_TIER_CHOICES = ['Tier 2', 'Tier 1', 'Tier 3']
STORE_TYPE_CHOICES = ['Supermarket Type2', 'Departmental Store', 'Supermarket Type1', 'Food Mart']

product_sugar_content = st.sidebar.selectbox('Product Sugar Content', SUGAR_CONTENT_CHOICES)
product_type = st.sidebar.selectbox('Product Type', PRODUCT_TYPE_CHOICES)
store_id = st.sidebar.selectbox('Store ID', STORE_ID_CHOICES)
store_size = st.sidebar.selectbox('Store Size', STORE_SIZE_CHOICES)
store_location_city_type = st.sidebar.selectbox('Store Location City Type', CITY_TIER_CHOICES)
store_type = st.sidebar.selectbox('Store Type', STORE_TYPE_CHOICES)


# Assemble the single-row feature frame the pipeline expects. Column names
# (and their insertion order) match the features the model was trained on.
input_df = pd.DataFrame({
    'Product_Weight': [product_weight],
    'Product_Allocated_Area': [product_allocated_area],
    'Product_MRP': [product_mrp],
    'Store_Establishment_Year': [store_establishment_year],
    'Product_Sugar_Content': [product_sugar_content],
    'Product_Type': [product_type],
    'Store_Id': [store_id],
    'Store_Size': [store_size],
    'Store_Location_City_Type': [store_location_city_type],
    'Store_Type': [store_type],
})

# Echo the collected inputs back to the user for confirmation.
st.subheader('Input Details:')
st.write(input_df)


# Run a forecast when the user clicks the button. The raw single-row
# DataFrame is handed straight to the pipeline: the preprocessor fitted
# inside it applies the same column transformations (e.g. one-hot
# encoding) learned from the training data, so no manual column
# reordering is required here — only that input_df carries the same
# feature columns the pipeline was fitted on.
if st.button('Predict Sales'):
    try:
        prediction = model_pipeline.predict(input_df)
        st.subheader('Predicted Product Store Sales Total:')
        st.write(f'{prediction[0]:,.2f}')
    except Exception as e:
        # Surface pipeline errors (missing/mistyped features, etc.) in the UI
        # instead of crashing the app.
        st.error(f"An error occurred during prediction: {e}")
        st.write("Please ensure the input features are correct and match the expected format.")