"""SuperKart Sales Forecasting App (Streamlit).

Collects product and store details from sidebar widgets, assembles them into a
single-row DataFrame whose column names match the training features, and runs
it through a pre-trained scikit-learn pipeline to predict
``Product_Store_Sales_Total``.

Fixes over the previous revision:
- removed the unused Flask app (this is a Streamlit app; the Flask ``app``
  object had no routes and was never served),
- removed the duplicate ``import joblib``,
- the model is now loaded exactly once through the ``@st.cache_resource``
  loader (previously it was loaded eagerly AND via a cached loader whose
  result was never used, and ``model = load_model`` was missing the call
  parentheses).
"""

import joblib
import pandas as pd
import streamlit as st


@st.cache_resource
def load_model():
    """Load and cache the trained model pipeline across Streamlit reruns.

    NOTE(review): the previous revision referenced two different artifacts
    ("best_random_forest_model.joblib" and
    "deployment_files/best_random_forest_pipeline.joblib"). The path kept here
    is the one whose object was actually used for prediction — confirm it is
    the intended deployment artifact.
    """
    return joblib.load("best_random_forest_model.joblib")


# The pipeline bundles preprocessing (scaling / one-hot encoding) with the
# regressor, so it accepts the raw feature DataFrame built below.
model_pipeline = load_model()

# ---------------------------------------------------------------------------
# Page header
# ---------------------------------------------------------------------------
st.title('SuperKart Sales Forecasting App')
st.write('Enter the product and store details to get a sales forecast.')

st.sidebar.header('Product and Store Details')

# ---------------------------------------------------------------------------
# Numerical inputs
# ---------------------------------------------------------------------------
product_weight = st.sidebar.number_input('Product Weight', min_value=0.0, value=10.0)
product_allocated_area = st.sidebar.number_input('Product Allocated Area', min_value=0.0, value=0.05)
product_mrp = st.sidebar.number_input('Product MRP', min_value=0.0, value=100.0)
store_establishment_year = st.sidebar.number_input(
    'Store Establishment Year', min_value=1900, max_value=2024, value=2000
)

# ---------------------------------------------------------------------------
# Categorical inputs
# ---------------------------------------------------------------------------
# These option lists mirror the unique category values seen during training.
# Keep them in sync with the data the pipeline's encoder was fitted on: an
# unseen category will either error or be silently ignored, depending on how
# the encoder was configured.
sugar_content_options = ['Low Sugar', 'Regular', 'No Sugar']
product_type_options = [
    'Frozen Foods', 'Dairy', 'Canned', 'Baking Goods', 'Health and Hygiene',
    'Snack Foods', 'Household', 'Meat', 'Soft Drinks', 'Breads', 'Hard Drinks',
    'Others', 'Starchy Foods', 'Breakfast', 'Seafood', 'Fruits and Vegetables',
]
store_id_options = ['OUT004', 'OUT003', 'OUT001', 'OUT002']
store_size_options = ['Medium', 'High', 'Small']
store_location_options = ['Tier 2', 'Tier 1', 'Tier 3']
store_type_options = ['Supermarket Type2', 'Departmental Store', 'Supermarket Type1', 'Food Mart']

product_sugar_content = st.sidebar.selectbox('Product Sugar Content', sugar_content_options)
product_type = st.sidebar.selectbox('Product Type', product_type_options)
store_id = st.sidebar.selectbox('Store ID', store_id_options)
store_size = st.sidebar.selectbox('Store Size', store_size_options)
store_location_city_type = st.sidebar.selectbox('Store Location City Type', store_location_options)
store_type = st.sidebar.selectbox('Store Type', store_type_options)

# ---------------------------------------------------------------------------
# Assemble the model input
# ---------------------------------------------------------------------------
# Keys must match the training feature names exactly; the pipeline's
# ColumnTransformer selects columns by name, so order is not critical but the
# names are.
input_data = {
    'Product_Weight': product_weight,
    'Product_Allocated_Area': product_allocated_area,
    'Product_MRP': product_mrp,
    'Store_Establishment_Year': store_establishment_year,
    'Product_Sugar_Content': product_sugar_content,
    'Product_Type': product_type,
    'Store_Id': store_id,
    'Store_Size': store_size,
    'Store_Location_City_Type': store_location_city_type,
    'Store_Type': store_type,
}

# Single-row DataFrame in the shape the pipeline expects.
input_df = pd.DataFrame([input_data])

st.subheader('Input Details:')
st.write(input_df)

# ---------------------------------------------------------------------------
# Prediction
# ---------------------------------------------------------------------------
if st.button('Predict Sales'):
    try:
        prediction = model_pipeline.predict(input_df)
        st.subheader('Predicted Product Store Sales Total:')
        st.write(f'{prediction[0]:,.2f}')
    except Exception as e:
        # Surface the failure to the user instead of crashing the app; the
        # usual cause is a column-name / category mismatch with training data.
        st.error(f"An error occurred during prediction: {e}")
        st.write("Please ensure the input features are correct and match the expected format.")