"""Streamlit app that predicts SuperKart product sales from a saved pipeline.

The sidebar collects one row of product/store features, the row is shown
back to the user, and pressing "Predict Sales" runs the loaded pipeline on
that single-row DataFrame.
"""
import streamlit as st
import joblib
import pandas as pd
import numpy as np

# Path of the trained Random Forest pipeline loaded by the app.
MODEL_PATH = 'random_forest_pipeline.pkl'

# Category choices offered in the sidebar (taken from EDA of the dataset).
# NOTE(review): 'reg' looks like a raw-data alias of 'Regular' -- confirm the
# pipeline was trained with it as a separate category before keeping it.
SUGAR_CONTENT_OPTIONS = ['Low Sugar', 'Regular', 'No Sugar', 'reg']
PRODUCT_TYPE_OPTIONS = [
    'Breads', 'Snack Foods', 'Frozen Foods', 'Dairy', 'Seafood',
    'Starchy Foods', 'Soft Drinks', 'Meat', 'Hard Drinks',
    'Health and Hygiene', 'Baking Goods', 'Breakfast', 'Canned',
    'Fruits and Vegetables', 'Household', 'Others',
]
# Example list; replace with the actual Store IDs from the data if possible.
STORE_ID_OPTIONS = [
    'OUT027', 'OUT013', 'OUT011', 'OUT010',
    'OUT004', 'OUT001', 'OUT002', 'OUT003',
]
STORE_SIZE_OPTIONS = ['Medium', 'High', 'Low']
CITY_TYPE_OPTIONS = ['Tier 1', 'Tier 2', 'Tier 3']
STORE_TYPE_OPTIONS = [
    'Departmental Store', 'Supermarket Type 1',
    'Supermarket Type 2', 'Food Mart',
]


@st.cache_resource
def load_model(path):
    """Load the serialized pipeline from *path*.

    Streamlit re-executes the whole script on every widget interaction;
    caching the loaded object avoids deserializing the model each time.
    A failed load raises and is therefore not cached, so the app retries
    on the next rerun.

    Raises:
        FileNotFoundError: if *path* does not exist.
    """
    # joblib.load unpickles the file: only point this at trusted model files.
    return joblib.load(path)


# Load the trained Random Forest pipeline model.
try:
    model = load_model(MODEL_PATH)
except FileNotFoundError:
    st.error(
        f"Model file '{MODEL_PATH}' not found. "
        "Please ensure the model is trained and saved."
    )
    model = None

st.title('SuperKart Sales Prediction App')

# Explicit None check: the loaded object's truthiness is not what we care about.
if model is not None:
    st.sidebar.header('Input Features')

    # Input fields, one per feature column, in the order shown in the sidebar.
    product_weight = st.sidebar.number_input(
        'Product Weight(kg)', min_value=0.1, max_value=30.0, value=10.0
    )
    product_sugar_content = st.sidebar.selectbox(
        'Product Sugar Content', SUGAR_CONTENT_OPTIONS
    )
    product_allocated_area = st.sidebar.number_input(
        'Product Allocated Area (sq. m)',
        min_value=0.0, max_value=1.0, value=0.05,
    )
    product_type = st.sidebar.selectbox('Product Type', PRODUCT_TYPE_OPTIONS)
    product_mrp = st.sidebar.number_input(
        'Product MRP ($)', min_value=10.0, max_value=300.0, value=150.0
    )
    store_id = st.sidebar.selectbox('Store ID', STORE_ID_OPTIONS)
    store_establishment_year = st.sidebar.number_input(
        'Store Establishment Year', min_value=1900, max_value=2024, value=2000
    )
    store_size = st.sidebar.selectbox('Store Size', STORE_SIZE_OPTIONS)
    store_location_city_type = st.sidebar.selectbox(
        'Store Location City Type', CITY_TYPE_OPTIONS
    )
    store_type = st.sidebar.selectbox('Store Type', STORE_TYPE_OPTIONS)

    # Product_Id is part of the original data structure but is not collected
    # from the user; a placeholder keeps the column present for the pipeline.
    product_id = 'dummy_product_id'

    # Single-row frame whose column names mirror the dataset's columns.
    input_data = pd.DataFrame({
        'Product_Id': [product_id],
        'Product_Weight': [product_weight],
        'Product_Sugar_Content': [product_sugar_content],
        'Product_Allocated_Area': [product_allocated_area],
        'Product_Type': [product_type],
        'Product_MRP': [product_mrp],
        'Store_Id': [store_id],
        'Store_Establishment_Year': [store_establishment_year],
        'Store_Size': [store_size],
        'Store_Location_City_Type': [store_location_city_type],
        'Store_Type': [store_type],
    })

    st.subheader('Input Data')
    st.write(input_data)

    # Predict only when the user asks for it.
    if st.sidebar.button('Predict Sales'):
        try:
            prediction = model.predict(input_data)
            st.subheader('Predicted Product Store Sales Total')
            st.success(f'Predicted Sales: ${prediction[0]:,.2f}')
        except Exception as e:
            # UI boundary: surface any pipeline failure instead of crashing.
            st.error(f"An error occurred during prediction: {e}")
else:
    st.warning(
        "Model not loaded. Please ensure the model file exists "
        "and the pipeline is correctly defined."
    )