# SuperKart Sales Forecasting App — Streamlit deployment script.
# NOTE(review): the original file began with "Spaces: / Sleeping / Sleeping",
# which is Hugging Face Spaces page-status text accidentally captured in a
# copy-paste; it is not part of the program and has been replaced by this header.
import streamlit as st
import pandas as pd
import numpy as np
import joblib
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
from flask import Flask, request, jsonify

# NOTE(review): this Flask app has no routes registered and app.run() is never
# called, so it is inert inside a Streamlit process. Kept only so any external
# tooling importing `app` still finds it; consider deleting if unused — confirm.
app = Flask(__name__)
# Load the trained model pipeline once at module import so every prediction
# reuses the same in-memory estimator (Streamlit reruns the script per
# interaction; loading here keeps the code simple at the cost of a reload per
# rerun — use st.cache_resource if load time becomes a problem).
model_pipeline = joblib.load("best_random_forest_model.joblib")


def load_model():
    """Load the tuned random-forest pipeline from the deployment bundle.

    Returns:
        The fitted sklearn pipeline deserialized from
        'deployment_files/best_random_forest_pipeline.joblib'.
    """
    return joblib.load('deployment_files/best_random_forest_pipeline.joblib')


# BUG FIX: the original wrote `model = load_model` (no parentheses), binding the
# function object itself instead of the loaded pipeline. `model` is not used
# below — predictions go through `model_pipeline` — but if any caller relies on
# it, it must be an estimator, not a function.
# NOTE(review): this loads a second artifact from deployment_files/ — confirm
# that path exists in the deployed environment.
model = load_model()
# ---- Page header -----------------------------------------------------------
st.title('SuperKart Sales Forecasting App')
st.write('Enter the product and store details to get a sales forecast.')

# ---- Feature inputs --------------------------------------------------------
# The model pipeline was trained on these features:
#   Numerical:   Product_Weight, Product_Allocated_Area, Product_MRP,
#                Store_Establishment_Year
#   Categorical: Product_Sugar_Content, Product_Type, Store_Id, Store_Size,
#                Store_Location_City_Type, Store_Type
st.sidebar.header('Product and Store Details')

# Numerical inputs — min_value guards keep obviously invalid values out;
# defaults are plausible mid-range values, not dataset statistics.
product_weight = st.sidebar.number_input('Product Weight', min_value=0.0, value=10.0)
product_allocated_area = st.sidebar.number_input('Product Allocated Area', min_value=0.0, value=0.05)
product_mrp = st.sidebar.number_input('Product MRP', min_value=0.0, value=100.0)
store_establishment_year = st.sidebar.number_input('Store Establishment Year', min_value=1900, max_value=2024, value=2000)

# Categorical options. These must match the category levels the pipeline's
# encoder was fitted on — an unseen level would fail (or be ignored) inside the
# one-hot encoder. NOTE(review): verify these lists against the training data.
sugar_content_options = ['Low Sugar', 'Regular', 'No Sugar']
product_type_options = ['Frozen Foods', 'Dairy', 'Canned', 'Baking Goods', 'Health and Hygiene', 'Snack Foods', 'Household', 'Meat', 'Soft Drinks', 'Breads', 'Hard Drinks', 'Others', 'Starchy Foods', 'Breakfast', 'Seafood', 'Fruits and Vegetables']
store_id_options = ['OUT004', 'OUT003', 'OUT001', 'OUT002']
store_size_options = ['Medium', 'High', 'Small']
store_location_options = ['Tier 2', 'Tier 1', 'Tier 3']
store_type_options = ['Supermarket Type2', 'Departmental Store', 'Supermarket Type1', 'Food Mart']

product_sugar_content = st.sidebar.selectbox('Product Sugar Content', sugar_content_options)
product_type = st.sidebar.selectbox('Product Type', product_type_options)
store_id = st.sidebar.selectbox('Store ID', store_id_options)
store_size = st.sidebar.selectbox('Store Size', store_size_options)
store_location_city_type = st.sidebar.selectbox('Store Location City Type', store_location_options)
store_type = st.sidebar.selectbox('Store Type', store_type_options)
# Assemble the widget values into a single-row DataFrame whose column names
# exactly match the feature names the pipeline was fitted on — the pipeline's
# ColumnTransformer selects columns by name, so naming (not order) is what
# must agree with training.
input_data = {
    'Product_Weight': product_weight,
    'Product_Allocated_Area': product_allocated_area,
    'Product_MRP': product_mrp,
    'Store_Establishment_Year': store_establishment_year,
    'Product_Sugar_Content': product_sugar_content,
    'Product_Type': product_type,
    'Store_Id': store_id,
    'Store_Size': store_size,
    'Store_Location_City_Type': store_location_city_type,
    'Store_Type': store_type
}
input_df = pd.DataFrame([input_data])

# Echo the inputs back so the user can sanity-check them before predicting.
st.subheader('Input Details:')
st.write(input_df)
# Predict on demand. The raw single-row DataFrame is passed straight to the
# pipeline: the preprocessor fitted inside it handles scaling and one-hot
# encoding, matching columns by name, so no manual reordering is needed here.
if st.button('Predict Sales'):
    try:
        prediction = model_pipeline.predict(input_df)
        st.subheader('Predicted Product Store Sales Total:')
        # prediction is an array-like with one element for the one input row.
        st.write(f'{prediction[0]:,.2f}')
    except Exception as e:
        # Surface the failure to the user rather than crashing the app; typical
        # causes are a category level the encoder never saw or a missing column.
        st.error(f"An error occurred during prediction: {e}")
        st.write("Please ensure the input features are correct and match the expected format.")