"""SuperKart Sales Forecasting App (Streamlit).

Collects product and store details from sidebar widgets, assembles them into a
single-row DataFrame whose column names match the training features, and runs
it through a pre-trained scikit-learn pipeline to predict
``Product_Store_Sales_Total``.

Fixes over the previous revision:
- removed the unused Flask app (this is a Streamlit app; the Flask ``app``
  object had no routes and was never served),
- removed the duplicate ``import joblib``,
- the model is now loaded exactly once through the ``@st.cache_resource``
  loader (previously it was loaded eagerly AND via a cached loader whose
  result was never used, and ``model = load_model`` was missing the call
  parentheses).
"""

import joblib
import pandas as pd
import streamlit as st


@st.cache_resource
def load_model():
    """Load and cache the trained model pipeline across Streamlit reruns.

    NOTE(review): the previous revision referenced two different artifacts
    ("best_random_forest_model.joblib" and
    "deployment_files/best_random_forest_pipeline.joblib"). The path kept here
    is the one whose object was actually used for prediction — confirm it is
    the intended deployment artifact.
    """
    return joblib.load("best_random_forest_model.joblib")


# The pipeline bundles preprocessing (scaling / one-hot encoding) with the
# regressor, so it accepts the raw feature DataFrame built below.
model_pipeline = load_model()

# ---------------------------------------------------------------------------
# Page header
# ---------------------------------------------------------------------------
st.title('SuperKart Sales Forecasting App')
st.write('Enter the product and store details to get a sales forecast.')

st.sidebar.header('Product and Store Details')

# ---------------------------------------------------------------------------
# Numerical inputs
# ---------------------------------------------------------------------------
product_weight = st.sidebar.number_input('Product Weight', min_value=0.0, value=10.0)
product_allocated_area = st.sidebar.number_input('Product Allocated Area', min_value=0.0, value=0.05)
product_mrp = st.sidebar.number_input('Product MRP', min_value=0.0, value=100.0)
store_establishment_year = st.sidebar.number_input(
    'Store Establishment Year', min_value=1900, max_value=2024, value=2000
)

# ---------------------------------------------------------------------------
# Categorical inputs
# ---------------------------------------------------------------------------
# These option lists mirror the unique category values seen during training.
# Keep them in sync with the data the pipeline's encoder was fitted on: an
# unseen category will either error or be silently ignored, depending on how
# the encoder was configured.
sugar_content_options = ['Low Sugar', 'Regular', 'No Sugar']
product_type_options = [
    'Frozen Foods', 'Dairy', 'Canned', 'Baking Goods', 'Health and Hygiene',
    'Snack Foods', 'Household', 'Meat', 'Soft Drinks', 'Breads', 'Hard Drinks',
    'Others', 'Starchy Foods', 'Breakfast', 'Seafood', 'Fruits and Vegetables',
]
store_id_options = ['OUT004', 'OUT003', 'OUT001', 'OUT002']
store_size_options = ['Medium', 'High', 'Small']
store_location_options = ['Tier 2', 'Tier 1', 'Tier 3']
store_type_options = ['Supermarket Type2', 'Departmental Store', 'Supermarket Type1', 'Food Mart']

product_sugar_content = st.sidebar.selectbox('Product Sugar Content', sugar_content_options)
product_type = st.sidebar.selectbox('Product Type', product_type_options)
store_id = st.sidebar.selectbox('Store ID', store_id_options)
store_size = st.sidebar.selectbox('Store Size', store_size_options)
store_location_city_type = st.sidebar.selectbox('Store Location City Type', store_location_options)
store_type = st.sidebar.selectbox('Store Type', store_type_options)

# ---------------------------------------------------------------------------
# Assemble the model input
# ---------------------------------------------------------------------------
# Keys must match the training feature names exactly; the pipeline's
# ColumnTransformer selects columns by name, so order is not critical but the
# names are.
input_data = {
    'Product_Weight': product_weight,
    'Product_Allocated_Area': product_allocated_area,
    'Product_MRP': product_mrp,
    'Store_Establishment_Year': store_establishment_year,
    'Product_Sugar_Content': product_sugar_content,
    'Product_Type': product_type,
    'Store_Id': store_id,
    'Store_Size': store_size,
    'Store_Location_City_Type': store_location_city_type,
    'Store_Type': store_type,
}

# Single-row DataFrame in the shape the pipeline expects.
input_df = pd.DataFrame([input_data])

st.subheader('Input Details:')
st.write(input_df)

# ---------------------------------------------------------------------------
# Prediction
# ---------------------------------------------------------------------------
if st.button('Predict Sales'):
    try:
        prediction = model_pipeline.predict(input_df)
        st.subheader('Predicted Product Store Sales Total:')
        st.write(f'{prediction[0]:,.2f}')
    except Exception as e:
        # Surface the failure to the user instead of crashing the app; the
        # usual cause is a column-name / category mismatch with training data.
        st.error(f"An error occurred during prediction: {e}")
        st.write("Please ensure the input features are correct and match the expected format.")