# app.py — Streamlit filter-price prediction app (revision 1ecf5ee)
import streamlit as st
import pandas as pd
import pickle
import os
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler, LabelEncoder
import xgboost as xgb
from pathlib import Path
# Professional Blue Shades for Dark & Light Mode
# Hex colors injected into the inline-styled HTML snippets rendered via
# st.markdown(..., unsafe_allow_html=True) throughout main().
HEADER_COLOR = "#0A84FF" # Bright Blue
SUBHEADER_COLOR = "#007AFF" # iOS Blue
TEXT_COLOR = "#A6B1C0" # Subtle grayish blue
INFO_COLOR = "#5AC8FA" # Light Cyan
PREDICTION_COLOR = "#34C759" # Greenish-Blue
# Read uploaded file
def read_file(uploaded_file):
    """Load an uploaded file into a pandas DataFrame based on its extension.

    Supports CSV, Excel (xls/xlsx) and JSON. For any other extension a
    Streamlit error is shown and None is returned.
    """
    extension = uploaded_file.name.rsplit(".", 1)[-1].lower()
    # Map each supported extension to its pandas reader.
    readers = {
        "csv": pd.read_csv,
        "xls": pd.read_excel,
        "xlsx": pd.read_excel,
        "json": pd.read_json,
    }
    reader = readers.get(extension)
    if reader is None:
        st.error("❌ Unsupported file type! Please upload a CSV, Excel, or JSON file.")
        return None
    return reader(uploaded_file)
# Feature engineering functions
def split_dimensions(dim, n_components=5):
    """Normalize a raw dimensions value to exactly ``n_components`` entries.

    Generalized from the fixed 5-slot version: the default keeps the original
    behavior (dimx, dimy, dimz, rim, pockets).

    Args:
        dim: Expected to be a list of numeric dimension values; any non-list
            value is treated as entirely missing.
        n_components: Number of slots in the returned list (default 5).

    Returns:
        list: Length ``n_components``; short inputs are NaN-padded, long
        inputs are truncated.
    """
    if not isinstance(dim, list):
        return [np.nan] * n_components
    # Pad with NaN then truncate so short, exact and over-long lists all
    # yield exactly n_components elements.
    return (dim[:n_components] + [np.nan] * n_components)[:n_components]
def split_qtd_price(qtd_price):
    """Return ``[quantity, price]``; fall back to ``[NaN, NaN]`` when the
    input is not a two-element list."""
    is_valid = isinstance(qtd_price, list) and len(qtd_price) == 2
    return qtd_price if is_valid else [np.nan, np.nan]
def prepare_advanced_features(df):
    """Prepare advanced features for prediction.

    Works on a copy of *df*. If a 'Dimensions' column is present it is split
    into dimx/dimy/dimz/rim/pockets via split_dimensions; otherwise the frame
    is assumed to already carry dimx, dimy and dimz columns (as the manual
    input path in main() provides). Derived geometry features are then added
    unconditionally.
    """
    df_processed = df.copy()
    # Process dimensions
    if 'Dimensions' in df_processed.columns:
        dimensions_split = df_processed['Dimensions'].apply(split_dimensions).tolist()
        dimensions_df = pd.DataFrame(dimensions_split, columns=['dimx', 'dimy', 'dimz', 'rim', 'pockets'])
        # Positional concat — assumes df_processed has a default RangeIndex;
        # TODO(review): confirm callers never pass a re-indexed frame.
        df_processed = pd.concat([df_processed, dimensions_df], axis=1)
    # Calculate derived features (requires dimx/dimy/dimz columns to exist).
    df_processed['Volume'] = df_processed['dimx'] * df_processed['dimy'] * df_processed['dimz']
    df_processed['SurfaceArea'] = df_processed['dimx'] * df_processed['dimy']
    df_processed['Perimeter'] = 2 * (df_processed['dimx'] + df_processed['dimy'])
    # Division by zero yields inf/NaN here rather than raising (pandas semantics).
    df_processed['AspectRatio'] = df_processed['dimx'] / df_processed['dimy']
    # NOTE(review): DensityIndex is Volume divided by dimx*dimy*dimz, i.e.
    # identically 1.0 (or NaN) — likely a leftover; it is not in the model's
    # feature list, so it is harmless but uninformative.
    df_processed['DensityIndex'] = df_processed['Volume'] / (df_processed['dimx'] * df_processed['dimy'] * df_processed['dimz'])
    df_processed['SizeComplexity'] = np.log1p(df_processed['Volume']) * df_processed['AspectRatio']
    return df_processed
def process_input_data(df, selected_features):
    """Run feature engineering and return exactly the model's feature columns.

    Any feature the engineered frame lacks is filled with 0 so the returned
    frame always has the requested columns, in the requested order.
    """
    engineered = prepare_advanced_features(df)
    # Backfill missing model features with 0 before column selection.
    missing = [feature for feature in selected_features if feature not in engineered.columns]
    for feature in missing:
        engineered[feature] = 0
    return engineered[selected_features]
# Load the trained model and transformers into session state
@st.cache_resource
def load_models():
    """Load the trained model, scaler and label encoders from disk.

    Decorated with st.cache_resource so the artifacts are deserialized only
    once per Streamlit server process.

    Returns:
        tuple: (model, scaler, encoders, model_features) where model_features
        is the exact column order the prediction pipeline must produce.
    """
    base_dir = Path(__file__).parent
    model = joblib.load(base_dir / 'model.pkl')
    # Force CPU histogram inference so the app runs on GPU-less hosts even if
    # the model was trained with a GPU tree method.
    model.set_params(tree_method='hist', device='cpu')
    scaler = joblib.load(base_dir / 'scaler.pkl')
    encoders = joblib.load(base_dir / 'encoders.pkl')
    # The feature list is pinned here (not read from the booster) so the input
    # pipeline stays explicit and ordered; the unused get_booster() call that
    # previously suggested otherwise has been removed.
    model_features = [
        'Volume', 'SurfaceArea', 'Perimeter',
        'SizeComplexity', 'MainCategoryEncoded',
        'SubCategoryEncoded', 'Quantity'
    ]
    return model, scaler, encoders, model_features
# Main App
def main():
    """Render the Streamlit UI: model/dataset info plus a manual input form
    that runs the full feature-engineering + scaling + prediction pipeline."""
    # Ensure models are loaded into session_state
    model, scaler, encoders, model_features = load_models()
    st.session_state['model'] = model
    st.session_state['scaler'] = scaler
    st.session_state['encoders'] = encoders
    st.session_state['model_features'] = model_features
    st.markdown(f"<h1 style='color: {HEADER_COLOR}; text-align: center;'>🔹 Filter's Price Prediction App 🔹</h1>", unsafe_allow_html=True)
    st.markdown(f"<p style='color: {TEXT_COLOR}; font-size: 18px;'>This app uses a trained machine learning model to predict filter's prices based on input data.</p>", unsafe_allow_html=True)
    st.markdown(f"<p style='color: {TEXT_COLOR}; font-size: 18px;'>App version model not updated.</p>", unsafe_allow_html=True)
    # Model and Dataset Info
    st.markdown(f"<h2 style='color: {SUBHEADER_COLOR};'>📊 Model & Dataset Info</h2>", unsafe_allow_html=True)
    st.markdown(f"<p style='color: {INFO_COLOR};'>📌 Model:</p>", unsafe_allow_html=True)
    st.write("✅ **Type**: XGBRegressor")
    st.write(f"📈 **Features Used**:", model_features)
    st.write("💡 **Target**: Price")
    st.markdown(f"<p style='color: {INFO_COLOR};'>📚 Dataset:</p>", unsafe_allow_html=True)
    st.write("📋 **Dataset Name**: Filter's Price Dataset")
    st.write("📉 **Number of Rows**: 5,500")
    st.write("📊 **Number of Features**:", len(model_features))
    #st.write("🌐 **Source**: ")
    # Manual input section
    st.markdown(f"<h2 style='color: {SUBHEADER_COLOR};'>✍️ Manual Input</h2>", unsafe_allow_html=True)
    with st.form("manual_input_form"):
        col1, col2 = st.columns(2)
        with col1:
            dimx = st.number_input("Dimension X", min_value=0.0)
            dimy = st.number_input("Dimension Y", min_value=0.0)
            dimz = st.number_input("Dimension Z", min_value=0.0)
        with col2:
            quantity = st.number_input("Quantity", min_value=1)
        # Category input
        category = st.text_input("Main Category", help="Enter the main filter category (e.g., F7, MV/G4)")
        subcategory = st.text_input("Subcategory", help="Enter the filter subcategory (e.g., PL, G4)")
        submitted = st.form_submit_button("Calculate Price")
    # Prediction runs only on form submission; rendered below the form.
    if submitted:
        try:
            # Create dataframe from manual input
            manual_data = pd.DataFrame({
                'dimx': [dimx],
                'dimy': [dimy],
                'dimz': [dimz],
                'Quantity': [quantity],
                'MainCategory': [category],
                'SubCategory': [subcategory]
            })
            # Process manual input
            manual_processed = process_input_data(manual_data, model_features)
            # Display input features and feature engineering
            st.markdown(f"<h3 style='color: {TEXT_COLOR};'>📝 Input Features and Feature Engineering:</h3>", unsafe_allow_html=True)
            st.dataframe(manual_processed) # Display the processed features
            # Scale the data and make prediction
            manual_scaled = st.session_state['scaler'].transform(manual_processed)
            prediction = st.session_state['model'].predict(manual_scaled)[0]
            # Display prediction and its explanation
            st.markdown(
                f"<h3 style='color: {TEXT_COLOR}; display: inline;'>🔮 Predicted Price: "
                f"<span style='color: {PREDICTION_COLOR};'>${prediction:.2f}</span></h3>",
                unsafe_allow_html=True
            )
        except Exception as e:
            # Broad catch is a deliberate UI boundary: surface any pipeline
            # failure to the user instead of crashing the app.
            st.error(f"Error calculating price: {str(e)}")
    # Upload CSV for Prediction
    # st.markdown(f"<h2 style='color: {SUBHEADER_COLOR};'>📂 Upload Data for Prediction</h2>", unsafe_allow_html=True)
    # uploaded_file = st.file_uploader("📥 Upload a CSV, Excel, or JSON file", type=["csv", "xlsx", "xls", "json"])
    # if uploaded_file is not None:
    # input_data = read_file(uploaded_file)
    # if input_data is not None:
    # st.markdown(f"<p style='color: {INFO_COLOR};'>📜 Uploaded Data:</p>", unsafe_allow_html=True)
    # st.dataframe(input_data) # Display uploaded data
    # # Ensure the required columns exist in the input data
    # if all(feature in input_data.columns for feature in model_features):
    # # Process the input data
    # processed_data = process_input_data(input_data, model_features)
    # # Display processed features and engineering
    # st.markdown(f"<h3 style='color: {TEXT_COLOR};'>📝 Processed Features and Feature Engineering:</h3>", unsafe_allow_html=True)
    # st.dataframe(processed_data) # Show feature engineering results
    # # Apply scaling to processed data
    # scaled_data = st.session_state['scaler'].transform(processed_data)
    # # Make predictions for all rows
    # predictions = st.session_state['model'].predict(scaled_data)
    # # Add the predictions to the dataframe
    # input_data["Predicted Price"] = predictions
    # # Display the final table with input features, feature engineering, and the predicted price
    # st.markdown(f"<h3 style='color: {PREDICTION_COLOR};'>🔮 Predictions:</h3>", unsafe_allow_html=True)
    # st.dataframe(input_data) # Display the final table
    # else:
    # st.error(f"❌ Uploaded data must contain the required features: {model_features}")
# Run the app. The __main__ guard keeps importing this module side-effect
# free; `streamlit run app.py` executes the script as __main__, so the app
# still launches normally.
if __name__ == "__main__":
    main()