import streamlit as st
import pandas as pd
import pickle
import os
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler, LabelEncoder
import xgboost as xgb
from pathlib import Path

# Professional Blue Shades for Dark & Light Mode
HEADER_COLOR = "#0A84FF"      # Bright Blue
SUBHEADER_COLOR = "#007AFF"   # iOS Blue
TEXT_COLOR = "#A6B1C0"        # Subtle grayish blue
INFO_COLOR = "#5AC8FA"        # Light Cyan
PREDICTION_COLOR = "#34C759"  # Greenish-Blue


def read_file(uploaded_file):
    """Read an uploaded file into a DataFrame based on its extension.

    Supports CSV, Excel (.xls/.xlsx) and JSON. Shows a Streamlit error and
    returns None for any other extension.
    """
    file_type = uploaded_file.name.split(".")[-1].lower()
    if file_type == "csv":
        return pd.read_csv(uploaded_file)
    elif file_type in ["xls", "xlsx"]:
        return pd.read_excel(uploaded_file)
    elif file_type == "json":
        return pd.read_json(uploaded_file)
    else:
        st.error("❌ Unsupported file type! Please upload a CSV, Excel, or JSON file.")
        return None


# Feature engineering functions
def split_dimensions(dim):
    """Process dimensions into separate components.

    Non-list inputs are treated as fully missing. The result is padded /
    truncated so it always has exactly 5 entries
    (dimx, dimy, dimz, rim, pockets).
    """
    if not isinstance(dim, list):
        dim = [np.nan] * 5
    return (dim[:5] + [np.nan] * 5)[:5]  # Ensure exactly 5 elements


def split_qtd_price(qtd_price):
    """Split quantity and price values.

    Returns the 2-element [quantity, price] list unchanged, or
    [NaN, NaN] when the input is not a 2-element list.
    """
    if not isinstance(qtd_price, list) or len(qtd_price) != 2:
        return [np.nan, np.nan]
    return qtd_price


def prepare_advanced_features(df):
    """Prepare advanced geometric features for prediction.

    Works on a copy of *df*. When a 'Dimensions' column is present, it is
    split into dimx/dimy/dimz/rim/pockets and several derived features
    (Volume, SurfaceArea, Perimeter, AspectRatio, DensityIndex,
    SizeComplexity) are added. Without 'Dimensions' the copy is returned
    unchanged (the original mangled code would have raised KeyError here).
    """
    df_processed = df.copy()

    # Process dimensions
    if 'Dimensions' in df_processed.columns:
        dimensions_split = df_processed['Dimensions'].apply(split_dimensions).tolist()
        dimensions_df = pd.DataFrame(
            dimensions_split,
            columns=['dimx', 'dimy', 'dimz', 'rim', 'pockets'],
        )
        df_processed = pd.concat([df_processed, dimensions_df], axis=1)

        # Calculate derived features
        df_processed['Volume'] = (
            df_processed['dimx'] * df_processed['dimy'] * df_processed['dimz']
        )
        df_processed['SurfaceArea'] = df_processed['dimx'] * df_processed['dimy']
        df_processed['Perimeter'] = 2 * (df_processed['dimx'] + df_processed['dimy'])
        df_processed['AspectRatio'] = df_processed['dimx'] / df_processed['dimy']
        # NOTE(review): Volume divided by its own definition — this is always
        # 1.0 (or NaN when any dimension is missing/zero). Kept as-is because
        # the model was presumably trained with this feature; confirm before
        # changing the formula.
        df_processed['DensityIndex'] = df_processed['Volume'] / (
            df_processed['dimx'] * df_processed['dimy'] * df_processed['dimz']
        )
        df_processed['SizeComplexity'] = (
            np.log1p(df_processed['Volume']) * df_processed['AspectRatio']
        )

    return df_processed


def process_input_data(df, selected_features):
    """Process input data for prediction.

    Applies the feature engineering and guarantees every feature in
    *selected_features* exists (missing ones are zero-filled), then returns
    the frame restricted to exactly those columns, in order.
    """
    # Apply feature engineering
    df_processed = prepare_advanced_features(df)

    # Ensure all required features are present
    for feature in selected_features:
        if feature not in df_processed.columns:
            df_processed[feature] = 0

    return df_processed[selected_features]


# Load the trained model and transformers into session state
@st.cache_resource
def load_models():
    """Load all necessary models and transformers.

    Artifacts (model.pkl, scaler.pkl, encoders.pkl) are expected next to
    this script. Cached by Streamlit so loading happens once per process.

    Returns:
        tuple: (model, scaler, encoders, model_features)
    """
    base_dir = Path(__file__).parent
    model = joblib.load(base_dir / 'model.pkl')
    # Force CPU histogram inference so the app runs on hosts without a GPU.
    model.set_params(tree_method='hist', device='cpu')
    scaler = joblib.load(base_dir / 'scaler.pkl')
    encoders = joblib.load(base_dir / 'encoders.pkl')

    # Feature list the model was trained on. Hard-coded — the booster's own
    # feature names are not consulted (an unused `model.get_booster()` call
    # was removed).
    model_features = [
        'Volume', 'SurfaceArea', 'Perimeter', 'SizeComplexity',
        'MainCategoryEncoded', 'SubCategoryEncoded', 'Quantity'
    ]
    return model, scaler, encoders, model_features


# Main App
def main():
    """Render the Streamlit page: load artifacts, show model/dataset info."""
    # Ensure models are loaded into session_state
    model, scaler, encoders, model_features = load_models()
    st.session_state['model'] = model
    st.session_state['scaler'] = scaler
    st.session_state['encoders'] = encoders
    st.session_state['model_features'] = model_features

    # NOTE(review): the original markdown blocks were triple-quoted f-strings
    # whose HTML markup was lost when the file was mangled; the visible text
    # is preserved verbatim below.
    st.markdown(f"""
This app uses a trained machine learning model to predict filter's prices based on input data.
""", unsafe_allow_html=True)
    st.markdown(f"""App version model not updated.
""", unsafe_allow_html=True)

    # Model and Dataset Info
    st.markdown(f"""📌 Model:
""", unsafe_allow_html=True)
    st.write("✅ **Type**: XGBRegressor")
    st.write(f"📈 **Features Used**:", model_features)
    st.write("💡 **Target**: Price")
    st.markdown(f"""📚 Dataset:
""", unsafe_allow_html=True)
    st.write("📋 **Dataset Name**: Filter's Price Dataset")
    st.write("📉 **Number of Rows**: 5,500")
    st.write("📊 **Number of Features**:", len(model_features))
    #st.write("🌐 **Source**: ")

    # Manual input section
    st.markdown(f"""📜 Uploaded Data:
""", unsafe_allow_html=True)
    # st.dataframe(input_data)  # Display uploaded data
    # # Ensure the required columns exist in the input data
    # if all(feature in input_data.columns for feature in model_features):
    #     # Process the input data
    #     processed_data = process_input_data(input_data, model_features)
    #     # Display processed features and engineering
    #     st.markdown(f"